List Info

Thread: OpenSSL: openssl/ Configure openssl/crypto/bn/ Makefile openssl/...




OpenSSL: openssl/ Configure openssl/crypto/bn/ Makefile openssl/...
user name
2006-04-30 21:15:32
  OpenSSL CVS Repository
  http://cvs.openssl.org/
 
____________________________________________________________
________________

  Server: cvs.openssl.org                  Name:   Andy
Polyakov
  Root:   /v/openssl/cvs                   Email:  appro@openssl.org
  Module: openssl                          Date:  
30-Apr-2006 23:15:30
  Branch: HEAD                             Handle:
2006043022152504

  Added files:
    openssl/crypto/bn/asm   ppc-mont.pl
    openssl/crypto/perlasm  ppc-xlate.pl
  Modified files:
    openssl                 Configure
    openssl/crypto/bn       Makefile

  Log:
    Yet another "teaser" Montgomery
multiplication module, for PowerPC.

  Summary:
    Revision    Changes     Path
    1.533       +9  -8      openssl/Configure
    1.18        +8  -0      openssl/crypto/bn/Makefile
    1.1         +327 -0    
openssl/crypto/bn/asm/ppc-mont.pl
    1.1         +113 -0    
openssl/crypto/perlasm/ppc-xlate.pl
 
____________________________________________________________
________________

  patch -p0 <<' .'
  Index: openssl/Configure
 
============================================================
================
  $ cvs diff -u -r1.532 -r1.533 Configure
  --- openssl/Configure	16 Apr 2006 14:42:54 -0000	1.532
  +++ openssl/Configure	30 Apr 2006 21:15:25 -0000	1.533
  @@ -314,7 +314,7 @@
   # *-generic* is endian-neutral target, but ./config is
free to
   # throw in -D[BL]_ENDIAN, whichever appropriate...
   "linux-generic32","gcc:-DTERMIO -O3
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG
RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJ
OR).\$(SHLIB_MINOR)",
  -"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK
DES_RISC1
DES_UNROLL::linux_ppc32.o:::::::::::dlfcn:linux-shared:-fPIC
::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  +"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK
DES_RISC1 DES_UNROLL::linux_ppc32.o
linux_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
   #### IA-32 targets...
   "linux-ia32-icc",	"icc:-DL_ENDIAN
-DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG
$
$:$:dlfcn:linux-shared:-KPIC::.so
.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
   "linux-elf",	"gcc:-DL_ENDIAN -DTERMIO
-O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG
$
$:$:dlfcn:linux-shared:-fPIC::.so
.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  @@ -322,7 +322,7 @@
   ####
   "linux-generic64","gcc:-DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJ
OR).\$(SHLIB_MINOR)",
   # -bpowerpc64-linux is transient option, -m64 should be
the one to use...
  -"linux-ppc64",	"gcc:-bpowerpc64-linux
-DB_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1
DES_UNROLL::linux_ppc64.o:::::::::::dlfcn:linux-shared:-fPIC
:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  +"linux-ppc64",	"gcc:-bpowerpc64-linux
-DB_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o
linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowe
rpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
   "linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_UNROLL
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
   "linux-ia64-ecc","ecc:-DL_ENDIAN
-DTERMIO -O2 -Wall
-no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
   "linux-ia64-icc","icc:-DL_ENDIAN
-DTERMIO -O2 -Wall
-no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_RISC1
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
  @@ -407,12 +407,12 @@
   
   #### IBM's AIX.
   "aix3-cc",  "cc:-O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
  -"aix-gcc",  "gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o:::::::::::dlfcn:",
  -"aix64-gcc","gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o:::::::::::dlfcn::::::-X64",
  +"aix-gcc",  "gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o
aix_ppc32-mont.o:::::::::::dlfcn:",
  +"aix64-gcc","gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o
aix_ppc64-mont.o:::::::::::dlfcn::::::-X64",
   # Below targets assume AIX 5. Idea is to effectively
disregard $OBJECT_MODE
   # at build time. $OBJECT_MODE is respected at ./config
stage!
  -"aix-cc",   "cc:-q32 -O -DB_ENDIAN
-qmaxmem=16384::-qthreaded:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o:::::::::::dlfcn:aix-shared::-q32:.so.
\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
  -"aix64-cc", "cc:-q64 -O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o:::::::::::dlfcn:aix-shared::-q64:.so.
\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
  +"aix-cc",   "cc:-q32 -O -DB_ENDIAN
-qmaxmem=16384::-qthreaded:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o
aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SH
LIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
  +"aix64-cc", "cc:-q64 -O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o
aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SH
LIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
   
   #
   # Cray T90 and similar (SDSC)
  @@ -504,9 +504,10 @@
   
   ##### MacOS X (a.k.a. Rhapsody or Darwin) setup
   "rhapsody-ppc-cc","cc:-O3
-DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR
RC4_CHUNK DES_UNROLL BF_PTR:$::",
  -"darwin-ppc-cc","cc:-O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_L
LONG RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR::osx_ppc32.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
  +"darwin-ppc-cc","cc:-O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_L
LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o
osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
  +"darwin64-ppc-cc","cc:-m64 -O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXT
Y_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR::osx_ppc64.o
osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-m64
-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
   "darwin-i386-cc","cc:-O3
-fomit-frame-pointer
-DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL BF_PTR:$:dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
  -"debug-darwin-ppc-cc","cc:-DBN_DEBUG
-DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall
-O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL
BF_PTR::osx_ppc32.o:::::::::::dlfcn:darwin-shared:-fPIC:-dyn
amiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
  +"debug-darwin-ppc-cc","cc:-DBN_DEBUG
-DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall
-O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL BF_PTR::osx_ppc32.o
osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynami
clib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
   
   ##### A/UX
   "aux3-gcc","gcc:-O2
-DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR:::",
   .
  patch -p0 <<' .'
  Index: openssl/crypto/bn/Makefile
 
============================================================
================
  $ cvs diff -u -r1.17 -r1.18 Makefile
  --- openssl/crypto/bn/Makefile	12 Feb 2006 23:21:49
-0000	1.17
  +++ openssl/crypto/bn/Makefile	30 Apr 2006 21:15:27
-0000	1.18
  @@ -120,6 +120,14 @@
   aix_ppc32.s: asm/ppc.pl;	$(PERL) asm/ppc.pl $@
   aix_ppc64.s: asm/ppc.pl;	$(PERL) asm/ppc.pl $@
   osx_ppc32.s: asm/ppc.pl;	$(PERL) $< $@
  +osx_ppc64.s: asm/ppc.pl;	$(PERL) $< $@
  +
  +linux_ppc32-mont.s: asm/ppc-mont.pl;	$(PERL) $< $@
  +linux_ppc64-mont.s: asm/ppc-mont.pl;	$(PERL) $< $@
  +aix_ppc32-mont.s: asm/ppc-mont.pl;	$(PERL)
asm/ppc-mont.pl $@
  +aix_ppc64-mont.s: asm/ppc-mont.pl;	$(PERL)
asm/ppc-mont.pl $@
  +osx_ppc32-mont.s: asm/ppc-mont.pl;	$(PERL) $< $@
  +osx_ppc64-mont.s: asm/ppc-mont.pl;	$(PERL) $< $@
   
   files:
   	$(PERL) $(TOP)/util/files.pl Makefile >>
$(TOP)/MINFO
   .
  patch -p0 <<' .'
  Index: openssl/crypto/bn/asm/ppc-mont.pl
 
============================================================
================
  $ cvs diff -u -r0 -r1.1 ppc-mont.pl
  --- /dev/null	2006-04-30 23:15:29 +0200
  +++ ppc-mont.pl	2006-04-30 23:15:29 +0200
  @@ -0,0 +1,327 @@
  +#!/usr/bin/env perl
  +
  +#
============================================================
========
  +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  +# project. Rights for redistribution and usage in source
and binary
  +# forms are granted according to the OpenSSL license.
  +#
============================================================
========
  +
  +# April 2006
  +
  +# "Teaser" Montgomery multiplication module
for PowerPC. It's possible
  +# to gain a bit more by modulo-scheduling outer loop,
then dedicated
  +# squaring procedure should give further 20% and code can
be adapted
  +# for 32-bit application running on 64-bit CPU. As for
the latter.
  +# It won't be able to achieve "native"
64-bit performance, because in
  +# 32-bit application context every addc instruction will
have to be
  +# expanded as addc, twice right shift by 32 and finally
adde, etc.
  +# So far RSA *sign* performance improvement over
pre-bn_mul_mont asm
  +# for 64-bit application running on PPC970/G5 is:
  +#
  +# 512-bit	+65%	
  +# 1024-bit	+35%
  +# 2048-bit	+18%
  +# 4096-bit	+4%
  +
  +$output = shift;
  +
  +if ($output =~ /32\-mont\.s/) {
  +	$BITS=	32;
  +	$BNSZ=	$BITS/8;
  +	$SIZE_T=4;
  +	$RZONE=	224;
  +	$FRAME=	$SIZE_T*16;
  +
  +	$LD=	"lwz";		# load
  +	$LDU=	"lwzu";		# load and update
  +	$LDX=	"lwzx";		# load indexed
  +	$ST=	"stw";		# store
  +	$STU=	"stwu";		# store and update
  +	$STX=	"stwx";		# store indexed
  +	$STUX=	"stwux";	# store indexed and update
  +	$UMULL=	"mullw";	# unsigned multiply low
  +	$UMULH=	"mulhwu";	# unsigned multiply high
  +	$UCMP=	"cmplw";	# unsigned compare
  +	$PUSH=	$ST;
  +	$POP=	$LD;
  +} elsif ($output =~ /64\-mont\.s/) {
  +	$BITS=	64;
  +	$BNSZ=	$BITS/8;
  +	$SIZE_T=8;
  +	$RZONE=	288;
  +	$FRAME=	$SIZE_T*16;
  +
  +	# same as above, but 64-bit mnemonics...
  +	$LD=	"ld";		# load
  +	$LDU=	"ldu";		# load and update
  +	$LDX=	"ldx";		# load indexed
  +	$ST=	"std";		# store
  +	$STU=	"stdu";		# store and update
  +	$STX=	"stdx";		# store indexed
  +	$STUX=	"stdux";	# store indexed and update
  +	$UMULL=	"mulld";	# unsigned multiply low
  +	$UMULH=	"mulhdu";	# unsigned multiply high
  +	$UCMP=	"cmpld";	# unsigned compare
  +	$PUSH=	$ST;
  +	$POP=	$LD;
  +} else { die "nonsense $output"; }
  +
  +( defined shift || open STDOUT,"| $^X
../perlasm/ppc-xlate.pl $output" ) ||
  +	die "can't call ../perlasm/ppc-xlate.pl:
$!";
  +
  +$sp="r1";
  +$toc="r2";
  +$rp="r3";	$ovf="r3";
  +$ap="r4";
  +$bp="r5";
  +$np="r6";
  +$n0="r7";
  +$num="r8";
  +$rp="r9";	# $rp is reassigned
  +$aj="r10";
  +$nj="r11";
  +$tj="r12";
  +# non-volatile registers
  +$i="r14";
  +$j="r15";
  +$tp="r16";
  +$m0="r17";
  +$m1="r18";
  +$lo0="r19";
  +$hi0="r20";
  +$lo1="r21";
  +$hi1="r22";
  +$alo="r23";
  +$ahi="r24";
  +$nlo="r25";
  +#
  +$nhi="r0";
  +
  +$code=<<___;
  +.text
  +
  +.globl	.bn_mul_mont
  +.align	4
  +.bn_mul_mont:
  +	cmpwi	$num,4
  +	mr	$rp,r3		; $rp is reassigned
  +	li	r3,0
  +	bltlr
  +
  +	slwi	$num,$num,`log($BNSZ)/log(2)`
  +	li	$tj,-4096
  +	addi	$ovf,$num,`$FRAME+$RZONE`
  +	subf	$ovf,$ovf,$sp	; $sp-$ovf
  +	and	$ovf,$ovf,$tj	; minimize TLB usage
  +	subf	$ovf,$sp,$ovf	; $ovf-$sp
  +	srwi	$num,$num,`log($BNSZ)/log(2)`
  +	$STUX	$sp,$sp,$ovf
  +
  +	$PUSH	r14,`4*$SIZE_T`($sp)
  +	$PUSH	r15,`5*$SIZE_T`($sp)
  +	$PUSH	r16,`6*$SIZE_T`($sp)
  +	$PUSH	r17,`7*$SIZE_T`($sp)
  +	$PUSH	r18,`8*$SIZE_T`($sp)
  +	$PUSH	r19,`9*$SIZE_T`($sp)
  +	$PUSH	r20,`10*$SIZE_T`($sp)
  +	$PUSH	r21,`11*$SIZE_T`($sp)
  +	$PUSH	r22,`12*$SIZE_T`($sp)
  +	$PUSH	r23,`13*$SIZE_T`($sp)
  +	$PUSH	r24,`14*$SIZE_T`($sp)
  +	$PUSH	r25,`15*$SIZE_T`($sp)
  +
  +	$LD	$n0,0($n0)	; pull n0[0] value
  +	addi	$num,$num,-2	; adjust $num for counter register
  +
  +	$LD	$m0,0($bp)	; m0=bp[0]
  +	$LD	$aj,0($ap)	; ap[0]
  +	addi	$tp,$sp,$FRAME
  +	$UMULL	$lo0,$aj,$m0	; ap[0]*bp[0]
  +	$UMULH	$hi0,$aj,$m0
  +
  +	$LD	$aj,$BNSZ($ap)	; ap[1]
  +	$LD	$nj,0($np)	; np[0]
  +
  +	$UMULL	$m1,$lo0,$n0	; "tp[0]"*n0
  +
  +	$UMULL	$alo,$aj,$m0	; ap[1]*bp[0]
  +	$UMULH	$ahi,$aj,$m0
  +
  +	$UMULL	$lo1,$nj,$m1	; np[0]*m1
  +	$UMULH	$hi1,$nj,$m1
  +	$LD	$nj,$BNSZ($np)	; np[1]
  +	addc	$lo1,$lo1,$lo0
  +	addze	$hi1,$hi1
  +
  +	$UMULL	$nlo,$nj,$m1	; np[1]*m1
  +	$UMULH	$nhi,$nj,$m1
  +
  +	mtctr	$num
  +	li	$j,`2*$BNSZ`
  +.align	4
  +L1st:
  +	$LDX	$aj,$ap,$j	; ap[j]
  +	$LDX	$nj,$np,$j	; np[j]
  +	addc	$lo0,$alo,$hi0
  +	addze	$hi0,$ahi
  +	$UMULL	$alo,$aj,$m0	; ap[j]*bp[0]
  +	$UMULH	$ahi,$aj,$m0
  +
  +	addc	$lo1,$nlo,$hi1
  +	addze	$hi1,$nhi
  +	$UMULL	$nlo,$nj,$m1	; np[j]*m1
  +	$UMULH	$nhi,$nj,$m1
  +	addc	$lo1,$lo1,$lo0	; np[j]*m1+ap[j]*bp[0]
  +	addze	$hi1,$hi1
  +	$ST	$lo1,0($tp)	; tp[j-1]
  +
  +	addi	$j,$j,$BNSZ	; j++
  +	addi	$tp,$tp,$BNSZ	; tp++
  +	bdnz-	L1st
  +;L1st
  +	addc	$lo0,$alo,$hi0
  +	addze	$hi0,$ahi
  +
  +	addc	$lo1,$nlo,$hi1
  +	addze	$hi1,$nhi
  +	addc	$lo1,$lo1,$lo0	; np[j]*m1+ap[j]*bp[0]
  +	addze	$hi1,$hi1
  +	$ST	$lo1,0($tp)	; tp[j-1]
  +
  +	li	$ovf,0
  +	addc	$hi1,$hi1,$hi0
  +	addze	$ovf,$ovf	; upmost overflow bit
  +	$ST	$hi1,$BNSZ($tp)
  +
  +	li	$i,$BNSZ
  +.align	4
  +Louter:
  +	$LDX	$m0,$bp,$i	; m0=bp[i]
  +	$LD	$aj,0($ap)	; ap[0]
  +	addi	$tp,$sp,$FRAME
  +	$LD	$tj,$FRAME($sp)	; tp[0]
  +	$UMULL	$lo0,$aj,$m0	; ap[0]*bp[i]
  +	$UMULH	$hi0,$aj,$m0
  +	$LD	$aj,$BNSZ($ap)	; ap[1]
  +	$LD	$nj,0($np)	; np[0]
  +	addc	$lo0,$lo0,$tj	; ap[0]*bp[i]+tp[0]
  +	addze	$hi0,$hi0
  +
  +	$UMULL	$m1,$lo0,$n0	; tp[0]*n0
  +
  +	$UMULL	$alo,$aj,$m0	; ap[j]*bp[i]
  +	$UMULH	$ahi,$aj,$m0
  +
  +	$UMULL	$lo1,$nj,$m1	; np[0]*m1
  +	$UMULH	$hi1,$nj,$m1
  +	$LD	$nj,$BNSZ($np)	; np[1]
  +	addc	$lo1,$lo1,$lo0
  +	addze	$hi1,$hi1
  +
  +	$UMULL	$nlo,$nj,$m1	; np[1]*m1
  +	$UMULH	$nhi,$nj,$m1
  +
  +	mtctr	$num
  +	li	$j,`2*$BNSZ`
  +.align	4
  +Linner:
  +	$LDX	$aj,$ap,$j	; ap[j]
  +	$LD	$tj,$BNSZ($tp)	; tp[j]
  +	addc	$lo0,$alo,$hi0
  +	addze	$hi0,$ahi
  +	$LDX	$nj,$np,$j	; np[j]
  +	addc	$lo0,$lo0,$tj	; ap[j]*bp[i]+tp[j]
  +	addze	$hi0,$hi0
  +	$UMULL	$alo,$aj,$m0	; ap[j]*bp[i]
  +	$UMULH	$ahi,$aj,$m0
  +
  +	addc	$lo1,$nlo,$hi1
  +	addze	$hi1,$nhi
  +	$UMULL	$nlo,$nj,$m1	; np[j]*m1
  +	$UMULH	$nhi,$nj,$m1
  +	addc	$lo1,$lo1,$lo0	; np[j]*m1+ap[j]*bp[i]+tp[j]
  +	addze	$hi1,$hi1
  +	$ST	$lo1,0($tp)	; tp[j-1]
  +
  +	addi	$j,$j,$BNSZ	; j++
  +	addi	$tp,$tp,$BNSZ	; tp++
  +	bdnz-	Linner
  +;Linner
  +	$LD	$tj,$BNSZ($tp)	; tp[j]
  +	addc	$lo0,$alo,$hi0
  +	addze	$hi0,$ahi
  +	addc	$lo0,$lo0,$tj	; ap[j]*bp[i]+tp[j]
  +	addze	$hi0,$hi0
  +
  +	addc	$lo1,$nlo,$hi1
  +	addze	$hi1,$nhi
  +	addc	$lo1,$lo1,$lo0	; np[j]*m1+ap[j]*bp[i]+tp[j]
  +	addze	$hi1,$hi1
  +	$ST	$lo1,0($tp)	; tp[j-1]
  +
  +	addic	$ovf,$ovf,-1	; move upmost overflow to XER[CA]
  +	li	$ovf,0
  +	adde	$hi1,$hi1,$hi0
  +	addze	$ovf,$ovf
  +	$ST	$hi1,$BNSZ($tp)
  +;
  +	slwi	$tj,$num,`log($BNSZ)/log(2)`
  +	$UCMP	$i,$tj
  +	addi	$i,$i,$BNSZ
  +	ble-	Louter
  +
  +	addi	$num,$num,2	; restore $num
  +	addi	$tp,$sp,$FRAME
  +	mtctr	$num
  +	li	$j,0
  +
  +	subfc.	$ovf,$j,$ovf	; sets XER[CA]
  +	bne	Lsub
  +	$UCMP	$hi1,$nj
  +	bge	Lsub
  +.align	4
  +Lcopy:
  +	$LDX	$tj,$tp,$j
  +	$STX	$tj,$rp,$j
  +	$STX	$j,$tp,$j	; zap at once
  +	addi	$j,$j,$BNSZ
  +	bdnz-	Lcopy
  +
  +Lexit:
  +	$POP	r14,`4*$SIZE_T`($sp)
  +	$POP	r15,`5*$SIZE_T`($sp)
  +	$POP	r16,`6*$SIZE_T`($sp)
  +	$POP	r17,`7*$SIZE_T`($sp)
  +	$POP	r18,`8*$SIZE_T`($sp)
  +	$POP	r19,`9*$SIZE_T`($sp)
  +	$POP	r20,`10*$SIZE_T`($sp)
  +	$POP	r21,`11*$SIZE_T`($sp)
  +	$POP	r22,`12*$SIZE_T`($sp)
  +	$POP	r23,`13*$SIZE_T`($sp)
  +	$POP	r24,`14*$SIZE_T`($sp)
  +	$POP	r25,`15*$SIZE_T`($sp)
  +	$POP	$sp,0($sp)
  +	li	r3,1
  +	blr
  +	.long	0
  +.align	4
  +Lsub:	$LDX	$tj,$tp,$j
  +	$LDX	$nj,$np,$j
  +	subfe	$tj,$nj,$tj	; tp[j]-np[j]
  +	$STX	$tj,$rp,$j
  +	addi	$j,$j,$BNSZ
  +	bdnz-	Lsub
  +	li	$j,0
  +	subfe.	$ovf,$j,$ovf
  +	mtctr	$num
  +	bne	Lcopy
  +.align	4
  +Lzap:	$STX	$j,$tp,$j
  +	addi	$j,$j,$BNSZ
  +	bdnz-	Lzap
  +	b	Lexit
  +___
  +
  +$code =~ s/\`([^\`]*)\`/eval $1/gem;
  +print $code;
  +close STDOUT;
   .
  patch -p0 <<' .'
  Index: openssl/crypto/perlasm/ppc-xlate.pl
 
============================================================
================
  $ cvs diff -u -r0 -r1.1 ppc-xlate.pl
  --- /dev/null	2006-04-30 23:15:19 +0200
  +++ ppc-xlate.pl	2006-04-30 23:15:28 +0200
  @@ -0,0 +1,113 @@
  +#!/usr/bin/env perl
  +
  +# PowerPC assembler distiller by <appro>.
  +
  +my $output = shift;
  +open STDOUT,">$output" || die
"can't open $output: $!";
  +
  +my $flavour = $output;
  +my %GLOBALS;
  +my $dotinlocallabels=0;
  +
 
+###########################################################
#####
  +# directives which need special treatment on different
platforms
 
+###########################################################
#####
  +my $globl = sub {
  +    my $junk = shift;
  +    my $name = shift;
  +    my $global = \$GLOBALS{$name};
  +    my $ret;
  +
  +    $name =~ s|^[\.\_]||;
  + 
  +    SWITCH: for ($flavour) {
  +	/aix/		&& do { $name = ".$name";
  +				last;
  +			      };
  +	/osx/		&& do { $name = "_$name";
  +				last;
  +			      };
  +	/linux.*32/	&& do {	$ret .=
".globl	$name\n";
  +				$ret .= ".type	$name,\@function";
  +				$dotinlocallabels = 1;
  +				last;
  +			      };
  +	/linux.*64/	&& do {	$ret .=
".globl	.$name\n";
  +				$ret .= ".type	.$name,\@function\n";
  +				$ret .=
".section	\".opd\",\"aw\"\n";
  +				$ret .= ".globl	$name\n";
  +				$ret .= ".align	3\n";
  +				$ret .= "$name:\n";
  +				$ret .= ".quad	.$name,.TOC.\@tocbase,0\n";
  +				$ret .= ".size	$name,24\n";
  +				$ret .= ".previous\n";
  +
  +				$name = ".$name";
  +				$dotinlocallabels = 1;
  +				last;
  +			      };
  +    }
  +
  +    $ret = ".globl	$name" if (!$ret);
  +    $$global = $name;
  +    $ret;
  +};
  +my $machine = sub {
  +    my $junk = shift;
  +    my $arch = shift;
  +    $arch = "ppc970" if ($arch eq
"any" and $flavour =~ /osx/);
  +    ".machine	$arch";
  +};
  +
 
+###########################################################
#####
  +# simplified mnemonics not handled by at least one
assembler
 
+###########################################################
#####
  +my $cmplw = sub {
  +    my $f = shift;
  +    my $cr = 0; $cr = shift if ($#_>1);
  +    "	cmpl$f	".join(',',$cr,0,@_);
  +};
  +my $cmpld = sub {
  +    my $f = shift;
  +    my $cr = 0; $cr = shift if ($#_>1);
  +    "	cmpl$f	".join(',',$cr,1,@_);
  +};
  +my $bdnz = sub {
  +    my $f = shift;
  +    my $bo = $f=~/[\+\-]/ ? 17 : 16;
  +    "	bc	$bo,0,".shift;
  +};
  +
  +while($line=<>) {
  +
  +    $line =~ s|[#!;].*$||;	# get rid of asm-style
comments...
  +    $line =~ s|/\*.*\*/||;	# ... and C-style
comments...
  +    $line =~ s|^\s+||;		# ... and skip white spaces in
beginning...
  +    $line =~ s|\s+$||;		# ... and at the end
  +
  +    {
  +	$line =~ s|\b\.L(\w+)|L$1|g;	# common denominator for
Locallabel
  +	$line =~ s|\bL(\w+)|\.L$1|g	if ($dotinlocallabels);
  +    }
  +
  +    {
  +	$line =~ s|(^[\.\w]+)\:\s*||;
  +	my $label = $1;
  +	printf "%s:",($GLOBALS{$label} or $label) if
($label);
  +    }
  +
  +    {
  +	$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
  +	my $c = $1; $c = "\t" if ($c eq
"");
  +	my $mnemonic = $2;
  +	my $f = $3;
  +	my $opcode = eval("\$$mnemonic");
  +	if (ref($opcode) eq 'CODE') { $line =
&$opcode($f,split(',',$line)); }
  +	elsif ($mnemonic)           { $line =
$c.$mnemonic.$f."\t".$line; }
  +    }
  +
  +    print $line if ($line);
  +    print "\n";
  +}
  +
  +close STDOUT;
   .
____________________________________________________________
__________
OpenSSL Project                                 http://www.openssl.org
CVS Repository Commit List                    
openssl-cvs@openssl.org
Automated List Manager                          
majordomo@openssl.org
[1]

about | contact  Other archives ( Real Estate discussion Medical topics )