OpenSSL CVS Repository
http://cvs.openssl.org/
____________________________________________________________
________________
Server: cvs.openssl.org                  Name:   Andy Polyakov
Root:   /v/openssl/cvs                   Email:  appro@openssl.org
Module: openssl                          Date:   30-Apr-2006 23:15:30
Branch: HEAD                             Handle: 2006043022152504
Added files:
openssl/crypto/bn/asm ppc-mont.pl
openssl/crypto/perlasm ppc-xlate.pl
Modified files:
openssl Configure
openssl/crypto/bn Makefile
Log:
Yet another "teaser" Montgomery
multiplication module, for PowerPC.
Summary:
Revision Changes Path
1.533 +9 -8 openssl/Configure
1.18 +8 -0 openssl/crypto/bn/Makefile
1.1 +327 -0
openssl/crypto/bn/asm/ppc-mont.pl
1.1 +113 -0
openssl/crypto/perlasm/ppc-xlate.pl
____________________________________________________________
________________
patch -p0 <<' .'
Index: openssl/Configure
============================================================
================
$ cvs diff -u -r1.532 -r1.533 Configure
--- openssl/Configure 16 Apr 2006 14:42:54 -0000 1.532
+++ openssl/Configure 30 Apr 2006 21:15:25 -0000 1.533
@@ -314,7 +314,7 @@
# *-generic* is endian-neutral target, but ./config is
free to
# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG
RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJ
OR).\$(SHLIB_MINOR)",
-"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK
DES_RISC1
DES_UNROLL::linux_ppc32.o:::::::::::dlfcn:linux-shared:-fPIC
::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK
DES_RISC1 DES_UNROLL::linux_ppc32.o
linux_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### IA-32 targets...
"linux-ia32-icc", "icc:-DL_ENDIAN
-DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG
$
$:$:dlfcn:linux-shared:-KPIC::.so
.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO
-O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG
$
$:$:dlfcn:linux-shared:-fPIC::.so
.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -322,7 +322,7 @@
####
"linux-generic64","gcc:-DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJ
OR).\$(SHLIB_MINOR)",
# -bpowerpc64-linux is transient option, -m64 should be
the one to use...
-"linux-ppc64", "gcc:-bpowerpc64-linux
-DB_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1
DES_UNROLL::linux_ppc64.o:::::::::::dlfcn:linux-shared:-fPIC
:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc64", "gcc:-bpowerpc64-linux
-DB_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o
linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowe
rpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO
-O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_UNROLL
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-ecc","ecc:-DL_ENDIAN
-DTERMIO -O2 -Wall
-no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-icc","icc:-DL_ENDIAN
-DTERMIO -O2 -Wall
-no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK
DES_RISC1
DES_INT:$:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_
MAJOR).\$(SHLIB_MINOR)",
@@ -407,12 +407,12 @@
#### IBM's AIX.
"aix3-cc", "cc:-O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
-"aix-gcc", "gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o:::::::::::dlfcn:",
-"aix64-gcc","gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o:::::::::::dlfcn::::::-X64",
+"aix-gcc", "gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o
aix_ppc32-mont.o:::::::::::dlfcn:",
+"aix64-gcc","gcc:-O
-DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o
aix_ppc64-mont.o:::::::::::dlfcn::::::-X64",
# Below targets assume AIX 5. Idea is to effectively
disregard $OBJECT_MODE
# at build time. $OBJECT_MODE is respected at ./config
stage!
-"aix-cc", "cc:-q32 -O -DB_ENDIAN
-qmaxmem=16384::-qthreaded:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
-"aix64-cc", "cc:-q64 -O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
+"aix-cc", "cc:-q32 -O -DB_ENDIAN
-qmaxmem=16384::-qthreaded:AIX::BN_LLONG
RC4_CHAR::aix_ppc32.o
aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SH
LIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
+"aix64-cc", "cc:-q64 -O -DB_ENDIAN
-qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG
RC4_CHAR::aix_ppc64.o
aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SH
LIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
#
# Cray T90 and similar (SDSC)
@@ -504,9 +504,10 @@
##### MacOS X (a.k.a. Rhapsody or Darwin) setup
"rhapsody-ppc-cc","cc:-O3
-DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR
RC4_CHUNK DES_UNROLL BF_PTR:$::",
-"darwin-ppc-cc","cc:-O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_L
LONG RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR::osx_ppc32.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
+"darwin-ppc-cc","cc:-O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_L
LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o
osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
+"darwin64-ppc-cc","cc:-m64 -O3
-DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXT
Y_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR::osx_ppc64.o
osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC
-fno-common:-m64
-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"darwin-i386-cc","cc:-O3
-fomit-frame-pointer
-DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL BF_PTR:$:dlfcn:darwin-shared:-fPIC
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).d
ylib",
-"debug-darwin-ppc-cc","cc:-DBN_DEBUG
-DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall
-O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL
BF_PTR::osx_ppc32.o:::::::::::dlfcn:darwin-shared:-fPIC:-dyn
amiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"debug-darwin-ppc-cc","cc:-DBN_DEBUG
-DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall
-O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK
DES_UNROLL BF_PTR::osx_ppc32.o
osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynami
clib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
##### A/UX
"aux3-gcc","gcc:-O2
-DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL
BF_PTR:::",
 .
patch -p0 <<' .'
Index: openssl/crypto/bn/Makefile
============================================================
================
$ cvs diff -u -r1.17 -r1.18 Makefile
--- openssl/crypto/bn/Makefile 12 Feb 2006 23:21:49
-0000 1.17
+++ openssl/crypto/bn/Makefile 30 Apr 2006 21:15:27
-0000 1.18
@@ -120,6 +120,14 @@
aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
osx_ppc32.s: asm/ppc.pl; $(PERL) $< $@
+osx_ppc64.s: asm/ppc.pl; $(PERL) $< $@
+
+linux_ppc32-mont.s: asm/ppc-mont.pl; $(PERL) $< $@
+linux_ppc64-mont.s: asm/ppc-mont.pl; $(PERL) $< $@
+aix_ppc32-mont.s: asm/ppc-mont.pl; $(PERL) asm/ppc-mont.pl $@
+aix_ppc64-mont.s: asm/ppc-mont.pl; $(PERL) asm/ppc-mont.pl $@
+osx_ppc32-mont.s: asm/ppc-mont.pl; $(PERL) $< $@
+osx_ppc64-mont.s: asm/ppc-mont.pl; $(PERL) $< $@
files:
$(PERL) $(TOP)/util/files.pl Makefile >>
$(TOP)/MINFO
 .
patch -p0 <<' .'
Index: openssl/crypto/bn/asm/ppc-mont.pl
============================================================
================
$ cvs diff -u -r0 -r1.1 ppc-mont.pl
--- /dev/null 2006-04-30 23:15:29 +0200
+++ ppc-mont.pl 2006-04-30 23:15:29 +0200
@@ -0,0 +1,327 @@
+#!/usr/bin/env perl
+
+#
============================================================
========
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. Rights for redistribution and usage in source
and binary
+# forms are granted according to the OpenSSL license.
+#
============================================================
========
+
+# April 2006
+
+# "Teaser" Montgomery multiplication module
for PowerPC. It's possible
+# to gain a bit more by modulo-scheduling outer loop,
then dedicated
+# squaring procedure should give further 20% and code can
be adapted
+# for 32-bit application running on 64-bit CPU. As for
the latter.
+# It won't be able to achieve "native"
64-bit performance, because in
+# 32-bit application context every addc instruction will
have to be
+# expanded as addc, twice right shift by 32 and finally
adde, etc.
+# So far RSA *sign* performance improvement over
pre-bn_mul_mont asm
+# for 64-bit application running on PPC970/G5 is:
+#
+# 512-bit +65%
+# 1024-bit +35%
+# 2048-bit +18%
+# 4096-bit +4%
+
+$output = shift;
+
+if ($output =~ /32\-mont\.s/) {
+ $BITS= 32;
+ $BNSZ= $BITS/8;
+ $SIZE_T=4;
+ $RZONE= 224;
+ $FRAME= $SIZE_T*16;
+
+ $LD= "lwz"; # load
+ $LDU= "lwzu"; # load and update
+ $LDX= "lwzx"; # load indexed
+ $ST= "stw"; # store
+ $STU= "stwu"; # store and update
+ $STX= "stwx"; # store indexed
+ $STUX= "stwux"; # store indexed and update
+ $UMULL= "mullw"; # unsigned multiply low
+ $UMULH= "mulhwu"; # unsigned multiply high
+ $UCMP= "cmplw"; # unsigned compare
+ $PUSH= $ST;
+ $POP= $LD;
+} elsif ($output =~ /64\-mont\.s/) {
+ $BITS= 64;
+ $BNSZ= $BITS/8;
+ $SIZE_T=8;
+ $RZONE= 288;
+ $FRAME= $SIZE_T*16;
+
+ # same as above, but 64-bit mnemonics...
+ $LD= "ld"; # load
+ $LDU= "ldu"; # load and update
+ $LDX= "ldx"; # load indexed
+ $ST= "std"; # store
+ $STU= "stdu"; # store and update
+ $STX= "stdx"; # store indexed
+ $STUX= "stdux"; # store indexed and update
+ $UMULL= "mulld"; # unsigned multiply low
+ $UMULH= "mulhdu"; # unsigned multiply high
+ $UCMP= "cmpld"; # unsigned compare
+ $PUSH= $ST;
+ $POP= $LD;
+} else { die "nonsense $output"; }
+
+( defined shift || open STDOUT,"| $^X
../perlasm/ppc-xlate.pl $output" ) ||
+ die "can't call ../perlasm/ppc-xlate.pl:
$!";
+
+$sp="r1";
+$toc="r2";
+$rp="r3"; $ovf="r3";
+$ap="r4";
+$bp="r5";
+$np="r6";
+$n0="r7";
+$num="r8";
+$rp="r9"; # $rp is reassigned
+$aj="r10";
+$nj="r11";
+$tj="r12";
+# non-volatile registers
+$i="r14";
+$j="r15";
+$tp="r16";
+$m0="r17";
+$m1="r18";
+$lo0="r19";
+$hi0="r20";
+$lo1="r21";
+$hi1="r22";
+$alo="r23";
+$ahi="r24";
+$nlo="r25";
+#
+$nhi="r0";
+
+$code=<<___;
+.text
+
+.globl .bn_mul_mont
+.align 4
+.bn_mul_mont:
+ cmpwi $num,4
+ mr $rp,r3 ; $rp is reassigned
+ li r3,0
+ bltlr
+
+ slwi $num,$num,`log($BNSZ)/log(2)`
+ li $tj,-4096
+ addi $ovf,$num,`$FRAME+$RZONE`
+ subf $ovf,$ovf,$sp ; $sp-$ovf
+ and $ovf,$ovf,$tj ; minimize TLB usage
+ subf $ovf,$sp,$ovf ; $ovf-$sp
+ srwi $num,$num,`log($BNSZ)/log(2)`
+ $STUX $sp,$sp,$ovf
+
+ $PUSH r14,`4*$SIZE_T`($sp)
+ $PUSH r15,`5*$SIZE_T`($sp)
+ $PUSH r16,`6*$SIZE_T`($sp)
+ $PUSH r17,`7*$SIZE_T`($sp)
+ $PUSH r18,`8*$SIZE_T`($sp)
+ $PUSH r19,`9*$SIZE_T`($sp)
+ $PUSH r20,`10*$SIZE_T`($sp)
+ $PUSH r21,`11*$SIZE_T`($sp)
+ $PUSH r22,`12*$SIZE_T`($sp)
+ $PUSH r23,`13*$SIZE_T`($sp)
+ $PUSH r24,`14*$SIZE_T`($sp)
+ $PUSH r25,`15*$SIZE_T`($sp)
+
+ $LD $n0,0($n0) ; pull n0[0] value
+ addi $num,$num,-2 ; adjust $num for counter register
+
+ $LD $m0,0($bp) ; m0=bp[0]
+ $LD $aj,0($ap) ; ap[0]
+ addi $tp,$sp,$FRAME
+ $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
+ $UMULH $hi0,$aj,$m0
+
+ $LD $aj,$BNSZ($ap) ; ap[1]
+ $LD $nj,0($np) ; np[0]
+
+ $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
+
+ $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
+ $UMULH $ahi,$aj,$m0
+
+ $UMULL $lo1,$nj,$m1 ; np[0]*m1
+ $UMULH $hi1,$nj,$m1
+ $LD $nj,$BNSZ($np) ; np[1]
+ addc $lo1,$lo1,$lo0
+ addze $hi1,$hi1
+
+ $UMULL $nlo,$nj,$m1 ; np[1]*m1
+ $UMULH $nhi,$nj,$m1
+
+ mtctr $num
+ li $j,`2*$BNSZ`
+.align 4
+L1st:
+ $LDX $aj,$ap,$j ; ap[j]
+ $LDX $nj,$np,$j ; np[j]
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
+ $UMULH $ahi,$aj,$m0
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ $UMULL $nlo,$nj,$m1 ; np[j]*m1
+ $UMULH $nhi,$nj,$m1
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ addi $j,$j,$BNSZ ; j++
+ addi $tp,$tp,$BNSZ ; tp++
+ bdnz- L1st
+;L1st
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ li $ovf,0
+ addc $hi1,$hi1,$hi0
+ addze $ovf,$ovf ; upmost overflow bit
+ $ST $hi1,$BNSZ($tp)
+
+ li $i,$BNSZ
+.align 4
+Louter:
+ $LDX $m0,$bp,$i ; m0=bp[i]
+ $LD $aj,0($ap) ; ap[0]
+ addi $tp,$sp,$FRAME
+ $LD $tj,$FRAME($sp) ; tp[0]
+ $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
+ $UMULH $hi0,$aj,$m0
+ $LD $aj,$BNSZ($ap) ; ap[1]
+ $LD $nj,0($np) ; np[0]
+ addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
+ addze $hi0,$hi0
+
+ $UMULL $m1,$lo0,$n0 ; tp[0]*n0
+
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
+ $UMULH $ahi,$aj,$m0
+
+ $UMULL $lo1,$nj,$m1 ; np[0]*m1
+ $UMULH $hi1,$nj,$m1
+ $LD $nj,$BNSZ($np) ; np[1]
+ addc $lo1,$lo1,$lo0
+ addze $hi1,$hi1
+
+ $UMULL $nlo,$nj,$m1 ; np[1]*m1
+ $UMULH $nhi,$nj,$m1
+
+ mtctr $num
+ li $j,`2*$BNSZ`
+.align 4
+Linner:
+ $LDX $aj,$ap,$j ; ap[j]
+ $LD $tj,$BNSZ($tp) ; tp[j]
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+ $LDX $nj,$np,$j ; np[j]
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
+ addze $hi0,$hi0
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
+ $UMULH $ahi,$aj,$m0
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ $UMULL $nlo,$nj,$m1 ; np[j]*m1
+ $UMULH $nhi,$nj,$m1
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ addi $j,$j,$BNSZ ; j++
+ addi $tp,$tp,$BNSZ ; tp++
+ bdnz- Linner
+;Linner
+ $LD $tj,$BNSZ($tp) ; tp[j]
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
+ addze $hi0,$hi0
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
+ li $ovf,0
+ adde $hi1,$hi1,$hi0
+ addze $ovf,$ovf
+ $ST $hi1,$BNSZ($tp)
+;
+ slwi $tj,$num,`log($BNSZ)/log(2)`
+ $UCMP $i,$tj
+ addi $i,$i,$BNSZ
+ ble- Louter
+
+ addi $num,$num,2 ; restore $num
+ addi $tp,$sp,$FRAME
+ mtctr $num
+ li $j,0
+
+ subfc. $ovf,$j,$ovf ; sets XER[CA]
+ bne Lsub
+ $UCMP $hi1,$nj
+ bge Lsub
+.align 4
+Lcopy:
+ $LDX $tj,$tp,$j
+ $STX $tj,$rp,$j
+ $STX $j,$tp,$j ; zap at once
+ addi $j,$j,$BNSZ
+ bdnz- Lcopy
+
+Lexit:
+ $POP r14,`4*$SIZE_T`($sp)
+ $POP r15,`5*$SIZE_T`($sp)
+ $POP r16,`6*$SIZE_T`($sp)
+ $POP r17,`7*$SIZE_T`($sp)
+ $POP r18,`8*$SIZE_T`($sp)
+ $POP r19,`9*$SIZE_T`($sp)
+ $POP r20,`10*$SIZE_T`($sp)
+ $POP r21,`11*$SIZE_T`($sp)
+ $POP r22,`12*$SIZE_T`($sp)
+ $POP r23,`13*$SIZE_T`($sp)
+ $POP r24,`14*$SIZE_T`($sp)
+ $POP r25,`15*$SIZE_T`($sp)
+ $POP $sp,0($sp)
+ li r3,1
+ blr
+ .long 0
+.align 4
+Lsub: $LDX $tj,$tp,$j
+ $LDX $nj,$np,$j
+ subfe $tj,$nj,$tj ; tp[j]-np[j]
+ $STX $tj,$rp,$j
+ addi $j,$j,$BNSZ
+ bdnz- Lsub
+ li $j,0
+ subfe. $ovf,$j,$ovf
+ mtctr $num
+ bne Lcopy
+.align 4
+Lzap: $STX $j,$tp,$j
+ addi $j,$j,$BNSZ
+ bdnz- Lzap
+ b Lexit
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
 .
patch -p0 <<' .'
Index: openssl/crypto/perlasm/ppc-xlate.pl
============================================================
================
$ cvs diff -u -r0 -r1.1 ppc-xlate.pl
--- /dev/null 2006-04-30 23:15:19 +0200
+++ ppc-xlate.pl 2006-04-30 23:15:28 +0200
@@ -0,0 +1,113 @@
+#!/usr/bin/env perl
+
+# PowerPC assembler distiller by <appro>.
+
+my $output = shift;
+open STDOUT,">$output" || die
"can't open $output: $!";
+
+my $flavour = $output;
+my %GLOBALS;
+my $dotinlocallabels=0;
+
+###########################################################
#####
+# directives which need special treatment on different
platforms
+###########################################################
#####
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ $name =~ s|^[\.\_]||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { $name = ".$name";
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*32/ && do { $ret .=
".globl $name\n";
+ $ret .= ".type $name,\@function";
+ $dotinlocallabels = 1;
+ last;
+ };
+ /linux.*64/ && do { $ret .=
".globl .$name\n";
+ $ret .= ".type .$name,\@function\n";
+ $ret .= ".section \".opd\",\"aw\"\n";
+ $ret .= ".globl $name\n";
+ $ret .= ".align 3\n";
+ $ret .= "$name:\n";
+ $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
+ $ret .= ".size $name,24\n";
+ $ret .= ".previous\n";
+
+ $name = ".$name";
+ $dotinlocallabels = 1;
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ $arch = "ppc970" if ($arch eq
"any" and $flavour =~ /osx/);
+ ".machine $arch";
+};
+
+###########################################################
#####
+# simplified mnemonics not handled by at least one
assembler
+###########################################################
#####
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ " cmpl$f ".join(',',$cr,0,@_);
+};
+my $cmpld = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ " cmpl$f ".join(',',$cr,1,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 17 : 16;
+ " bc $bo,0,".shift;
+};
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style
comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style
comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in
beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for
Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ printf "%s:",($GLOBALS{$label} or $label) if
($label);
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq
"");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ if (ref($opcode) eq 'CODE') { $line =
&$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line =
$c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
 .
____________________________________________________________
__________
OpenSSL Project http://www.openssl.org
CVS Repository Commit List
openssl-cvs openssl.org
Automated List Manager
majordomo openssl.org
|