List Info

Thread: OpenSSL: openssl/crypto/bn/asm/ ppc.pl




OpenSSL: openssl/crypto/bn/asm/ ppc.pl
user name
2006-10-17 14:37:08
  OpenSSL CVS Repository
  http://cvs.openssl.org/
 
____________________________________________________________
________________

  Server: cvs.openssl.org                  Name:   Andy
Polyakov
  Root:   /v/openssl/cvs                   Email:  appro@openssl.org
  Module: openssl                          Date:  
17-Oct-2006 16:37:07
  Branch: HEAD                             Handle:
2006101715370700

  Modified files:
    openssl/crypto/bn/asm   ppc.pl

  Log:
    bn/asm/ppc.pl to use ppc-xlate.pl.

  Summary:
    Revision    Changes     Path
    1.5         +64 -160    openssl/crypto/bn/asm/ppc.pl
 
____________________________________________________________
________________

  patch -p0 <<' .'
  Index: openssl/crypto/bn/asm/ppc.pl
 
============================================================
================
  $ cvs diff -u -r1.4 -r1.5 ppc.pl
  --- openssl/crypto/bn/asm/ppc.pl	3 Jul 2005 09:17:50
-0000	1.4
  +++ openssl/crypto/bn/asm/ppc.pl	17 Oct 2006 14:37:07
-0000	1.5
  @@ -151,91 +151,15 @@
   	$TR=	"td";		# conditional trap
   } else { die "nonsense $opf"; }
   
  -( defined shift || open STDOUT,">$opf" ) ||
die "can't open $opf: $!";
  +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  +die "can't locate ppc-xlate.pl";
   
  -# function entry points from the AIX code
  -#
  -# There are other, more elegant, ways to handle this. We
(IBM) chose
  -# this approach as it plays well with scripts we run to
'namespace'
  -# OpenSSL .i.e. we add a prefix to all the public symbols
so we can
  -# co-exist in the same process with other implementations
of OpenSSL.
  -# 'cleverer' ways of doing these substitutions tend to
hide data we
  -# need to be obvious.
  -#
  -my @items = ("bn_sqr_comba4",
  -	     "bn_sqr_comba8",
  -	     "bn_mul_comba4",
  -	     "bn_mul_comba8",
  -	     "bn_sub_words",
  -	     "bn_add_words",
  -	     "bn_div_words",
  -	     "bn_sqr_words",
  -	     "bn_mul_words",
  -	     "bn_mul_add_words");
  -
  -if    ($opf =~ /linux/)	{  do_linux();	}
  -elsif ($opf =~ /aix/)	{  do_aix();	}
  -elsif ($opf =~ /osx/)	{  do_osx();	}
  -else			{  do_bsd();	}
  -
  -sub do_linux {
  -    $d=&data();
  -
  -    if ($BITS==64) {
  -      foreach $t (@items) {
  -        $d =~ s/\.$t:/\
  -\t.section\t".opd","aw"\
  -\t.align\t3\
  -\t.globl\t$t\
  -$t:\
  -\t.quad\t.$t,.TOC.\@tocbase,0\
  -\t.size\t$t,24\
  -\t.previous\n\
  -\t.type\t.$t,\@function\
  -\t.globl\t.$t\
  -.$t:/g;
  -      }
  -    }
  -    else {
  -      foreach $t (@items) {
  -        $d=~s/\.$t/$t/g;
  -      }
  -    }
  -    # hide internal labels to avoid pollution of name
table...
  -    $d=~s/Lppcasm_/.Lppcasm_/gm;
  -    print $d;
  -}
  -
  -sub do_aix {
  -    # AIX assembler is smart enough to please the linker
without
  -    # making us do something special...
  -    print &data();
  -}
  -
  -# MacOSX 32 bit
  -sub do_osx {
  -    $d=&data();
  -    # Change the bn symbol prefix from '.' to '_'
  -    foreach $t (@items) {
  -      $d=~s/\.$t/_$t/g;
  -    }
  -    # Change .machine to something OS X asm will accept
  -    $d=~s/\.machine.*/.text/g;
  -    $d=~s/\#/;/g; # change comment from '#' to ';'
  -    print $d;
  -}
  -
  -# BSD (Untested)
  -sub do_bsd {
  -    $d=&data();
  -    foreach $t (@items) {
  -      $d=~s/\.$t/_$t/g;
  -    }
  -    print $d;
  -}
  +( defined shift || open STDOUT,"| $^X $xlate
$opf" ) ||
  +	die "can't call $xlate: $!";
   
  -sub data {
  -	local($data)=<<EOF;
  +$data=<<EOF;
  
#-----------------------------------------------------------
---------
   #
   #
  @@ -297,33 +221,20 @@
   #
   #	Defines to be used in the assembly code.
   #	
  -.set r0,0	# we use it as storage for value of 0
  -.set SP,1	# preserved
  -.set RTOC,2	# preserved 
  -.set r3,3	# 1st argument/return value
  -.set r4,4	# 2nd argument/volatile register
  -.set r5,5	# 3rd argument/volatile register
  -.set r6,6	# ...
  -.set r7,7
  -.set r8,8
  -.set r9,9
  -.set r10,10
  -.set r11,11
  -.set r12,12
  -.set r13,13	# not used, nor any other "below"
it...
  -
  -.set BO_IF_NOT,4
  -.set BO_IF,12
  -.set BO_dCTR_NZERO,16
  -.set BO_dCTR_ZERO,18
  -.set BO_ALWAYS,20
  -.set CR0_LT,0;
  -.set CR0_GT,1;
  -.set CR0_EQ,2
  -.set CR1_FX,4;
  -.set CR1_FEX,5;
  -.set CR1_VX,6
  -.set LR,8
  +#.set r0,0	# we use it as storage for value of 0
  +#.set SP,1	# preserved
  +#.set RTOC,2	# preserved 
  +#.set r3,3	# 1st argument/return value
  +#.set r4,4	# 2nd argument/volatile register
  +#.set r5,5	# 3rd argument/volatile register
  +#.set r6,6	# ...
  +#.set r7,7
  +#.set r8,8
  +#.set r9,9
  +#.set r10,10
  +#.set r11,11
  +#.set r12,12
  +#.set r13,13	# not used, nor any other "below"
it...
   
   #	Declare function names to be global
   #	NOTE:	For gcc these names MUST be changed to remove
  @@ -478,7 +389,7 @@
   
   	$ST		r9,`6*$BNSZ`(r3)	#r[6]=c1
   	$ST		r10,`7*$BNSZ`(r3)	#r[7]=c2
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -903,7 +814,7 @@
   	$ST		r9, `15*$BNSZ`(r3)	#r[15]=c1;
   
   
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   
   	.long	0x00000000
   
  @@ -1055,7 +966,7 @@
   
   	$ST	r10,`6*$BNSZ`(r3)	#r[6]=c1
   	$ST	r11,`7*$BNSZ`(r3)	#r[7]=c2
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -1591,7 +1502,7 @@
   	adde	r10,r10,r9
   	$ST	r12,`14*$BNSZ`(r3)	#r[14]=c3;
   	$ST	r10,`15*$BNSZ`(r3)	#r[15]=c1;
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -1623,7 +1534,7 @@
   	subfc.	r7,r0,r6        # If r6 is 0 then result is 0.
   				# if r6 > 0 then result !=0
   				# In either case carry bit is set.
  -	bc	BO_IF,CR0_EQ,Lppcasm_sub_adios
  +	beq	Lppcasm_sub_adios
   	addi	r4,r4,-$BNSZ
   	addi	r3,r3,-$BNSZ
   	addi	r5,r5,-$BNSZ
  @@ -1635,11 +1546,11 @@
   				# if carry = 1 this is r7-r8. Else it
   				# is r7-r8 -1 as we need.
   	$STU	r6,$BNSZ(r3)
  -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
  +	bdnz-	Lppcasm_sub_mainloop
   Lppcasm_sub_adios:	
   	subfze	r3,r0		# if carry bit is set then r3 = 0 else -1
   	andi.	r3,r3,1         # keep only last bit.
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   
  @@ -1670,7 +1581,7 @@
   #	check for r6 = 0. Is this needed?
   #
   	addic.	r6,r6,0		#test r6 and clear carry bit.
  -	bc	BO_IF,CR0_EQ,Lppcasm_add_adios
  +	beq	Lppcasm_add_adios
   	addi	r4,r4,-$BNSZ
   	addi	r3,r3,-$BNSZ
   	addi	r5,r5,-$BNSZ
  @@ -1680,10 +1591,10 @@
   	$LDU	r8,$BNSZ(r5)
   	adde	r8,r7,r8
   	$STU	r8,$BNSZ(r3)
  -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
  +	bdnz-	Lppcasm_add_mainloop
   Lppcasm_add_adios:	
   	addze	r3,r0			#return carry bit.
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -1707,24 +1618,24 @@
   #	r5 = d
   	
   	$UCMPI	0,r5,0			# compare r5 and 0
  -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div1	# proceed if d!=0
  +	bne	Lppcasm_div1		# proceed if d!=0
   	li	r3,-1			# d=0 return -1
  -	bclr	BO_ALWAYS,CR0_LT	
  +	blr
   Lppcasm_div1:
   	xor	r0,r0,r0		#r0=0
   	li	r8,$BITS
   	$CNTLZ.	r7,r5			#r7 = num leading 0s in d.
  -	bc	BO_IF,CR0_EQ,Lppcasm_div2	#proceed if no leading
zeros
  +	beq	Lppcasm_div2		#proceed if no leading zeros
   	subf	r8,r7,r8		#r8 = BN_num_bits_word(d)
   	$SHR.	r9,r3,r8		#are there any bits above r8'th?
   	$TR	16,r9,r0		#if there're, signal to dump core...
   Lppcasm_div2:
   	$UCMP	0,r3,r5			#h>=d?
  -	bc	BO_IF,CR0_LT,Lppcasm_div3	#goto Lppcasm_div3 if not
  +	blt	Lppcasm_div3		#goto Lppcasm_div3 if not
   	subf	r3,r5,r3		#h-=d ; 
   Lppcasm_div3:				#r7 = BN_BITS2-i. so r7=i
   	cmpi	0,0,r7,0		# is (i == 0)?
  -	bc	BO_IF,CR0_EQ,Lppcasm_div4
  +	beq	Lppcasm_div4
   	$SHL	r3,r3,r7		# h = (h<< i)
   	$SHR	r8,r4,r8		# r8 = (l >> BN_BITS2 -i)
   	$SHL	r5,r5,r7		# d<<=i
  @@ -1741,7 +1652,7 @@
   	$SHRI	r11,r4,`$BITS/2`	#r11=
(l&BN_MASK2h)>>BN_BITS4
   					# compute here for innerloop.
   	$UCMP	0,r8,r9			# is (h>>BN_BITS4)==dh
  -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div5	# goto Lppcasm_div5 if
not
  +	bne	Lppcasm_div5		# goto Lppcasm_div5 if not
   
   	li	r8,-1
   	$CLRU	r8,r8,`$BITS/2`		#q = BN_MASK2l 
  @@ -1762,9 +1673,9 @@
   					# the following 2 instructions do that
   	$SHLI	r7,r10,`$BITS/2`	# r7 = (t<<BN_BITS4)
   	or	r7,r7,r11		# r7|=((l&BN_MASK2h)>>BN_BITS4)
  -	$UCMP	1,r6,r7			# compare (tl <= r7)
  -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
  -	bc	BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
  +	$UCMP	cr1,r6,r7		# compare (tl <= r7)
  +	bne	Lppcasm_divinnerexit
  +	ble	cr1,Lppcasm_divinnerexit
   	addi	r8,r8,-1		#q--
   	subf	r12,r9,r12		#th -=dh
   	$CLRU	r10,r5,`$BITS/2`	#r10=dl. t is no longer needed in
loop.
  @@ -1773,14 +1684,14 @@
   Lppcasm_divinnerexit:
   	$SHRI	r10,r6,`$BITS/2`	#t=(tl>>BN_BITS4)
  
	$SHLI	r11,r6,`$BITS/2`	#tl=(tl<<BN_BITS4)&BN_MASK
2h;
  -	$UCMP	1,r4,r11		# compare l and tl
  +	$UCMP	cr1,r4,r11		# compare l and tl
   	add	r12,r12,r10		# th+=t
  -	bc	BO_IF_NOT,CR1_FX,Lppcasm_div7  # if (l>=tl) goto
Lppcasm_div7
  +	bge	cr1,Lppcasm_div7	# if (l>=tl) goto Lppcasm_div7
   	addi	r12,r12,1		# th++
   Lppcasm_div7:
   	subf	r11,r11,r4		#r11=l-tl
  -	$UCMP	1,r3,r12		#compare h and th
  -	bc	BO_IF_NOT,CR1_FX,Lppcasm_div8	#if (h>=th) goto
Lppcasm_div8
  +	$UCMP	cr1,r3,r12		#compare h and th
  +	bge	cr1,Lppcasm_div8	#if (h>=th) goto Lppcasm_div8
   	addi	r8,r8,-1		# q--
   	add	r3,r5,r3		# h+=d
   Lppcasm_div8:
  @@ -1791,12 +1702,12 @@
   					# the following 2 instructions will do this.
   	$INSR	r11,r12,`$BITS/2`,`$BITS/2`	# r11 is the value we
want rotated $BITS/2.
   	$ROTL	r3,r11,`$BITS/2`	# rotate by $BITS/2 and store in
r3
  -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break
;
  +	bdz	Lppcasm_div9		#if (count==0) break ;
   	$SHLI	r0,r8,`$BITS/2`		#ret =q<<BN_BITS4
   	b	Lppcasm_divouterloop
   Lppcasm_div9:
   	or	r3,r8,r0
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -1822,7 +1733,7 @@
   #	No unrolling done here. Not performance critical.
   
   	addic.	r5,r5,0			#test r5.
  -	bc	BO_IF,CR0_EQ,Lppcasm_sqr_adios
  +	beq	Lppcasm_sqr_adios
   	addi	r4,r4,-$BNSZ
   	addi	r3,r3,-$BNSZ
   	mtctr	r5
  @@ -1833,9 +1744,9 @@
   	$UMULH  r8,r6,r6
   	$STU	r7,$BNSZ(r3)
   	$STU	r8,$BNSZ(r3)
  -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
  +	bdnz-	Lppcasm_sqr_mainloop
   Lppcasm_sqr_adios:	
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   
  @@ -1858,7 +1769,7 @@
   	xor	r0,r0,r0
   	xor	r12,r12,r12		# used for carry
   	rlwinm.	r7,r5,30,2,31		# num >> 2
  -	bc	BO_IF,CR0_EQ,Lppcasm_mw_REM
  +	beq	Lppcasm_mw_REM
   	mtctr	r7
   Lppcasm_mw_LOOP:	
   					#mul(rp[0],ap[0],w,c1);
  @@ -1896,11 +1807,11 @@
   	
   	addi	r3,r3,`4*$BNSZ`
   	addi	r4,r4,`4*$BNSZ`
  -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
  +	bdnz-	Lppcasm_mw_LOOP
   
   Lppcasm_mw_REM:
   	andi.	r5,r5,0x3
  -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
  +	beq	Lppcasm_mw_OVER
   					#mul(rp[0],ap[0],w,c1);
   	$LD	r8,`0*$BNSZ`(r4)
   	$UMULL	r9,r6,r8
  @@ -1912,7 +1823,7 @@
   	
   	addi	r5,r5,-1
   	cmpli	0,0,r5,0
  -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
  +	beq	Lppcasm_mw_OVER
   
   	
   					#mul(rp[1],ap[1],w,c1);
  @@ -1926,7 +1837,7 @@
   	
   	addi	r5,r5,-1
   	cmpli	0,0,r5,0
  -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
  +	beq	Lppcasm_mw_OVER
   	
   					#mul_add(rp[2],ap[2],w,c1);
   	$LD	r8,`2*$BNSZ`(r4)
  @@ -1939,7 +1850,7 @@
   		
   Lppcasm_mw_OVER:	
   	addi	r3,r12,0
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   
   #
  @@ -1964,7 +1875,7 @@
   	xor	r0,r0,r0		#r0 = 0
   	xor	r12,r12,r12  		#r12 = 0 . used for carry		
   	rlwinm.	r7,r5,30,2,31		# num >> 2
  -	bc	BO_IF,CR0_EQ,Lppcasm_maw_leftover	# if (num < 4)
go LPPCASM_maw_leftover
  +	beq	Lppcasm_maw_leftover	# if (num < 4) go
LPPCASM_maw_leftover
   	mtctr	r7
   Lppcasm_maw_mainloop:	
   					#mul_add(rp[0],ap[0],w,c1);
  @@ -2017,11 +1928,11 @@
   	$ST	r11,`3*$BNSZ`(r3)
   	addi	r3,r3,`4*$BNSZ`
   	addi	r4,r4,`4*$BNSZ`
  -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
  +	bdnz-	Lppcasm_maw_mainloop
   	
   Lppcasm_maw_leftover:
   	andi.	r5,r5,0x3
  -	bc	BO_IF,CR0_EQ,Lppcasm_maw_adios
  +	beq	Lppcasm_maw_adios
   	addi	r3,r3,-$BNSZ
   	addi	r4,r4,-$BNSZ
   					#mul_add(rp[0],ap[0],w,c1);
  @@ -2036,7 +1947,7 @@
   	addze	r12,r10
   	$ST	r9,0(r3)
   	
  -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
  +	bdz	Lppcasm_maw_adios
   					#mul_add(rp[1],ap[1],w,c1);
   	$LDU	r8,$BNSZ(r4)	
   	$UMULL	r9,r6,r8
  @@ -2048,7 +1959,7 @@
   	addze	r12,r10
   	$ST	r9,0(r3)
   	
  -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
  +	bdz	Lppcasm_maw_adios
   					#mul_add(rp[2],ap[2],w,c1);
   	$LDU	r8,$BNSZ(r4)
   	$UMULL	r9,r6,r8
  @@ -2062,17 +1973,10 @@
   		
   Lppcasm_maw_adios:	
   	addi	r3,r12,0
  -	bclr	BO_ALWAYS,CR0_LT
  +	blr
   	.long	0x00000000
   	.align	4
   EOF
  -	$data =~ s/`([^`]*)`/eval $1/gem;
  -
  -	# if some assembler chokes on some simplified mnemonic,
  -	# this is the spot to fix it up, e.g.:
  -	# GNU as doesn't seem to accept cmplw, 32-bit unsigned
compare
  -	$data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
  -	# assembler X doesn't accept li, load immediate value
  -	#$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
  -	return($data);
  -}
  +$data =~ s/`([^`]*)`/eval $1/gem;
  +print $data;
  +close STDOUT;
   .
____________________________________________________________
__________
OpenSSL Project                                 http://www.openssl.org
CVS Repository Commit List                    
openssl-cvs@openssl.org
Automated List Manager                          
majordomo@openssl.org
[1]

about | contact  Other archives ( Real Estate discussion Medical topics )