List Info

Thread: OpenSSL: openssl/crypto/sha/asm/ sha512-ppc.pl




OpenSSL: openssl/crypto/sha/asm/ sha512-ppc.pl
user name
2006-11-27 13:11:15
  OpenSSL CVS Repository
  http://cvs.openssl.org/
 
____________________________________________________________
________________

  Server: cvs.openssl.org                  Name:   Andy Polyakov
  Root:   /v/openssl/cvs                   Email:  appro@openssl.org
  Module: openssl                          Date:   27-Nov-2006 14:11:15
  Branch: HEAD                             Handle: 2006112713111500

  Modified files:
    openssl/crypto/sha/asm  sha512-ppc.pl

  Log:
    sha512-ppc.pl multi-thread safety fix.

  Summary:
    Revision    Changes     Path
    1.3         +55 -31     openssl/crypto/sha/asm/sha512-ppc.pl
 
____________________________________________________________
________________

  patch -p0 <<' .'
  Index: openssl/crypto/sha/asm/sha512-ppc.pl
 
============================================================
================
  $ cvs diff -u -r1.2 -r1.3 sha512-ppc.pl
  --- openssl/crypto/sha/asm/sha512-ppc.pl	17 Oct 2006 16:13:18 -0000	1.2
  +++ openssl/crypto/sha/asm/sha512-ppc.pl	27 Nov 2006 13:11:15 -0000	1.3
  @@ -2,8 +2,9 @@
   
   # ====================================================================
   # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  -# project. Rights for redistribution and usage in source and binary
  -# forms are granted according to the OpenSSL license.
  +# project. The module is, however, dual licensed under OpenSSL and
  +# CRYPTOGAMS licenses depending on where you obtain it. For further
  +# details see http://www.openssl.org/~appro/cryptogams/.
   # ====================================================================
   
   # I let hardware handle unaligned input, except on page boundaries
  @@ -17,7 +18,21 @@
   # PPC970,gcc-4.0.0	+50%	+38%	|	+40%	+410%(*)
   #
   # (*)	64-bit code in 32-bit application context, which actually is
  -#	on TODO list
  +#	on TODO list. It should be noted that for safe deployment in
  +#	32-bit *multi-threaded* context asynchronous signals should be
  +#	blocked upon entry to SHA512 block routine. This is because
  +#	32-bit signaling procedure invalidates upper halves of GPRs.
  +#	Context switch procedure preserves them, but not signaling:-(
  +
  +# Second version is true multi-thread safe. Trouble with the original
  +# version was that it was using thread local storage pointer register.
  +# Well, it scrupulously preserved it, but the problem would arise the
  +# moment asynchronous signal was delivered and signal handler would
  +# dereference the TLS pointer. While it's never the case in openssl
  +# application or test suite, we have to respect this scenario and not
  +# use TLS pointer register. Alternative would be to require caller to
  +# block signals prior to calling this routine. For the record, in 32-bit
  +# context R2 serves as TLS pointer, while in 64-bit context - R13.
   
   $output=shift;
   
  @@ -69,24 +84,24 @@
   $FRAME=32*$SIZE_T;
   
   $sp ="r1";
  -$toc="r2";	# zapped by $Tbl
  +$toc="r2";
   $ctx="r3";	# zapped by $a0
  -$inp="r4";
  -$num="r5";	# zapped by $a1
  +$inp="r4";	# zapped by $a1
  +$num="r5";	# zapped by $t0
   
   $T  ="r0";
  -$Tbl="r2";
   $a0 ="r3";
  -$a1 ="r5";
  -$t0 ="r6";
  -$t1 ="r7";
  +$a1 ="r4";
  +$t0 ="r5";
  +$t1 ="r6";
  +$Tbl="r7";
   
   $A  ="r8";
   $B  ="r9";
   $C  ="r10";
   $D  ="r11";
   $E  ="r12";
  -$F  ="r13";
  +$F  ="r13";	$F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
   $G  ="r14";
   $H  ="r15";
   
  @@ -94,6 +109,8 @@
   @X=("r16","r17","r18","r19","r20","r21","r22","r23",
       "r24","r25","r26","r27","r28","r29","r30","r31");
   
  +$inp="r31";	# reassigned $inp! aliases with @X[15]
  +
   sub ROUND_00_15 {
   my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
   $code.=<<___;
  @@ -184,6 +201,7 @@
   	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
   
   	$LD	$A,`0*$SZ`($ctx)
  +	mr	$inp,r4				; incarnate $inp
   	$LD	$B,`1*$SZ`($ctx)
   	$LD	$C,`2*$SZ`($ctx)
   	$LD	$D,`3*$SZ`($ctx)
  @@ -197,8 +215,9 @@
   	andi.	r0,$inp,3
   	bne	Lunaligned
   Laligned:
  -	add	$t0,$inp,$num
  -	$PUSH	$t0,`$FRAME-$SIZE_T*23`($sp)	; end pointer
  +	add	$num,$inp,$num
  +	$PUSH	$num,`$FRAME-$SIZE_T*24`($sp)	; end pointer
  +	$PUSH	$inp,`$FRAME-$SIZE_T*23`($sp)	; inp pointer
   	bl	Lsha2_block_private
   Ldone:
   	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
  @@ -242,15 +261,17 @@
   	$UCMP	$num,$t1
   	ble-	Laligned		; didn't cross the page boundary
   	subfc	$num,$t1,$num
  -	add	$t0,$inp,$t1
  -	$PUSH	$num,`$FRAME-$SIZE_T*24`($sp)
  -	$PUSH	$t0,`$FRAME-$SIZE_T*23`($sp)	; end pointer
  +	add	$t1,$inp,$t1
  +	$PUSH	$num,`$FRAME-$SIZE_T*25`($sp)	; save real remaining num
  +	$PUSH	$t1,`$FRAME-$SIZE_T*24`($sp)	; intermediate end pointer
  +	$PUSH	$inp,`$FRAME-$SIZE_T*23`($sp)	; inp pointer
   	bl	Lsha2_block_private
  -	$POP	$num,`$FRAME-$SIZE_T*24`($sp)
  +	; $inp equals to the intermediate end pointer here
  +	$POP	$num,`$FRAME-$SIZE_T*25`($sp)	; restore real remaining num
   Lcross_page:
   	li	$t1,`16*$SZ/4`
   	mtctr	$t1
  -	addi	r20,$sp,$FRAME	; spot below the frame
  +	addi	r20,$sp,$FRAME			; aligned spot below the frame
   Lmemcpy:
   	lbz	r16,0($inp)
   	lbz	r17,1($inp)
  @@ -264,15 +285,16 @@
   	addi	r20,r20,4
   	bdnz	Lmemcpy
   
  -	$PUSH	$inp,`$FRAME-$SIZE_T*25`($sp)
  -	addi	$inp,$sp,$FRAME
  -	addi	$t0,$sp,`$FRAME+16*$SZ`
  -	$PUSH	$num,`$FRAME-$SIZE_T*24`($sp)
  -	$PUSH	$t0,`$FRAME-$SIZE_T*23`($sp)	; end pointer
  +	$PUSH	$inp,`$FRAME-$SIZE_T*26`($sp)	; save real inp
  +	addi	$t1,$sp,`$FRAME+16*$SZ`		; fictitious end pointer
  +	addi	$inp,$sp,$FRAME			; fictitious inp pointer
  +	$PUSH	$num,`$FRAME-$SIZE_T*25`($sp)	; save real num
  +	$PUSH	$t1,`$FRAME-$SIZE_T*24`($sp)	; end pointer
  +	$PUSH	$inp,`$FRAME-$SIZE_T*23`($sp)	; inp pointer
   	bl	Lsha2_block_private
  -	$POP	$inp,`$FRAME-$SIZE_T*25`($sp)
  -	$POP	$num,`$FRAME-$SIZE_T*24`($sp)
  -	addic.	$num,$num,`-16*$SZ`
  +	$POP	$inp,`$FRAME-$SIZE_T*26`($sp)	; restore real inp
  +	$POP	$num,`$FRAME-$SIZE_T*25`($sp)	; restore real num
  +	addic.	$num,$num,`-16*$SZ`		; num--
   	bne-	Lunaligned
   	b	Ldone
   ___
  @@ -309,9 +331,10 @@
   $code.=<<___;
   	bdnz-	Lrounds
   
  -	subi	$Tbl,$Tbl,`($rounds-16)*$SZ`
   	$POP	$ctx,`$FRAME-$SIZE_T*22`($sp)
  -	$POP	$num,`$FRAME-$SIZE_T*23`($sp)	; end pointer
  +	$POP	$inp,`$FRAME-$SIZE_T*23`($sp)	; inp pointer
  +	$POP	$num,`$FRAME-$SIZE_T*24`($sp)	; end pointer
  +	subi	$Tbl,$Tbl,`($rounds-16)*$SZ`	; rewind Tbl
   
   	$LD	r16,`0*$SZ`($ctx)
   	$LD	r17,`1*$SZ`($ctx)
  @@ -320,9 +343,11 @@
   	$LD	r20,`4*$SZ`($ctx)
   	$LD	r21,`5*$SZ`($ctx)
   	$LD	r22,`6*$SZ`($ctx)
  +	addi	$inp,$inp,`16*$SZ`		; advance inp
   	$LD	r23,`7*$SZ`($ctx)
   	add	$A,$A,r16
   	add	$B,$B,r17
  +	$PUSH	$inp,`$FRAME-$SIZE_T*23`($sp)
   	add	$C,$C,r18
   	$ST	$A,`0*$SZ`($ctx)
   	add	$D,$D,r19
  @@ -335,7 +360,6 @@
   	$ST	$E,`4*$SZ`($ctx)
   	add	$H,$H,r23
   	$ST	$F,`5*$SZ`($ctx)
  -	addi	$inp,$inp,`16*$SZ`
   	$ST	$G,`6*$SZ`($ctx)
   	$UCMP	$inp,$num
   	$ST	$H,`7*$SZ`($ctx)
  @@ -349,21 +373,21 @@
   .align	6
   LPICmeup:
   	bl	LPIC
  +	addi	$Tbl,$Tbl,`64-4`	; "distance" between . and last nop
   	b	LPICedup
   	nop
   	nop
   	nop
   	nop
   	nop
  -	nop
   LPIC:	mflr	$Tbl
  -	addi	$Tbl,$Tbl,`64-4`	; "distance" between bl and last nop
   	blr
   	nop
   	nop
   	nop
   	nop
   	nop
  +	nop
   ___
   $code.=<<___ if ($SZ==8);
   	.long	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
   .
______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
CVS Repository Commit List                     openssl-cvs@openssl.org
Automated List Manager                           majordomo@openssl.org
[1]

about | contact  Other archives ( Real Estate discussion Medical topics )