OpenSSL CVS Repository
http://cvs.openssl.org/
____________________________________________________________
________________
Server: cvs.openssl.org Name: Andy
Polyakov
Root: /v/openssl/cvs Email: appro openssl.org
Module: openssl Date:
27-Nov-2006 14:11:15
Branch: HEAD Handle:
2006112713111500
Modified files:
openssl/crypto/sha/asm sha512-ppc.pl
Log:
sha512-ppc.pl mutli-thread safety fix.
Summary:
Revision Changes Path
1.3 +55 -31
openssl/crypto/sha/asm/sha512-ppc.pl
____________________________________________________________
________________
patch -p0 <<' .'
Index: openssl/crypto/sha/asm/sha512-ppc.pl
============================================================
================
$ cvs diff -u -r1.2 -r1.3 sha512-ppc.pl
--- openssl/crypto/sha/asm/sha512-ppc.pl 17 Oct 2006
16:13:18 -0000 1.2
+++ openssl/crypto/sha/asm/sha512-ppc.pl 27 Nov 2006
13:11:15 -0000 1.3
 -2,8 +2,9 
#
============================================================
========
# Written by Andy Polyakov <appro fy.chalmers.se> for the OpenSSL
-# project. Rights for redistribution and usage in source
and binary
-# forms are granted according to the OpenSSL license.
+# project. The module is, however, dual licensed under
OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it.
For further
+# details see http://www
.openssl.org/~appro/cryptogams/.
#
============================================================
========
# I let hardware handle unaligned input, except on page
boundaries
 -17,7 +18,21 
# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*)
#
# (*) 64-bit code in 32-bit application context, which
actually is
-# on TODO list
+# on TODO list. It should be noted that for safe
deployment in
+# 32-bit *mutli-threaded* context asyncronous signals
should be
+# blocked upon entry to SHA512 block routine. This is
because
+# 32-bit signaling procedure invalidates upper halves of
GPRs.
+# Context switch procedure preserves them, but not
signaling:-(
+
+# Second version is true multi-thread safe. Trouble with
the original
+# version was that it was using thread local storage
pointer register.
+# Well, it scrupulously preserved it, but the problem
would arise the
+# moment asynchronous signal was delivered and signal
handler would
+# dereference the TLS pointer. While it's never the case
in openssl
+# application or test suite, we have to respect this
scenario and not
+# use TLS pointer register. Alternative would be to
require caller to
+# block signals prior calling this routine. For the
record, in 32-bit
+# context R2 serves as TLS pointer, while in 64-bit
context - R13.
$output=shift;
 -69,24 +84,24 
$FRAME=32*$SIZE_T;
$sp ="r1";
-$toc="r2"; # zapped by $Tbl
+$toc="r2";
$ctx="r3"; # zapped by $a0
-$inp="r4";
-$num="r5"; # zapped by $a1
+$inp="r4"; # zapped by $a1
+$num="r5"; # zapped by $t0
$T ="r0";
-$Tbl="r2";
$a0 ="r3";
-$a1 ="r5";
-$t0 ="r6";
-$t1 ="r7";
+$a1 ="r4";
+$t0 ="r5";
+$t1 ="r6";
+$Tbl="r7";
$A ="r8";
$B ="r9";
$C ="r10";
$D ="r11";
$E ="r12";
-$F ="r13";
+$F ="r13"; $F="r2" if ($SIZE_T==8);#
reassigned to exempt TLS pointer
$G ="r14";
$H ="r15";
 -94,6 +109,8 
X=("r16","r17","r18",&q
uot;r19","r20","r21","r22"
;,"r23",
"r24","r25","r26","r27&qu
ot;,"r28","r29","r30","r3
1");
+$inp="r31"; # reassigned $inp! aliases with
X[15]
+
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)= _;
$code.=<<___;
 -184,6 +201,7 
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$LD $A,`0*$SZ`($ctx)
+ mr $inp,r4 ; incarnate $inp
$LD $B,`1*$SZ`($ctx)
$LD $C,`2*$SZ`($ctx)
$LD $D,`3*$SZ`($ctx)
 -197,8 +215,9 
andi. r0,$inp,3
bne Lunaligned
Laligned:
- add $t0,$inp,$num
- $PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
+ add $num,$inp,$num
+ $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
+ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
bl Lsha2_block_private
Ldone:
$POP r0,`$FRAME-$SIZE_T*21`($sp)
 -242,15 +261,17 
$UCMP $num,$t1
ble- Laligned ; didn't cross the page boundary
subfc $num,$t1,$num
- add $t0,$inp,$t1
- $PUSH $num,`$FRAME-$SIZE_T*24`($sp)
- $PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
+ add $t1,$inp,$t1
+ $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real
remaining num
+ $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; intermediate end
pointer
+ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
bl Lsha2_block_private
- $POP $num,`$FRAME-$SIZE_T*24`($sp)
+ ; $inp equals to the intermediate end pointer here
+ $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real
remaining num
Lcross_page:
li $t1,`16*$SZ/4`
mtctr $t1
- addi r20,$sp,$FRAME ; spot below the frame
+ addi r20,$sp,$FRAME ; aligned spot below the frame
Lmemcpy:
lbz r16,0($inp)
lbz r17,1($inp)
 -264,15 +285,16 
addi r20,r20,4
bdnz Lmemcpy
- $PUSH $inp,`$FRAME-$SIZE_T*25`($sp)
- addi $inp,$sp,$FRAME
- addi $t0,$sp,`$FRAME+16*$SZ`
- $PUSH $num,`$FRAME-$SIZE_T*24`($sp)
- $PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
+ $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
+ addi $t1,$sp,`$FRAME+16*$SZ` ; fictitious end pointer
+ addi $inp,$sp,$FRAME ; fictitious inp pointer
+ $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
+ $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
+ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
bl Lsha2_block_private
- $POP $inp,`$FRAME-$SIZE_T*25`($sp)
- $POP $num,`$FRAME-$SIZE_T*24`($sp)
- addic. $num,$num,`-16*$SZ`
+ $POP $inp,`$FRAME-$SIZE_T*26`($sp) ; restore real inp
+ $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
+ addic. $num,$num,`-16*$SZ` ; num--
bne- Lunaligned
b Ldone
___
 -309,9 +331,10 
$code.=<<___;
bdnz- Lrounds
- subi $Tbl,$Tbl,`($rounds-16)*$SZ`
$POP $ctx,`$FRAME-$SIZE_T*22`($sp)
- $POP $num,`$FRAME-$SIZE_T*23`($sp) ; end pointer
+ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
+ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
+ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
$LD r16,`0*$SZ`($ctx)
$LD r17,`1*$SZ`($ctx)
 -320,9 +343,11 
$LD r20,`4*$SZ`($ctx)
$LD r21,`5*$SZ`($ctx)
$LD r22,`6*$SZ`($ctx)
+ addi $inp,$inp,`16*$SZ` ; advance inp
$LD r23,`7*$SZ`($ctx)
add $A,$A,r16
add $B,$B,r17
+ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
add $C,$C,r18
$ST $A,`0*$SZ`($ctx)
add $D,$D,r19
 -335,7 +360,6 
$ST $E,`4*$SZ`($ctx)
add $H,$H,r23
$ST $F,`5*$SZ`($ctx)
- addi $inp,$inp,`16*$SZ`
$ST $G,`6*$SZ`($ctx)
$UCMP $inp,$num
$ST $H,`7*$SZ`($ctx)
 -349,21 +373,21 
.align 6
LPICmeup:
bl LPIC
+ addi $Tbl,$Tbl,`64-4` ; "distance" between .
and last nop
b LPICedup
nop
nop
nop
nop
nop
- nop
LPIC: mflr $Tbl
- addi $Tbl,$Tbl,`64-4` ; "distance" between bl
and last nop
blr
nop
nop
nop
nop
nop
+ nop
___
$code.=<<___ if ($SZ==8);
.long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
 .
____________________________________________________________
__________
OpenSSL Project http://www.openssl.org
CVS Repository Commit List
openssl-cvs openssl.org
Automated List Manager
majordomo openssl.org
|