OpenSSL CVS Repository
http://cvs.openssl.org/
____________________________________________________________
________________
Server: cvs.openssl.org Name: Andy
Polyakov
Root: /v/openssl/cvs Email: appro openssl.org
Module: openssl Date:
16-Sep-2007 20:47:24
Branch: HEAD Handle:
2007091619472400
Modified files:
openssl/crypto/sha/asm sha256-586.pl sha512-586.pl
Log:
Minor sha[256|512]-586 performance tweaks.
Summary:
Revision Changes Path
1.2 +12 -7
openssl/crypto/sha/asm/sha256-586.pl
1.3 +24 -21
openssl/crypto/sha/asm/sha512-586.pl
____________________________________________________________
________________
patch -p0 <<' .'
Index: openssl/crypto/sha/asm/sha256-586.pl
============================================================
================
$ cvs diff -u -r1.1 -r1.2 sha256-586.pl
--- openssl/crypto/sha/asm/sha256-586.pl 13 Sep 2007
07:26:18 -0000 1.1
+++ openssl/crypto/sha/asm/sha256-586.pl 16 Sep 2007
18:47:24 -0000 1.2
@@ -45,13 +45,17 @@
$K256="ebp";
sub BODY_00_15() {
+ my $in_16_64=shift;
+
&mov ("ecx",$E);
+
&add ($T,&DWP(4*(8+15+16-9),"esp")) if
($in_16_64); # T += X[-7]
&ror ("ecx",6);
&mov ("edi",$E);
&ror ("edi",11);
&mov ("esi",$Foff);
&xor ("ecx","edi");
&ror ("edi",25-11);
+ &mov (&DWP(4*(8+15),"esp"),$T) if
($in_16_64); # save X[0]
&xor ("ecx","edi"); # Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
@@ -88,6 +92,7 @@
&add ($K256,4);
&add ($A,$T); # h += T
+
&mov ($T,&DWP(4*(8+15+16-1),"esp")) if
($in_16_64); # preload T
&add ($E,"esi"); # d += K256[i]
&add ($A,"esi"); # h += K256[i]
}
@@ -159,10 +164,10 @@
&cmp ("esi",0xc19bf174);
&jne (&label("00_15"));
+ &mov ($T,&DWP(4*(8+15+16-1),"esp")); #
preloaded in BODY_00_15(1)
&set_label("16_63",16);
- &mov ($T,&DWP(4*(8+15+16-1),"esp"));
- &mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
&mov ("esi",$T);
+ &mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
&shr ($T,3);
&ror ("esi",7);
&xor ($T,"esi");
@@ -176,13 +181,13 @@
&xor ("ecx","edi");
&ror ("edi",19-17);
&add ($T,"esi"); # T += X[-16]
- &xor ("ecx","edi") #
sigma1(X[-2])
+ &xor ("edi","ecx") #
sigma1(X[-2])
- &add ($T,"ecx"); # T += sigma1(X[-2])
- &add ($T,&DWP(4*(8+15+16-9),"esp")); #
T += X[-7]
- &mov (&DWP(4*(8+15),"esp"),$T); # save
X[0]
+ &add ($T,"edi"); # T += sigma1(X[-2])
+ #
&add ($T,&DWP(4*(8+15+16-9),"esp")); # T
+= X[-7], moved to BODY_00_15(1)
+ # &mov (&DWP(4*(8+15),"esp"),$T); #
save X[0]
- &BODY_00_15();
+ &BODY_00_15(1);
&cmp ("esi",0xc67178f2);
&jne (&label("16_63"));
 .
patch -p0 <<' .'
Index: openssl/crypto/sha/asm/sha512-586.pl
============================================================
================
$ cvs diff -u -r1.2 -r1.3 sha512-586.pl
--- openssl/crypto/sha/asm/sha512-586.pl 14 Sep 2007
21:06:14 -0000 1.2
+++ openssl/crypto/sha/asm/sha512-586.pl 16 Sep 2007
18:47:24 -0000 1.3
@@ -68,6 +68,8 @@
# mm5-mm7, but it's done on on-demand basis...
sub BODY_00_15_sse2 {
+ my $prefetch=shift;
+
&movq ("mm5",$Fsse2); # load f
&movq ("mm6",$Gsse2); # load g
&movq ("mm7",$Hsse2); # load h
@@ -96,7 +98,7 @@
&pxor ("mm5","mm6"); # f^=g
&movq ($E,$Dsse2); # e = load d
&paddq ("mm3","mm5"); #
T1+=Ch(e,f,g)
-
+ &movq (&QWP(0,"esp"),$A); #
modulo-scheduled save a
&paddq ("mm3","mm7"); # T1+=h
&movq ("mm5",$A); # %mm5 is sliding
right
@@ -114,15 +116,16 @@
&pxor ("mm7","mm6");
&psllq ("mm6",6);
&pxor ("mm7","mm5");
- &movq (&QWP(0,"esp"),$A); #
modulo-scheduled save a
+ &sub ("esp",8);
&pxor ("mm7","mm6"); #
T2=Sigma0_512(a)
&movq ("mm5",$A); # %mm5=a
&por ($A,"mm2"); # a=a|c
+ &movq ("mm6",&QWP(8*(9+16-14),"esp")) if ($prefetch);
&pand ("mm5","mm2"); #
%mm5=a&c
&pand ($A,"mm1"); # a=(a|c)&b
+ &movq ("mm2",&QWP(8*(9+16-1),"esp")) if ($prefetch);
&por ("mm5",$A); #
%mm5=(a&c)|((a|c)&b)
- &sub ("esp",8);
&paddq ("mm7","mm5"); #
T2+=Maj(a,b,c)
&movq ($A,"mm3"); # a=T1
@@ -327,48 +330,48 @@
&cmp (&LB("edx"),0x35);
&jne (&label("00_14_sse2"));
- &BODY_00_15_sse2();
+ &BODY_00_15_sse2(1);
&set_label("16_79_sse2",16);
- &movq ("mm3",&QWP(8*(9+16-1),"esp"));
- &movq ("mm6",&QWP(8*(9+16-14),"esp"));
- &movq ("mm1","mm3");
+ #&movq ("mm2",&QWP(8*(9+16-1),"esp")); #prefetched in BODY_00_15
+ #&movq ("mm6",&QWP(8*(9+16-14),"esp"));
+ &movq ("mm1","mm2");
- &psrlq ("mm3",1);
+ &psrlq ("mm2",1);
&movq ("mm7","mm6");
&psrlq ("mm6",6);
- &movq ("mm2","mm3");
+ &movq ("mm3","mm2");
- &psrlq ("mm3",7-1);
+ &psrlq ("mm2",7-1);
&movq ("mm5","mm6");
&psrlq ("mm6",19-6);
- &pxor ("mm2","mm3");
+ &pxor ("mm3","mm2");
- &psrlq ("mm3",8-7);
+ &psrlq ("mm2",8-7);
&pxor ("mm5","mm6");
&psrlq ("mm6",61-19);
- &pxor ("mm2","mm3");
+ &pxor ("mm3","mm2");
- &movq ("mm3",&QWP(8*(9+16),"esp"));
+ &movq ("mm2",&QWP(8*(9+16),"esp"));
&psllq ("mm1",56);
&pxor ("mm5","mm6");
&psllq ("mm7",3);
- &pxor ("mm2","mm1");
+ &pxor ("mm3","mm1");
- &paddq ("mm3",&QWP(8*(9+16-9),"esp"));
+ &paddq ("mm2",&QWP(8*(9+16-9),"esp"));
&psllq ("mm1",63-56);
&pxor ("mm5","mm7");
&psllq ("mm7",45-3);
- &pxor ("mm2","mm1");
+ &pxor ("mm3","mm1");
&pxor ("mm5","mm7");
- &paddq ("mm2","mm5");
- &paddq ("mm2","mm3");
- &movq (&QWP(8*9,"esp"),"mm2");
+ &paddq ("mm3","mm5");
+ &paddq ("mm3","mm2");
+ &movq (&QWP(8*9,"esp"),"mm3");
- &BODY_00_15_sse2();
+ &BODY_00_15_sse2(1);
&cmp (&LB("edx"),0x17);
&jne (&label("16_79_sse2"));
 .
____________________________________________________________
__________
OpenSSL Project http://www.openssl.org
CVS Repository Commit List
openssl-cvs openssl.org
Automated List Manager
majordomo openssl.org
|