List Info

Thread: OpenSSL: openssl/crypto/sha/asm/ sha256-586.pl sha512-586.pl




OpenSSL: openssl/crypto/sha/asm/ sha256-586.pl sha512-586.pl
country flaguser name
Germany
2007-09-16 13:47:25
  OpenSSL CVS Repository
  http://cvs.openssl.org/
 
____________________________________________________________
________________

  Server: cvs.openssl.org                  Name:   Andy Polyakov
  Root:   /v/openssl/cvs                   Email:  appro@openssl.org
  Module: openssl                          Date:   16-Sep-2007 20:47:24
  Branch: HEAD                             Handle: 2007091619472400

  Modified files:
    openssl/crypto/sha/asm  sha256-586.pl sha512-586.pl

  Log:
    Minor sha[256|512]-586 performance tweaks.

  Summary:
    Revision    Changes     Path
    1.2         +12 -7      openssl/crypto/sha/asm/sha256-586.pl
    1.3         +24 -21     openssl/crypto/sha/asm/sha512-586.pl
 
____________________________________________________________
________________

  patch -p0 <<'@@ .'
  Index: openssl/crypto/sha/asm/sha256-586.pl
 
============================================================
================
  $ cvs diff -u -r1.1 -r1.2 sha256-586.pl
  --- openssl/crypto/sha/asm/sha256-586.pl	13 Sep 2007 07:26:18 -0000	1.1
  +++ openssl/crypto/sha/asm/sha256-586.pl	16 Sep 2007 18:47:24 -0000	1.2
  @@ -45,13 +45,17 @@
   $K256="ebp";
   
   sub BODY_00_15() {
  +    my $in_16_64=shift;
  +
   	&mov	("ecx",$E);
  +	&add	($T,&DWP(4*(8+15+16-9),"esp"))	if ($in_16_64);	# T += X[-7]
   	&ror	("ecx",6);
   	&mov	("edi",$E);
   	&ror	("edi",11);
   	 &mov	("esi",$Foff);
   	&xor	("ecx","edi");
   	&ror	("edi",25-11);
  +	 &mov	(&DWP(4*(8+15),"esp"),$T)	if ($in_16_64);	# save X[0]
   	&xor	("ecx","edi");	# Sigma1(e)
   	 &mov	("edi",$Goff);
   	&add	($T,"ecx");	# T += Sigma1(e)
  @@ -88,6 +92,7 @@
   
   	&add	($K256,4);
   	&add	($A,$T);	# h += T
  +	&mov	($T,&DWP(4*(8+15+16-1),"esp"))	if ($in_16_64);	# preload T
   	&add	($E,"esi");	# d += K256[i]
   	&add	($A,"esi");	# h += K256[i]
   }
  @@ -159,10 +164,10 @@
   	&cmp	("esi",0xc19bf174);
   	&jne	(&label("00_15"));
   
  +	&mov	($T,&DWP(4*(8+15+16-1),"esp"));	# preloaded in BODY_00_15(1)
   &set_label("16_63",16);
  -	&mov	($T,&DWP(4*(8+15+16-1),"esp"));
  -	&mov	("ecx",&DWP(4*(8+15+16-14),"esp"));
   	&mov	("esi",$T);
  +	&mov	("ecx",&DWP(4*(8+15+16-14),"esp"));
   	&shr	($T,3);
   	&ror	("esi",7);
   	&xor	($T,"esi");
  @@ -176,13 +181,13 @@
   	&xor	("ecx","edi");
   	&ror	("edi",19-17);
   	 &add	($T,"esi");			# T += X[-16]
  -	&xor	("ecx","edi")			# sigma1(X[-2])
  +	&xor	("edi","ecx")			# sigma1(X[-2])
   
  -	&add	($T,"ecx");			# T += sigma1(X[-2])
  -	&add	($T,&DWP(4*(8+15+16-9),"esp"));	# T += X[-7]
  -	&mov	(&DWP(4*(8+15),"esp"),$T);	# save X[0]
  +	&add	($T,"edi");			# T += sigma1(X[-2])
  +	# &add	($T,&DWP(4*(8+15+16-9),"esp"));	# T += X[-7], moved to BODY_00_15(1)
  +	# &mov	(&DWP(4*(8+15),"esp"),$T);	# save X[0]
   
  -	&BODY_00_15();
  +	&BODY_00_15(1);
   
   	&cmp	("esi",0xc67178f2);
   	&jne	(&label("16_63"));
  @@ .
  patch -p0 <<'@@ .'
  Index: openssl/crypto/sha/asm/sha512-586.pl
 
============================================================
================
  $ cvs diff -u -r1.2 -r1.3 sha512-586.pl
  --- openssl/crypto/sha/asm/sha512-586.pl	14 Sep 2007 21:06:14 -0000	1.2
  +++ openssl/crypto/sha/asm/sha512-586.pl	16 Sep 2007 18:47:24 -0000	1.3
  @@ -68,6 +68,8 @@
   		# mm5-mm7, but it's done on on-demand basis...
   
   sub BODY_00_15_sse2 {
  +    my $prefetch=shift;
  +
   	&movq	("mm5",$Fsse2);			# load f
   	&movq	("mm6",$Gsse2);			# load g
   	&movq	("mm7",$Hsse2);			# load h
  @@ -96,7 +98,7 @@
   	&pxor	("mm5","mm6");			# f^=g
   	&movq	($E,$Dsse2);			# e = load d
   	&paddq	("mm3","mm5");			# T1+=Ch(e,f,g)
  -
  +	&movq	(&QWP(0,"esp"),$A);		# modulo-scheduled save a
   	&paddq	("mm3","mm7");			# T1+=h
   
   	&movq	("mm5",$A);			# %mm5 is sliding right
  @@ -114,15 +116,16 @@
   	&pxor	("mm7","mm6");
   	&psllq	("mm6",6);
   	&pxor	("mm7","mm5");
  -	&movq	(&QWP(0,"esp"),$A);		# modulo-scheduled save a
  +	&sub	("esp",8);
   	&pxor	("mm7","mm6");			# T2=Sigma0_512(a)
   
   	&movq	("mm5",$A);			# %mm5=a
   	&por	($A,"mm2");			# a=a|c
  +	&movq	("mm6",&QWP(8*(9+16-14),"esp"))	if ($prefetch);
   	&pand	("mm5","mm2");			# %mm5=a&c
   	&pand	($A,"mm1");			# a=(a|c)&b
  +	&movq	("mm2",&QWP(8*(9+16-1),"esp"))	if ($prefetch);
   	&por	("mm5",$A);			# %mm5=(a&c)|((a|c)&b)
  -	&sub	("esp",8);
   	&paddq	("mm7","mm5");			# T2+=Maj(a,b,c)
   	&movq	($A,"mm3");			# a=T1
   
  @@ -327,48 +330,48 @@
   	&cmp	(&LB("edx"),0x35);
   	&jne	(&label("00_14_sse2"));
   
  -	&BODY_00_15_sse2();
  +	&BODY_00_15_sse2(1);
   
   &set_label("16_79_sse2",16);
  -	&movq	("mm3",&QWP(8*(9+16-1),"esp"));
  -	&movq	("mm6",&QWP(8*(9+16-14),"esp"));
  -	&movq	("mm1","mm3");
  +	#&movq	("mm2",&QWP(8*(9+16-1),"esp"));	#prefetched in BODY_00_15 
  +	#&movq	("mm6",&QWP(8*(9+16-14),"esp"));
  +	&movq	("mm1","mm2");
   
  -	&psrlq	("mm3",1);
  +	&psrlq	("mm2",1);
   	&movq	("mm7","mm6");
   	&psrlq	("mm6",6);
  -	&movq	("mm2","mm3");
  +	&movq	("mm3","mm2");
   
  -	&psrlq	("mm3",7-1);
  +	&psrlq	("mm2",7-1);
   	&movq	("mm5","mm6");
   	&psrlq	("mm6",19-6);
  -	&pxor	("mm2","mm3");
  +	&pxor	("mm3","mm2");
   
  -	&psrlq	("mm3",8-7);
  +	&psrlq	("mm2",8-7);
   	&pxor	("mm5","mm6");
   	&psrlq	("mm6",61-19);
  -	&pxor	("mm2","mm3");
  +	&pxor	("mm3","mm2");
   
  -	&movq	("mm3",&QWP(8*(9+16),"esp"));
  +	&movq	("mm2",&QWP(8*(9+16),"esp"));
   
   	&psllq	("mm1",56);
   	&pxor	("mm5","mm6");
   	&psllq	("mm7",3);
  -	&pxor	("mm2","mm1");
  +	&pxor	("mm3","mm1");
   
  -	&paddq	("mm3",&QWP(8*(9+16-9),"esp"));
  +	&paddq	("mm2",&QWP(8*(9+16-9),"esp"));
   
   	&psllq	("mm1",63-56);
   	&pxor	("mm5","mm7");
   	&psllq	("mm7",45-3);
  -	&pxor	("mm2","mm1");
  +	&pxor	("mm3","mm1");
   	&pxor	("mm5","mm7");
   
  -	&paddq	("mm2","mm5");
  -	&paddq	("mm2","mm3");
  -	&movq	(&QWP(8*9,"esp"),"mm2");
  +	&paddq	("mm3","mm5");
  +	&paddq	("mm3","mm2");
  +	&movq	(&QWP(8*9,"esp"),"mm3");
   
  -	&BODY_00_15_sse2();
  +	&BODY_00_15_sse2(1);
   
   	&cmp	(&LB("edx"),0x17);
   	&jne	(&label("16_79_sse2"));
  @@ .
______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
CVS Repository Commit List                     openssl-cvs@openssl.org
Automated List Manager                           majordomo@openssl.org

[1]

about | contact  Other archives ( Real Estate discussion Medical topics )