List Info

Thread: OpenSSL: openssl/crypto/bn/asm/ sparcv9a-mont.pl




OpenSSL: openssl/crypto/bn/asm/ sparcv9a-mont.pl
user name
2006-11-28 07:20:37
  OpenSSL CVS Repository
  http://cvs.openssl.org/
  ____________________________________________________________________________

  Server: cvs.openssl.org                  Name:   Andy Polyakov
  Root:   /v/openssl/cvs                   Email:  appro@openssl.org
  Module: openssl                          Date:   28-Nov-2006 08:20:37
  Branch: HEAD                             Handle: 2006112807203600

  Modified files:
    openssl/crypto/bn/asm   sparcv9a-mont.pl

  Log:
    This is "informational" commit. Its mere purpose is to expose "modulo
    factor" in inner loops.

  Summary:
    Revision    Changes     Path
    1.8         +190 -12    openssl/crypto/bn/asm/sparcv9a-mont.pl
  ____________________________________________________________________________

  patch -p0 <<'@@ .'
  Index: openssl/crypto/bn/asm/sparcv9a-mont.pl
  ============================================================================
  $ cvs diff -u -r1.7 -r1.8 sparcv9a-mont.pl
  --- openssl/crypto/bn/asm/sparcv9a-mont.pl	19 Dec 2005 09:10:06 -0000	1.7
  +++ openssl/crypto/bn/asm/sparcv9a-mont.pl	28 Nov 2006 07:20:36 -0000	1.8
  @@ -286,19 +286,16 @@
   	!or	%o7,%o0,%o0		! 64-bit result
   	srlx	%o3,16,%g1		! 34-bit carry
   
  -	ba	.L1st
   	add	$j,8,$j
  -.align	32
  -.L1st:
  -	add	$ap,$j,%o3
  -	add	$np,$j,%o4
  -	ld	[%o3+0],$alo_	! load a[j] as pair of 32-bit words
  +	add	$ap,$j,%o4
  +	add	$np,$j,%o5
  +	ld	[%o4+0],$alo_	! load a[j] as pair of 32-bit words
   	fzeros	$alo
  -	ld	[%o3+4],$ahi_
  +	ld	[%o4+4],$ahi_
   	fzeros	$ahi
  -	ld	[%o4+0],$nlo_	! load n[j] as pair of 32-bit words
  +	ld	[%o5+0],$nlo_	! load n[j] as pair of 32-bit words
   	fzeros	$nlo
  -	ld	[%o4+4],$nhi_
  +	ld	[%o5+4],$nhi_
   	fzeros	$nhi
   
   	fxtod	$alo,$alo
  @@ -350,6 +347,11 @@
   	std	$nlob,[%sp+$bias+$frame+8]
   	std	$nloc,[%sp+$bias+$frame+16]
   	std	$nlod,[%sp+$bias+$frame+24]
  +
  +	addcc	$j,8,$j
  +	bz,pn	%icc,.L1stskip
  +.align	32,0x1000000
  +.L1st:
   	ldx	[%sp+$bias+$frame+0],%o0
   	ldx	[%sp+$bias+$frame+8],%o1
   	ldx	[%sp+$bias+$frame+16],%o2
  @@ -376,10 +378,102 @@
   	add	%g1,1,%g1
   
   	stx	%o0,[$tp]		! tp[j-1]=
  +
  +
  +	add	$ap,$j,%o4
  +	add	$np,$j,%o5
  +	ld	[%o4+0],$alo_	! load a[j] as pair of 32-bit words
  +	fzeros	$alo
  +	ld	[%o4+4],$ahi_
  +	fzeros	$ahi
  +	ld	[%o5+0],$nlo_	! load n[j] as pair of 32-bit words
  +	fzeros	$nlo
  +	ld	[%o5+4],$nhi_
  +	fzeros	$nhi
  +
  +	fxtod	$alo,$alo
  +	fxtod	$ahi,$ahi
  +	fxtod	$nlo,$nlo
  +	fxtod	$nhi,$nhi
  +
  +	std	$alo,[$ap_l+$j]		! save smashed ap[j] in double format
  +		fmuld	$alo,$ba,$aloa
  +	std	$ahi,[$ap_h+$j]
  +		fmuld	$nlo,$na,$nloa
  +	std	$nlo,[$np_l+$j]		! save smashed np[j] in double format
  +		fmuld	$alo,$bb,$alob
  +	std	$nhi,[$np_h+$j]
  +		fmuld	$nlo,$nb,$nlob
  +		fmuld	$alo,$bc,$aloc
  +	faddd	$aloa,$nloa,$nloa
  +		fmuld	$nlo,$nc,$nloc
  +		fmuld	$alo,$bd,$alod
  +	faddd	$alob,$nlob,$nlob
  +		fmuld	$nlo,$nd,$nlod
  +		fmuld	$ahi,$ba,$ahia
  +	faddd	$aloc,$nloc,$nloc
  +		fmuld	$nhi,$na,$nhia
  +		fmuld	$ahi,$bb,$ahib
  +	faddd	$alod,$nlod,$nlod
  +		fmuld	$nhi,$nb,$nhib
  +		fmuld	$ahi,$bc,$ahic
  +	faddd	$ahia,$nhia,$nhia
  +		fmuld	$nhi,$nc,$nhic
  +		fmuld	$ahi,$bd,$ahid
  +	faddd	$ahib,$nhib,$nhib
  +		fmuld	$nhi,$nd,$nhid
  +
  +	faddd	$dota,$nloa,$nloa
  +	faddd	$dotb,$nlob,$nlob
  +	faddd	$ahic,$nhic,$dota	! $nhic
  +	faddd	$ahid,$nhid,$dotb	! $nhid
  +
  +	faddd	$nloc,$nhia,$nloc
  +	faddd	$nlod,$nhib,$nlod
  +
  +	fdtox	$nloa,$nloa
  +	fdtox	$nlob,$nlob
  +	fdtox	$nloc,$nloc
  +	fdtox	$nlod,$nlod
  +
  +	std	$nloa,[%sp+$bias+$frame+0]
  +	std	$nlob,[%sp+$bias+$frame+8]
  +	std	$nloc,[%sp+$bias+$frame+16]
  +	std	$nlod,[%sp+$bias+$frame+24]
  +
   	addcc	$j,8,$j
   	bnz,pt	%icc,.L1st
   	add	$tp,8,$tp
   
  +.L1stskip:
  +	ldx	[%sp+$bias+$frame+0],%o0
  +	ldx	[%sp+$bias+$frame+8],%o1
  +	ldx	[%sp+$bias+$frame+16],%o2
  +	ldx	[%sp+$bias+$frame+24],%o3
  +
  +	srlx	%o0,16,%o7
  +	add	%o7,%o1,%o1
  +	srlx	%o1,16,%o7
  +	add	%o7,%o2,%o2
  +	srlx	%o2,16,%o7
  +	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
  +	and	%o0,$mask,%o0
  +	and	%o1,$mask,%o1
  +	and	%o2,$mask,%o2
  +	sllx	%o1,16,%o1
  +	sllx	%o2,32,%o2
  +	sllx	%o3,48,%o7
  +	or	%o1,%o0,%o0
  +	or	%o2,%o0,%o0
  +	or	%o7,%o0,%o0		! 64-bit result
  +	addcc	%g1,%o0,%o0
  +	srlx	%o3,16,%g1		! 34-bit carry
  +	bcs,a	%xcc,.+8
  +	add	%g1,1,%g1
  +
  +	stx	%o0,[$tp]		! tp[j-1]=
  +	add	$tp,8,$tp
  +
   	fdtox	$dota,$dota
   	fdtox	$dotb,$dotb
   	std	$dota,[%sp+$bias+$frame+32]
  @@ -514,10 +608,7 @@
   	bcs,a	%xcc,.+8
   	add	%g1,1,%g1
   
  -	ba	.Linner
   	add	$j,8,$j
  -.align	32
  -.Linner:
   	ldd	[$ap_l+$j],$alo		! load a[j] in double format
   	ldd	[$ap_h+$j],$ahi
   	ldd	[$np_l+$j],$nlo		! load n[j] in double format
  @@ -563,6 +654,11 @@
   	std	$nlob,[%sp+$bias+$frame+8]
   	std	$nloc,[%sp+$bias+$frame+16]
   	std	$nlod,[%sp+$bias+$frame+24]
  +
  +	addcc	$j,8,$j
  +	bz,pn	%icc,.Linnerskip
  +.align	32,0x1000000
  +.Linner:
   	ldx	[%sp+$bias+$frame+0],%o0
   	ldx	[%sp+$bias+$frame+8],%o1
   	ldx	[%sp+$bias+$frame+16],%o2
  @@ -594,10 +690,92 @@
   	add	%g1,1,%g1
   
   	stx	%o0,[$tp]		! tp[j-1]
  +
  +
  +	ldd	[$ap_l+$j],$alo		! load a[j] in double format
  +	ldd	[$ap_h+$j],$ahi
  +	ldd	[$np_l+$j],$nlo		! load n[j] in double format
  +	ldd	[$np_h+$j],$nhi
  +
  +		fmuld	$alo,$ba,$aloa
  +		fmuld	$nlo,$na,$nloa
  +		fmuld	$alo,$bb,$alob
  +		fmuld	$nlo,$nb,$nlob
  +		fmuld	$alo,$bc,$aloc
  +	faddd	$aloa,$nloa,$nloa
  +		fmuld	$nlo,$nc,$nloc
  +		fmuld	$alo,$bd,$alod
  +	faddd	$alob,$nlob,$nlob
  +		fmuld	$nlo,$nd,$nlod
  +		fmuld	$ahi,$ba,$ahia
  +	faddd	$aloc,$nloc,$nloc
  +		fmuld	$nhi,$na,$nhia
  +		fmuld	$ahi,$bb,$ahib
  +	faddd	$alod,$nlod,$nlod
  +		fmuld	$nhi,$nb,$nhib
  +		fmuld	$ahi,$bc,$ahic
  +	faddd	$ahia,$nhia,$nhia
  +		fmuld	$nhi,$nc,$nhic
  +		fmuld	$ahi,$bd,$ahid
  +	faddd	$ahib,$nhib,$nhib
  +		fmuld	$nhi,$nd,$nhid
  +
  +	faddd	$dota,$nloa,$nloa
  +	faddd	$dotb,$nlob,$nlob
  +	faddd	$ahic,$nhic,$dota	! $nhic
  +	faddd	$ahid,$nhid,$dotb	! $nhid
  +
  +	faddd	$nloc,$nhia,$nloc
  +	faddd	$nlod,$nhib,$nlod
  +
  +	fdtox	$nloa,$nloa
  +	fdtox	$nlob,$nlob
  +	fdtox	$nloc,$nloc
  +	fdtox	$nlod,$nlod
  +
  +	std	$nloa,[%sp+$bias+$frame+0]
  +	std	$nlob,[%sp+$bias+$frame+8]
  +	std	$nloc,[%sp+$bias+$frame+16]
  +	std	$nlod,[%sp+$bias+$frame+24]
  +
   	addcc	$j,8,$j
   	bnz,pt	%icc,.Linner
   	add	$tp,8,$tp
   
  +.Linnerskip:
  +	ldx	[%sp+$bias+$frame+0],%o0
  +	ldx	[%sp+$bias+$frame+8],%o1
  +	ldx	[%sp+$bias+$frame+16],%o2
  +	ldx	[%sp+$bias+$frame+24],%o3
  +
  +	srlx	%o0,16,%o7
  +	add	%o7,%o1,%o1
  +	srlx	%o1,16,%o7
  +	add	%o7,%o2,%o2
  +	srlx	%o2,16,%o7
  +	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
  +	and	%o0,$mask,%o0
  +	and	%o1,$mask,%o1
  +	and	%o2,$mask,%o2
  +	sllx	%o1,16,%o1
  +	sllx	%o2,32,%o2
  +	sllx	%o3,48,%o7
  +	or	%o1,%o0,%o0
  +	or	%o2,%o0,%o0
  +	or	%o7,%o0,%o0		! 64-bit result
  +	addcc	%g1,%o0,%o0
  +	srlx	%o3,16,%g1		! 34-bit carry
  +	bcs,a	%xcc,.+8
  +	add	%g1,1,%g1
  +
  +	ldx	[$tp+8],%o7		! tp[j]
  +	addcc	%o7,%o0,%o0
  +	bcs,a	%xcc,.+8
  +	add	%g1,1,%g1
  +
  +	stx	%o0,[$tp]		! tp[j-1]
  +	add	$tp,8,$tp
  +
   	fdtox	$dota,$dota
   	fdtox	$dotb,$dotb
   	std	$dota,[%sp+$bias+$frame+32]
  @@ .
______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
CVS Repository Commit List                     openssl-cvs@openssl.org
Automated List Manager                           majordomo@openssl.org
[1]

about | contact  Other archives ( Real Estate discussion Medical topics )