[git commit] libbb/sha256: code shrink in 64-bit x86

Denys Vlasenko vda.linux at googlemail.com
Sat Feb 5 23:33:42 UTC 2022


commit: https://git.busybox.net/busybox/commit/?id=31c1c310772fa6c897ee1585ea15fc38f3ab3dff
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

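Keep the byte-flip shuffle mask in %xmm7 instead of dedicating %xmm8 to it.
%xmm7 (MSGTMP4, now renamed XMMTMP4 since it is no longer only a message
temporary) is otherwise used as scratch only before the mask is loaded and
after the last pshufb, so the two uses do not overlap. Instructions that
reference only %xmm0-%xmm7 need no REX prefix, so the mask load and each of
the four pshufb instructions become one byte shorter.

function                                             old     new   delta
sha256_process_block64_shaNI                         706     701      -5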

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 libbb/hash_md5_sha256_x86-64_shaNI.S | 96 ++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index f3df541e4..dbf391135 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -31,9 +31,7 @@
 #define MSGTMP1		%xmm4
 #define MSGTMP2		%xmm5
 #define MSGTMP3		%xmm6
-#define MSGTMP4		%xmm7
-
-#define SHUF_MASK	%xmm8
+#define XMMTMP4		%xmm7
 
 #define ABEF_SAVE	%xmm9
 #define CDGH_SAVE	%xmm10
@@ -45,11 +43,12 @@ sha256_process_block64_shaNI:
 
 	shuf128_32	$0xB1, STATE0,  STATE0		/* CDAB */
 	shuf128_32	$0x1B, STATE1,  STATE1		/* EFGH */
-	mova128		STATE0, MSGTMP4
+	mova128		STATE0, XMMTMP4
 	palignr		$8, STATE1,  STATE0		/* ABEF */
-	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
+	pblendw		$0xF0, XMMTMP4, STATE1		/* CDGH */
 
-	mova128		PSHUFFLE_BSWAP32_FLIP_MASK(%rip), SHUF_MASK
+/* XMMTMP4 holds flip mask from here... */
+	mova128		PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4
 	leaq		K256+8*16(%rip), SHA256CONSTANTS
 
 	/* Save hash values for addition after rounds */
@@ -58,7 +57,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 0-3 */
 	movu128		0*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
+	pshufb		XMMTMP4, MSG
 	mova128		MSG, MSGTMP0
 		paddd		0*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
@@ -67,7 +66,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 4-7 */
 	movu128		1*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
+	pshufb		XMMTMP4, MSG
 	mova128		MSG, MSGTMP1
 		paddd		1*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
@@ -77,7 +76,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 8-11 */
 	movu128		2*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
+	pshufb		XMMTMP4, MSG
 	mova128		MSG, MSGTMP2
 		paddd		2*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
@@ -87,13 +86,14 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 12-15 */
 	movu128		3*16(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
+	pshufb		XMMTMP4, MSG
+/* ...to here */
 	mova128		MSG, MSGTMP3
 		paddd		3*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
+	mova128		MSGTMP3, XMMTMP4
+	palignr		$4, MSGTMP2, XMMTMP4
+	paddd		XMMTMP4, MSGTMP0
 	sha256msg2	MSGTMP3, MSGTMP0
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -103,9 +103,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP0, MSG
 		paddd		4*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
+	mova128		MSGTMP0, XMMTMP4
+	palignr		$4, MSGTMP3, XMMTMP4
+	paddd		XMMTMP4, MSGTMP1
 	sha256msg2	MSGTMP0, MSGTMP1
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -115,9 +115,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP1, MSG
 		paddd		5*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
+	mova128		MSGTMP1, XMMTMP4
+	palignr		$4, MSGTMP0, XMMTMP4
+	paddd		XMMTMP4, MSGTMP2
 	sha256msg2	MSGTMP1, MSGTMP2
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -127,9 +127,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP2, MSG
 		paddd		6*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
+	mova128		MSGTMP2, XMMTMP4
+	palignr		$4, MSGTMP1, XMMTMP4
+	paddd		XMMTMP4, MSGTMP3
 	sha256msg2	MSGTMP2, MSGTMP3
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -139,9 +139,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP3, MSG
 		paddd		7*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
+	mova128		MSGTMP3, XMMTMP4
+	palignr		$4, MSGTMP2, XMMTMP4
+	paddd		XMMTMP4, MSGTMP0
 	sha256msg2	MSGTMP3, MSGTMP0
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -151,9 +151,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP0, MSG
 		paddd		8*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
+	mova128		MSGTMP0, XMMTMP4
+	palignr		$4, MSGTMP3, XMMTMP4
+	paddd		XMMTMP4, MSGTMP1
 	sha256msg2	MSGTMP0, MSGTMP1
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -163,9 +163,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP1, MSG
 		paddd		9*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
+	mova128		MSGTMP1, XMMTMP4
+	palignr		$4, MSGTMP0, XMMTMP4
+	paddd		XMMTMP4, MSGTMP2
 	sha256msg2	MSGTMP1, MSGTMP2
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -175,9 +175,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP2, MSG
 		paddd		10*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
+	mova128		MSGTMP2, XMMTMP4
+	palignr		$4, MSGTMP1, XMMTMP4
+	paddd		XMMTMP4, MSGTMP3
 	sha256msg2	MSGTMP2, MSGTMP3
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -187,9 +187,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP3, MSG
 		paddd		11*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP3, MSGTMP4
-	palignr		$4, MSGTMP2, MSGTMP4
-	paddd		MSGTMP4, MSGTMP0
+	mova128		MSGTMP3, XMMTMP4
+	palignr		$4, MSGTMP2, XMMTMP4
+	paddd		XMMTMP4, MSGTMP0
 	sha256msg2	MSGTMP3, MSGTMP0
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -199,9 +199,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP0, MSG
 		paddd		12*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP0, MSGTMP4
-	palignr		$4, MSGTMP3, MSGTMP4
-	paddd		MSGTMP4, MSGTMP1
+	mova128		MSGTMP0, XMMTMP4
+	palignr		$4, MSGTMP3, XMMTMP4
+	paddd		XMMTMP4, MSGTMP1
 	sha256msg2	MSGTMP0, MSGTMP1
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -211,9 +211,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP1, MSG
 		paddd		13*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP1, MSGTMP4
-	palignr		$4, MSGTMP0, MSGTMP4
-	paddd		MSGTMP4, MSGTMP2
+	mova128		MSGTMP1, XMMTMP4
+	palignr		$4, MSGTMP0, XMMTMP4
+	paddd		XMMTMP4, MSGTMP2
 	sha256msg2	MSGTMP1, MSGTMP2
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -222,9 +222,9 @@ sha256_process_block64_shaNI:
 	mova128		MSGTMP2, MSG
 		paddd		14*16-8*16(SHA256CONSTANTS), MSG
 		sha256rnds2	STATE0, STATE1
-	mova128		MSGTMP2, MSGTMP4
-	palignr		$4, MSGTMP1, MSGTMP4
-	paddd		MSGTMP4, MSGTMP3
+	mova128		MSGTMP2, XMMTMP4
+	palignr		$4, MSGTMP1, XMMTMP4
+	paddd		XMMTMP4, MSGTMP3
 	sha256msg2	MSGTMP2, MSGTMP3
 		shuf128_32	$0x0E, MSG, MSG
 		sha256rnds2	STATE1, STATE0
@@ -243,9 +243,9 @@ sha256_process_block64_shaNI:
 	/* Write hash values back in the correct order */
 	shuf128_32	$0x1B, STATE0,  STATE0		/* FEBA */
 	shuf128_32	$0xB1, STATE1,  STATE1		/* DCHG */
-	mova128		STATE0, MSGTMP4
+	mova128		STATE0, XMMTMP4
 	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
-	palignr		$8, MSGTMP4, STATE1		/* HGFE */
+	palignr		$8, XMMTMP4, STATE1		/* HGFE */
 
 	movu128		STATE0, 80+0*16(%rdi)
 	movu128		STATE1, 80+1*16(%rdi)


More information about the busybox-cvs mailing list