From bugzilla at busybox.net Sat Jan 1 08:10:21 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sat, 01 Jan 2022 08:10:21 +0000 Subject: [Bug 14486] New: completion broken with multi-line prompts Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 Bug ID: 14486 Summary: completion broken with multi-line prompts Product: Busybox Version: 1.33.x Hardware: All OS: Linux Status: NEW Severity: normal Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: rkitover at gmail.com CC: busybox-cvs at busybox.net Target Milestone: --- Hello, I have been working on a prompt that I would like to be compatible with busybox. It is a 2-line prompt, the source is here: https://github.com/rkitover/sh-prompt-simple/ It does mostly work, except that when I use file/directory completion, pressing tab deletes the line above the prompt. I have recorded a video to demonstrate this: https://1drv.ms/v/s!AuXve-AbNCkVgdhRmSYz8iwQusZDfQ?e=qCMjy4 -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sat Jan 1 12:57:34 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 1 Jan 2022 13:57:34 +0100 Subject: [git commit] libbb/sha1: assembly versions for x86 Message-ID: <20220101125331.ADCC08275E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=5f6817020467598868b7d1c9ca477d7ccd66b87d branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master 32 bits: function old new delta sha1_process_block64 3950 3657 -293 64 bits: sha1_process_block64 4167 3683 -484 Signed-off-by: Denys Vlasenko --- libbb/Config.src | 2 +- libbb/hash_md5_sha.c | 417 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 418 insertions(+), 1 deletion(-) diff --git a/libbb/Config.src b/libbb/Config.src index d2054dc63..e027c14a8 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -59,7 +59,7 @@ config SHA1_SMALL Trade binary size versus speed for the sha1 algorithm. 
throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 - 0 360 374 3950 4167 + 0 367 367 3657 3683 1 224 229 654 732 2,3 200 195 358 380 diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index faf485df5..9de30dfe6 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -8,6 +8,9 @@ */ #include "libbb.h" +#define STR1(s) #s +#define STR(s) STR1(s) + #define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA) /* gcc 4.2.1 optimizes rotr64 better with inline than with macro @@ -491,6 +494,419 @@ unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf) */ #if CONFIG_SHA1_SMALL == 0 +# if defined(__GNUC__) && defined(__i386__) +static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) +{ + BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 76); + asm( +"\n\ + pushl %ebp # \n\ + pushl %edi # \n\ + pushl %esi # \n\ + pushl %ebx # \n\ + pushl %eax \n\ + movl $15, %edi \n\ +1: \n\ + movl (%eax,%edi,4), %esi \n\ + bswap %esi \n\ + pushl %esi \n\ + decl %edi \n\ + jns 1b \n\ + movl 80(%eax), %ebx # b = ctx->hash[1] \n\ + movl 84(%eax), %ecx # c = ctx->hash[2] \n\ + movl 88(%eax), %edx # d = ctx->hash[3] \n\ + movl 92(%eax), %ebp # e = ctx->hash[4] \n\ + movl 76(%eax), %eax # a = ctx->hash[0] \n\ +#Register and stack use: \n\ +# eax..edx: a..d \n\ +# ebp: e \n\ +# esi,edi: temps \n\ +# 4*n(%esp): W[n] \n\ +" +#define RD1As(a,b,c,d,e, n, RCONST) \ +"\n\ + ##movl 4*"n"(%esp), %esi # n=0, W[0] already in %esi \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + andl "b", %edi # &b \n\ + xorl "d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\ + addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD1Bs(a,b,c,d,e, n, RCONST) \ +"\n\ + movl 4*"n"(%esp), %esi # W[n] \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + andl "b", %edi # &b \n\ + xorl "d", 
%edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\ + addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\ + xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\ + xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\ + xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + andl "b", %edi # &b \n\ + xorl "d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\ + addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD1A(a,b,c,d,e, n) RD1As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST)) +#define RD1B(a,b,c,d,e, n) RD1Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST)) +#define RD1C(a,b,c,d,e, n) RD1Cs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0x5A827999 + RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) + RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9) + RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14) + RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19) +#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\ + xorl 4*"n8"(%esp), %esi 
# ^W[(n+8) & 15] \n\ + xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\ + xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + xorl "b", %edi # ^b \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\ + addl %edi, "e" # e += (c ^ d ^ b) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD2(a,b,c,d,e, n) RD2s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0x6ED9EBA1 + RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4) + RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9) + RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14) + RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19) + +#define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl "b", %edi # di: b \n\ + movl "b", %esi # si: b \n\ + orl "c", %edi # di: b | c \n\ + andl "c", %esi # si: b & c \n\ + andl "d", %edi # di: (b | c) & d \n\ + orl %esi, %edi # ((b | c) & d) | (b & c) \n\ + movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\ + xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\ + xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\ + xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\ + addl %edi, "e" # += ((b | c) & d) | (b & c)\n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD3(a,b,c,d,e, n) 
RD3s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0x8F1BBCDC + RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4) + RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9) + RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14) + RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19) + +#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\ + xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\ + xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\ + xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + xorl "b", %edi # ^b \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\ + addl %edi, "e" # e += (c ^ d ^ b) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\ + xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\ + xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\ + xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + ##movl %esi, 4*"n"(%esp) # store to W[n & 15] elided \n\ + movl "c", %edi # c \n\ + xorl "d", %edi # ^d \n\ + xorl "b", %edi # ^b \n\ + leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\ + addl %edi, "e" # e += (c ^ d ^ b) \n\ + movl "a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, "e" # e += rotl32(a,5) \n\ + rorl $2, "b" # b = rotl32(b,30) \n\ +" +#define RD4A(a,b,c,d,e, n) 
RD4As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) +#define RD4B(a,b,c,d,e, n) RD4Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0xCA62C1D6 + RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4) + RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9) + RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14) + RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) + +"\n\ + movl 4*16(%esp), %esi # \n\ + addl $4*(16+1), %esp # \n\ + addl %eax, 76(%esi) # ctx->hash[0] += a \n\ + addl %ebx, 80(%esi) # ctx->hash[1] += b \n\ + addl %ecx, 84(%esi) # ctx->hash[2] += c \n\ + addl %edx, 88(%esi) # ctx->hash[3] += d \n\ + addl %ebp, 92(%esi) # ctx->hash[4] += e \n\ + popl %ebx # \n\ + popl %esi # \n\ + popl %edi # \n\ + popl %ebp # \n\ +" + ); /* asm */ +#undef RCONST +} +# elif defined(__GNUC__) && defined(__x86_64__) +static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) +{ + BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); + asm( +// TODO: store W[] in r8..r15? 
(r8..r11 are callee-clobbered, no need to save) +"\n\ + ##pushq %r15 # \n\ + ##pushq %r14 # \n\ + ##pushq %r13 # \n\ + ##pushq %r12 # \n\ + ##pushq %rbp # \n\ + ##pushq %rbx # \n\ + movq %rbp, %r8 # callee-saved \n\ + movq %rbx, %r9 # callee-saved \n\ + movq %rdi, %r10 # we need ctx at the end \n\ + movl $15, %eax \n\ +1: \n\ + movl (%rdi,%rax,4), %esi \n\ + bswap %esi \n\ + movl %esi, -64(%rsp,%rax,4) \n\ + decl %eax \n\ + jns 1b \n\ + movl 80(%rdi), %eax # a = ctx->hash[0] \n\ + movl 84(%rdi), %ebx # b = ctx->hash[1] \n\ + movl 88(%rdi), %ecx # c = ctx->hash[2] \n\ + movl 92(%rdi), %edx # d = ctx->hash[3] \n\ + movl 96(%rdi), %ebp # e = ctx->hash[4] \n\ +#Register and stack use: \n\ +# eax..edx: a..d \n\ +# ebp: e \n\ +# esi,edi: temps \n\ +# -64+4*n(%rsp): W[n] \n\ +" +#define RD1As(a,b,c,d,e, n, RCONST) \ +"\n\ + ##movl -64+4*"n"(%rsp), %esi # n=0, W[0] already in %esi \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + andl %e"b", %edi # &b \n\ + xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ + addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD1Bs(a,b,c,d,e, n, RCONST) \ +"\n\ + movl -64+4*"n"(%rsp), %esi # W[n] \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + andl %e"b", %edi # &b \n\ + xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ + addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ + xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ + xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ + xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + 
roll %esi # \n\ + movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + andl %e"b", %edi # &b \n\ + xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ + addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) +#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) +#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0x5A827999 + RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) + RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9) + RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14) + RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19) +#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ + xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ + xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ + xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + xorl %e"b", %edi # ^b \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ + addl %edi, %e"e" # e += (c ^ d ^ b) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define 
RD2(a,b,c,d,e, n) RD2s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST)) +#undef RCONST +#define RCONST 0x6ED9EBA1 + RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4) + RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9) + RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14) + RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19) + +#define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl %e"b", %edi # di: b \n\ + movl %e"b", %esi # si: b \n\ + orl %e"c", %edi # di: b | c \n\ + andl %e"c", %esi # si: b & c \n\ + andl %e"d", %edi # di: (b | c) & d \n\ + orl %esi, %edi # ((b | c) & d) | (b & c) \n\ + movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ + xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ + xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ + xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD3(a,b,c,d,e, n) RD3s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST)) +#undef RCONST +//#define RCONST 0x8F1BBCDC "out of range for signed 32bit displacement" +#define RCONST -0x70e44324 + RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4) + RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9) + 
RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14) + RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19) + +#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ + xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ + xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ + xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + xorl %e"b", %edi # ^b \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ + addl %edi, %e"e" # e += (c ^ d ^ b) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ + xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ + xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ + xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + roll %esi # \n\ + ##movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] elided \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + xorl %e"b", %edi # ^b \n\ + leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ + addl %edi, %e"e" # e += (c ^ d ^ b) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD4A(a,b,c,d,e, n) RD4As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) +#define RD4B(a,b,c,d,e, n) RD4Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) +#undef RCONST +//#define RCONST 0xCA62C1D6 "out of range for signed 32bit displacement" +#define RCONST 
-0x359d3e2a + RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4) + RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9) + RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14) + RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) + +"\n\ + movq %r10, %rdi # \n\ + addl %eax, 80(%rdi) # ctx->hash[0] += a \n\ + addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\ + addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\ + addl %edx, 92(%rdi) # ctx->hash[3] += d \n\ + addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\ + movq %r9, %rbx # callee-saved \n\ + movq %r8, %rbp # callee-saved \n\ + ##popq %rbx # \n\ + ##popq %rbp # \n\ + ##popq %r12 # \n\ + ##popq %r13 # \n\ + ##popq %r14 # \n\ + ##popq %r15 # \n\ +" + ); /* asm */ +#undef RCONST +} +# else /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. * It seems further speedup can be achieved by handling more than * 64 bytes per one function call (coreutils does that). @@ -571,6 +987,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) ctx->hash[3] += d; ctx->hash[4] += e; } +# endif #elif CONFIG_SHA1_SMALL == 1 /* Middle-sized version, +300 bytes of code on x86. 
*/ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) From vda.linux at googlemail.com Sat Jan 1 14:01:53 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 1 Jan 2022 15:01:53 +0100 Subject: [git commit] libbb/sha1: shrink x86_64 version - use r8..15 for W[8..15] Message-ID: <20220101135906.69E8E829F4@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=d643010feeef312c77d7f51c3dd476d4e605c982 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64 3683 3562 -121 Signed-off-by: Denys Vlasenko --- libbb/Config.src | 2 +- libbb/hash_md5_sha.c | 299 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 240 insertions(+), 61 deletions(-) diff --git a/libbb/Config.src b/libbb/Config.src index e027c14a8..f66f65f81 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -59,7 +59,7 @@ config SHA1_SMALL Trade binary size versus speed for the sha1 algorithm. throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 - 0 367 367 3657 3683 + 0 367 367 3657 3562 1 224 229 654 732 2,3 200 195 358 380 diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 9de30dfe6..a4e36066a 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -700,22 +700,194 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) { BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); asm( -// TODO: store W[] in r8..r15? 
(r8..r11 are callee-clobbered, no need to save) "\n\ - ##pushq %r15 # \n\ - ##pushq %r14 # \n\ - ##pushq %r13 # \n\ - ##pushq %r12 # \n\ - ##pushq %rbp # \n\ - ##pushq %rbx # \n\ - movq %rbp, %r8 # callee-saved \n\ - movq %rbx, %r9 # callee-saved \n\ - movq %rdi, %r10 # we need ctx at the end \n\ - movl $15, %eax \n\ + pushq %r15 # \n\ + pushq %r14 # \n\ + pushq %r13 # \n\ + pushq %r12 # \n\ + pushq %rbp # \n\ + pushq %rbx # \n\ + pushq %rdi # we need ctx at the end \n\ + \n\ +#Register and stack use: \n\ +# eax..edx: a..d \n\ +# ebp: e \n\ +# esi,edi: temps \n\ +# -32+4*n(%rsp),r8...r15: W[0..7,8..15] \n\ + .macro loadW n,r \n\ + .if \\n == 0 \n\ + movl -32+4*0(%rsp),\\r \n\ + .endif \n\ + .if \\n == 1 \n\ + movl -32+4*1(%rsp),\\r \n\ + .endif \n\ + .if \\n == 2 \n\ + movl -32+4*2(%rsp),\\r \n\ + .endif \n\ + .if \\n == 3 \n\ + movl -32+4*3(%rsp),\\r \n\ + .endif \n\ + .if \\n == 4 \n\ + movl -32+4*4(%rsp),\\r \n\ + .endif \n\ + .if \\n == 5 \n\ + movl -32+4*5(%rsp),\\r \n\ + .endif \n\ + .if \\n == 6 \n\ + movl -32+4*6(%rsp),\\r \n\ + .endif \n\ + .if \\n == 7 \n\ + movl -32+4*7(%rsp),\\r \n\ + .endif \n\ + .if \\n == 8 \n\ + movl %r8d,\\r \n\ + .endif \n\ + .if \\n == 9 \n\ + movl %r9d,\\r \n\ + .endif \n\ + .if \\n == 10 \n\ + movl %r10d,\\r \n\ + .endif \n\ + .if \\n == 11 \n\ + movl %r11d,\\r \n\ + .endif \n\ + .if \\n == 12 \n\ + movl %r12d,\\r \n\ + .endif \n\ + .if \\n == 13 \n\ + movl %r13d,\\r \n\ + .endif \n\ + .if \\n == 14 \n\ + movl %r14d,\\r \n\ + .endif \n\ + .if \\n == 15 \n\ + movl %r15d,\\r \n\ + .endif \n\ + .endm \n\ + \n\ + .macro storeW r,n \n\ + .if \\n == 0 \n\ + movl \\r,-32+4*0(%rsp) \n\ + .endif \n\ + .if \\n == 1 \n\ + movl \\r,-32+4*1(%rsp) \n\ + .endif \n\ + .if \\n == 2 \n\ + movl \\r,-32+4*2(%rsp) \n\ + .endif \n\ + .if \\n == 3 \n\ + movl \\r,-32+4*3(%rsp) \n\ + .endif \n\ + .if \\n == 4 \n\ + movl \\r,-32+4*4(%rsp) \n\ + .endif \n\ + .if \\n == 5 \n\ + movl \\r,-32+4*5(%rsp) \n\ + .endif \n\ + .if \\n == 6 \n\ + movl 
\\r,-32+4*6(%rsp) \n\ + .endif \n\ + .if \\n == 7 \n\ + movl \\r,-32+4*7(%rsp) \n\ + .endif \n\ + .if \\n == 8 \n\ + movl \\r,%r8d \n\ + .endif \n\ + .if \\n == 9 \n\ + movl \\r,%r9d \n\ + .endif \n\ + .if \\n == 10 \n\ + movl \\r,%r10d \n\ + .endif \n\ + .if \\n == 11 \n\ + movl \\r,%r11d \n\ + .endif \n\ + .if \\n == 12 \n\ + movl \\r,%r12d \n\ + .endif \n\ + .if \\n == 13 \n\ + movl \\r,%r13d \n\ + .endif \n\ + .if \\n == 14 \n\ + movl \\r,%r14d \n\ + .endif \n\ + .if \\n == 15 \n\ + movl \\r,%r15d \n\ + .endif \n\ + .endm \n\ + \n\ + .macro xorW n,r \n\ + .if \\n == 0 \n\ + xorl -32+4*0(%rsp),\\r \n\ + .endif \n\ + .if \\n == 1 \n\ + xorl -32+4*1(%rsp),\\r \n\ + .endif \n\ + .if \\n == 2 \n\ + xorl -32+4*2(%rsp),\\r \n\ + .endif \n\ + .if \\n == 3 \n\ + xorl -32+4*3(%rsp),\\r \n\ + .endif \n\ + .if \\n == 4 \n\ + xorl -32+4*4(%rsp),\\r \n\ + .endif \n\ + .if \\n == 5 \n\ + xorl -32+4*5(%rsp),\\r \n\ + .endif \n\ + .if \\n == 6 \n\ + xorl -32+4*6(%rsp),\\r \n\ + .endif \n\ + .if \\n == 7 \n\ + xorl -32+4*7(%rsp),\\r \n\ + .endif \n\ + .if \\n == 8 \n\ + xorl %r8d,\\r \n\ + .endif \n\ + .if \\n == 9 \n\ + xorl %r9d,\\r \n\ + .endif \n\ + .if \\n == 10 \n\ + xorl %r10d,\\r \n\ + .endif \n\ + .if \\n == 11 \n\ + xorl %r11d,\\r \n\ + .endif \n\ + .if \\n == 12 \n\ + xorl %r12d,\\r \n\ + .endif \n\ + .if \\n == 13 \n\ + xorl %r13d,\\r \n\ + .endif \n\ + .if \\n == 14 \n\ + xorl %r14d,\\r \n\ + .endif \n\ + .if \\n == 15 \n\ + xorl %r15d,\\r \n\ + .endif \n\ + .endm \n\ + \n\ + movl 4*8(%rdi), %r8d \n\ + bswap %r8d \n\ + movl 4*9(%rdi), %r9d \n\ + bswap %r9d \n\ + movl 4*10(%rdi), %r10d \n\ + bswap %r10d \n\ + movl 4*11(%rdi), %r11d \n\ + bswap %r11d \n\ + movl 4*12(%rdi), %r12d \n\ + bswap %r12d \n\ + movl 4*13(%rdi), %r13d \n\ + bswap %r13d \n\ + movl 4*14(%rdi), %r14d \n\ + bswap %r14d \n\ + movl 4*15(%rdi), %r15d \n\ + bswap %r15d \n\ + movl $7, %eax \n\ 1: \n\ movl (%rdi,%rax,4), %esi \n\ bswap %esi \n\ - movl %esi, -64(%rsp,%rax,4) \n\ + movl %esi, 
-32(%rsp,%rax,4) \n\ decl %eax \n\ jns 1b \n\ movl 80(%rdi), %eax # a = ctx->hash[0] \n\ @@ -723,15 +895,10 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) movl 88(%rdi), %ecx # c = ctx->hash[2] \n\ movl 92(%rdi), %edx # d = ctx->hash[3] \n\ movl 96(%rdi), %ebp # e = ctx->hash[4] \n\ -#Register and stack use: \n\ -# eax..edx: a..d \n\ -# ebp: e \n\ -# esi,edi: temps \n\ -# -64+4*n(%rsp): W[n] \n\ " #define RD1As(a,b,c,d,e, n, RCONST) \ "\n\ - ##movl -64+4*"n"(%rsp), %esi # n=0, W[0] already in %esi \n\ + ##loadW "n", %esi # n=0, W[0] already in %esi \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ andl %e"b", %edi # &b \n\ @@ -745,7 +912,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) " #define RD1Bs(a,b,c,d,e, n, RCONST) \ "\n\ - movl -64+4*"n"(%rsp), %esi # W[n] \n\ + loadW "n", %esi # W[n] \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ andl %e"b", %edi # &b \n\ @@ -757,14 +924,27 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) addl %esi, %e"e" # e += rotl32(a,5) \n\ rorl $2, %e"b" # b = rotl32(b,30) \n\ " -#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +#define RD1Cs(a,b,c,d,e, n, RCONST) \ "\n\ - movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ - xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ - xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ - xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + movl %e"c", %edi # c \n\ + xorl %e"d", %edi # ^d \n\ + andl %e"b", %edi # &b \n\ + xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ + leal "RCONST"(%r"e",%r"n"), %e"e" # e += RCONST + W[n] \n\ + addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ + movl %e"a", %esi # \n\ + roll $5, %esi # rotl32(a,5) \n\ + addl %esi, %e"e" # e += rotl32(a,5) \n\ + rorl $2, %e"b" # b = rotl32(b,30) \n\ +" +#define RD1Ds(a,b,c,d,e, n13,n8,n2,n, RCONST) \ +"\n\ + loadW "n13", %esi # W[(n+13) & 15] \n\ + xorW "n8", %esi # ^W[(n+8) & 15] \n\ + xorW "n2", %esi # ^W[(n+2) & 15] \n\ + xorW "n", %esi # ^W[n & 
15] \n\ roll %esi # \n\ - movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + storeW %esi, "n" # store to W[n & 15] \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ andl %e"b", %edi # &b \n\ @@ -776,23 +956,24 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) addl %esi, %e"e" # e += rotl32(a,5) \n\ rorl $2, %e"b" # b = rotl32(b,30) \n\ " -#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) -#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) -#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) +#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) +#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) +#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) +#define RD1D(a,b,c,d,e, n) RD1Ds(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) #undef RCONST #define RCONST 0x5A827999 RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) - RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9) - RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14) - RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19) + RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1C(cx,dx,bp,ax,bx, 8) RD1C(bx,cx,dx,bp,ax, 9) + RD1C(ax,bx,cx,dx,bp,10) RD1C(bp,ax,bx,cx,dx,11) RD1C(dx,bp,ax,bx,cx,12) RD1C(cx,dx,bp,ax,bx,13) RD1C(bx,cx,dx,bp,ax,14) + RD1C(ax,bx,cx,dx,bp,15) RD1D(bp,ax,bx,cx,dx,16) RD1D(dx,bp,ax,bx,cx,17) 
RD1D(cx,dx,bp,ax,bx,18) RD1D(bx,cx,dx,bp,ax,19) #define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ "\n\ - movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ - xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ - xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ - xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + loadW "n13", %esi # W[(n+13) & 15] \n\ + xorW "n8", %esi # ^W[(n+8) & 15] \n\ + xorW "n2", %esi # ^W[(n+2) & 15] \n\ + xorW "n", %esi # ^W[n & 15] \n\ roll %esi # \n\ - movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + storeW %esi, "n" # store to W[n & 15] \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ xorl %e"b", %edi # ^b \n\ @@ -819,12 +1000,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) andl %e"c", %esi # si: b & c \n\ andl %e"d", %edi # di: (b | c) & d \n\ orl %esi, %edi # ((b | c) & d) | (b & c) \n\ - movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ - xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ - xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ - xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + loadW "n13", %esi # W[(n+13) & 15] \n\ + xorW "n8", %esi # ^W[(n+8) & 15] \n\ + xorW "n2", %esi # ^W[(n+2) & 15] \n\ + xorW "n", %esi # ^W[n & 15] \n\ roll %esi # \n\ - movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ + storeW %esi, "n" # store to W[n & 15] \n\ addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\ leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ movl %e"a", %esi # \n\ @@ -843,12 +1024,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) #define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ "\n\ - movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ - xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ - xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ - xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + loadW "n13", %esi # W[(n+13) & 15] \n\ + xorW "n8", %esi # ^W[(n+8) & 15] \n\ + xorW "n2", %esi # ^W[(n+2) & 15] \n\ + xorW "n", %esi # ^W[n & 15] \n\ roll %esi # \n\ - movl %esi, 
-64+4*"n"(%rsp) # store to W[n & 15] \n\ + storeW %esi, "n" # store to W[n & 15] \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ xorl %e"b", %edi # ^b \n\ @@ -861,12 +1042,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) " #define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ "\n\ - movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ - xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ - xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ - xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ + loadW "n13", %esi # W[(n+13) & 15] \n\ + xorW "n8", %esi # ^W[(n+8) & 15] \n\ + xorW "n2", %esi # ^W[(n+2) & 15] \n\ + xorW "n", %esi # ^W[n & 15] \n\ roll %esi # \n\ - ##movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] elided \n\ + #storeW %esi, "n" # store to W[n & 15] elided \n\ movl %e"c", %edi # c \n\ xorl %e"d", %edi # ^d \n\ xorl %e"b", %edi # ^b \n\ @@ -888,20 +1069,18 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) "\n\ - movq %r10, %rdi # \n\ + popq %rdi # \n\ addl %eax, 80(%rdi) # ctx->hash[0] += a \n\ addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\ addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\ addl %edx, 92(%rdi) # ctx->hash[3] += d \n\ addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\ - movq %r9, %rbx # callee-saved \n\ - movq %r8, %rbp # callee-saved \n\ - ##popq %rbx # \n\ - ##popq %rbp # \n\ - ##popq %r12 # \n\ - ##popq %r13 # \n\ - ##popq %r14 # \n\ - ##popq %r15 # \n\ + popq %rbx # \n\ + popq %rbp # \n\ + popq %r12 # \n\ + popq %r13 # \n\ + popq %r14 # \n\ + popq %r15 # \n\ " ); /* asm */ #undef RCONST From vda.linux at googlemail.com Sat Jan 1 14:42:15 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 1 Jan 2022 15:42:15 +0100 Subject: [git commit] libbb/sha1: x86_64 version: bswap in 64-bit chunks Message-ID: <20220101143740.C3D0282A21@busybox.osuosl.org> commit: 
https://git.busybox.net/busybox/commit/?id=4d4f1f2096f06d69a6f205f0d8e33d4398f25677 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64 3562 3570 +8 Signed-off-by: Denys Vlasenko --- libbb/Config.src | 2 +- libbb/hash_md5_sha.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/libbb/Config.src b/libbb/Config.src index f66f65f81..42a2283aa 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -59,7 +59,7 @@ config SHA1_SMALL Trade binary size versus speed for the sha1 algorithm. throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 - 0 367 367 3657 3562 + 0 367 367 3657 3570 1 224 229 654 732 2,3 200 195 358 380 diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index a4e36066a..959bfc951 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) .endif \n\ .endm \n\ \n\ - movl 4*8(%rdi), %r8d \n\ - bswap %r8d \n\ - movl 4*9(%rdi), %r9d \n\ - bswap %r9d \n\ - movl 4*10(%rdi), %r10d \n\ - bswap %r10d \n\ - movl 4*11(%rdi), %r11d \n\ - bswap %r11d \n\ - movl 4*12(%rdi), %r12d \n\ - bswap %r12d \n\ - movl 4*13(%rdi), %r13d \n\ - bswap %r13d \n\ - movl 4*14(%rdi), %r14d \n\ - bswap %r14d \n\ - movl 4*15(%rdi), %r15d \n\ - bswap %r15d \n\ - movl $7, %eax \n\ + movq 4*8(%rdi), %r8 \n\ + bswap %r8 \n\ + movl %r8d, %r9d \n\ + shrq $32, %r8 \n\ + movq 4*10(%rdi), %r10 \n\ + bswap %r10 \n\ + movl %r10d, %r11d \n\ + shrq $32, %r10 \n\ + movq 4*12(%rdi), %r12 \n\ + bswap %r12 \n\ + movl %r12d, %r13d \n\ + shrq $32, %r12 \n\ + movq 4*14(%rdi), %r14 \n\ + bswap %r14 \n\ + movl %r14d, %r15d \n\ + shrq $32, %r14 \n\ + \n\ + movl $3, %eax \n\ 1: \n\ - movl (%rdi,%rax,4), %esi \n\ - bswap %esi \n\ - movl %esi, -32(%rsp,%rax,4) \n\ + movq (%rdi,%rax,8), %rsi \n\ + bswap %rsi \n\ + rolq $32, %rsi \n\ + movq %rsi, -32(%rsp,%rax,8) \n\ decl %eax 
\n\ jns 1b \n\ movl 80(%rdi), %eax # a = ctx->hash[0] \n\ From bugzilla at busybox.net Sat Jan 1 15:25:25 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sat, 01 Jan 2022 15:25:25 +0000 Subject: [Bug 14491] New: Support sort -h Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14491 Bug ID: 14491 Summary: Support sort -h Product: Busybox Version: unspecified Hardware: All OS: All Status: NEW Severity: major Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: kasperkantz at outlook.com CC: busybox-cvs at busybox.net Target Milestone: --- with coreutils sort: ``` # or `du -h | sort -hr` $ cat << EOF | sort -hr 320K ./x1 412K ./x2 2.6M . 952K ./x3 764K ./x4 EOF ``` result: ``` 2.6M . 952K ./x3 764K ./x4 412K ./x2 320K ./x1 ``` busybox doesn't support -h yet. would be nice to add it. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Sat Jan 1 15:29:43 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sat, 01 Jan 2022 15:29:43 +0000 Subject: [Bug 14496] New: 'sort' is not consistent with its coreutils/toybox counterpart Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14496 Bug ID: 14496 Summary: 'sort' is not consistent with its coreutils/toybox counterpart Product: Busybox Version: unspecified Hardware: All OS: All Status: NEW Severity: major Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: kasperkantz at outlook.com CC: busybox-cvs at busybox.net Target Milestone: --- input: ``` $ cat << EOF | sort -k2rn -s a 1 b 2 c 1 d 2 EOF ``` result with coreutils and toybox: ``` b 2 d 2 a 1 c 1 ``` result with busybox: ``` d 2 b 2 c 1 a 1 ``` whereas it should return the lines a, b, c, d according to the documented behavior of the 'stable' (`-s`) parameter, but instead returns d, b, c, a. 
It should reverse the order of the second column only and keep the first in the same order provided via stdin according to the `-r` flag as the implementation from GNU coreutils does. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sun Jan 2 00:56:35 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 2 Jan 2022 01:56:35 +0100 Subject: [git commit] libbb/sha1: code shrink in medium-speed version Message-ID: <20220102005211.CE7D382B5E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=5c0c5582319a5123635c9fd62f8e99ef01cceb3f branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64 654 641 -13 Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 959bfc951..7eca3de4d 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -1121,7 +1121,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) * see what the value will be). 
* """ */ -#if defined(__i386__) +#if defined(__GNUC__) && defined(__i386__) # define DO_NOT_TRY_PROPAGATING(m) asm("":"+m"(m)) #else # define DO_NOT_TRY_PROPAGATING(m) ((void)0) @@ -1212,7 +1212,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) c = rotl32(b, 30); b = a; a = work; - n = (n + 1) & 15; + n = (n + 1) /* & 15*/; } while (n != 4); /* 2nd round of 20 operations */ j = 19; From vda.linux at googlemail.com Mon Jan 3 11:57:36 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 3 Jan 2022 12:57:36 +0100 Subject: [git commit] libbb/sha1: x86_64 version: move to a separate .S file, no code changes Message-ID: <20220103120658.71B1782D55@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=05fd13ebec869fc5e6f226481a2405a2685e8db1 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/Kbuild.src | 1 + libbb/hash_md5_sha.c | 392 +------------ libbb/hash_md5_sha_x86-64.S | 1349 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1353 insertions(+), 389 deletions(-) diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index 2fa239857..19b8aad60 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src @@ -56,6 +56,7 @@ lib-y += login.o lib-y += make_directory.o lib-y += makedev.o lib-y += hash_md5_sha.o +lib-y += hash_md5_sha_x86-64.o # Alternative (disabled) MD5 implementation #lib-y += hash_md5prime.o lib-y += messages.o diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 7eca3de4d..ee19c1cb7 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -696,397 +696,11 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) #undef RCONST } # elif defined(__GNUC__) && defined(__x86_64__) -static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) -{ - BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); - asm( -"\n\ - pushq %r15 # \n\ - pushq %r14 # \n\ - pushq %r13 # \n\ - pushq %r12 # \n\ - pushq %rbp # \n\ - pushq %rbx # 
\n\ - pushq %rdi # we need ctx at the end \n\ - \n\ -#Register and stack use: \n\ -# eax..edx: a..d \n\ -# ebp: e \n\ -# esi,edi: temps \n\ -# -32+4*n(%rsp),r8...r15: W[0..7,8..15] \n\ - .macro loadW n,r \n\ - .if \\n == 0 \n\ - movl -32+4*0(%rsp),\\r \n\ - .endif \n\ - .if \\n == 1 \n\ - movl -32+4*1(%rsp),\\r \n\ - .endif \n\ - .if \\n == 2 \n\ - movl -32+4*2(%rsp),\\r \n\ - .endif \n\ - .if \\n == 3 \n\ - movl -32+4*3(%rsp),\\r \n\ - .endif \n\ - .if \\n == 4 \n\ - movl -32+4*4(%rsp),\\r \n\ - .endif \n\ - .if \\n == 5 \n\ - movl -32+4*5(%rsp),\\r \n\ - .endif \n\ - .if \\n == 6 \n\ - movl -32+4*6(%rsp),\\r \n\ - .endif \n\ - .if \\n == 7 \n\ - movl -32+4*7(%rsp),\\r \n\ - .endif \n\ - .if \\n == 8 \n\ - movl %r8d,\\r \n\ - .endif \n\ - .if \\n == 9 \n\ - movl %r9d,\\r \n\ - .endif \n\ - .if \\n == 10 \n\ - movl %r10d,\\r \n\ - .endif \n\ - .if \\n == 11 \n\ - movl %r11d,\\r \n\ - .endif \n\ - .if \\n == 12 \n\ - movl %r12d,\\r \n\ - .endif \n\ - .if \\n == 13 \n\ - movl %r13d,\\r \n\ - .endif \n\ - .if \\n == 14 \n\ - movl %r14d,\\r \n\ - .endif \n\ - .if \\n == 15 \n\ - movl %r15d,\\r \n\ - .endif \n\ - .endm \n\ - \n\ - .macro storeW r,n \n\ - .if \\n == 0 \n\ - movl \\r,-32+4*0(%rsp) \n\ - .endif \n\ - .if \\n == 1 \n\ - movl \\r,-32+4*1(%rsp) \n\ - .endif \n\ - .if \\n == 2 \n\ - movl \\r,-32+4*2(%rsp) \n\ - .endif \n\ - .if \\n == 3 \n\ - movl \\r,-32+4*3(%rsp) \n\ - .endif \n\ - .if \\n == 4 \n\ - movl \\r,-32+4*4(%rsp) \n\ - .endif \n\ - .if \\n == 5 \n\ - movl \\r,-32+4*5(%rsp) \n\ - .endif \n\ - .if \\n == 6 \n\ - movl \\r,-32+4*6(%rsp) \n\ - .endif \n\ - .if \\n == 7 \n\ - movl \\r,-32+4*7(%rsp) \n\ - .endif \n\ - .if \\n == 8 \n\ - movl \\r,%r8d \n\ - .endif \n\ - .if \\n == 9 \n\ - movl \\r,%r9d \n\ - .endif \n\ - .if \\n == 10 \n\ - movl \\r,%r10d \n\ - .endif \n\ - .if \\n == 11 \n\ - movl \\r,%r11d \n\ - .endif \n\ - .if \\n == 12 \n\ - movl \\r,%r12d \n\ - .endif \n\ - .if \\n == 13 \n\ - movl \\r,%r13d \n\ - .endif \n\ - .if \\n == 14 \n\ - 
movl \\r,%r14d \n\ - .endif \n\ - .if \\n == 15 \n\ - movl \\r,%r15d \n\ - .endif \n\ - .endm \n\ - \n\ - .macro xorW n,r \n\ - .if \\n == 0 \n\ - xorl -32+4*0(%rsp),\\r \n\ - .endif \n\ - .if \\n == 1 \n\ - xorl -32+4*1(%rsp),\\r \n\ - .endif \n\ - .if \\n == 2 \n\ - xorl -32+4*2(%rsp),\\r \n\ - .endif \n\ - .if \\n == 3 \n\ - xorl -32+4*3(%rsp),\\r \n\ - .endif \n\ - .if \\n == 4 \n\ - xorl -32+4*4(%rsp),\\r \n\ - .endif \n\ - .if \\n == 5 \n\ - xorl -32+4*5(%rsp),\\r \n\ - .endif \n\ - .if \\n == 6 \n\ - xorl -32+4*6(%rsp),\\r \n\ - .endif \n\ - .if \\n == 7 \n\ - xorl -32+4*7(%rsp),\\r \n\ - .endif \n\ - .if \\n == 8 \n\ - xorl %r8d,\\r \n\ - .endif \n\ - .if \\n == 9 \n\ - xorl %r9d,\\r \n\ - .endif \n\ - .if \\n == 10 \n\ - xorl %r10d,\\r \n\ - .endif \n\ - .if \\n == 11 \n\ - xorl %r11d,\\r \n\ - .endif \n\ - .if \\n == 12 \n\ - xorl %r12d,\\r \n\ - .endif \n\ - .if \\n == 13 \n\ - xorl %r13d,\\r \n\ - .endif \n\ - .if \\n == 14 \n\ - xorl %r14d,\\r \n\ - .endif \n\ - .if \\n == 15 \n\ - xorl %r15d,\\r \n\ - .endif \n\ - .endm \n\ - \n\ - movq 4*8(%rdi), %r8 \n\ - bswap %r8 \n\ - movl %r8d, %r9d \n\ - shrq $32, %r8 \n\ - movq 4*10(%rdi), %r10 \n\ - bswap %r10 \n\ - movl %r10d, %r11d \n\ - shrq $32, %r10 \n\ - movq 4*12(%rdi), %r12 \n\ - bswap %r12 \n\ - movl %r12d, %r13d \n\ - shrq $32, %r12 \n\ - movq 4*14(%rdi), %r14 \n\ - bswap %r14 \n\ - movl %r14d, %r15d \n\ - shrq $32, %r14 \n\ - \n\ - movl $3, %eax \n\ -1: \n\ - movq (%rdi,%rax,8), %rsi \n\ - bswap %rsi \n\ - rolq $32, %rsi \n\ - movq %rsi, -32(%rsp,%rax,8) \n\ - decl %eax \n\ - jns 1b \n\ - movl 80(%rdi), %eax # a = ctx->hash[0] \n\ - movl 84(%rdi), %ebx # b = ctx->hash[1] \n\ - movl 88(%rdi), %ecx # c = ctx->hash[2] \n\ - movl 92(%rdi), %edx # d = ctx->hash[3] \n\ - movl 96(%rdi), %ebp # e = ctx->hash[4] \n\ -" -#define RD1As(a,b,c,d,e, n, RCONST) \ -"\n\ - ##loadW "n", %esi # n=0, W[0] already in %esi \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - andl %e"b", %edi # &b \n\ - xorl 
%e"d", %edi # (((c ^ d) & b) ^ d) \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ - addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD1Bs(a,b,c,d,e, n, RCONST) \ -"\n\ - loadW "n", %esi # W[n] \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - andl %e"b", %edi # &b \n\ - xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ - addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD1Cs(a,b,c,d,e, n, RCONST) \ -"\n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - andl %e"b", %edi # &b \n\ - xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ - leal "RCONST"(%r"e",%r"n"), %e"e" # e += RCONST + W[n] \n\ - addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD1Ds(a,b,c,d,e, n13,n8,n2,n, RCONST) \ -"\n\ - loadW "n13", %esi # W[(n+13) & 15] \n\ - xorW "n8", %esi # ^W[(n+8) & 15] \n\ - xorW "n2", %esi # ^W[(n+2) & 15] \n\ - xorW "n", %esi # ^W[n & 15] \n\ - roll %esi # \n\ - storeW %esi, "n" # store to W[n & 15] \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - andl %e"b", %edi # &b \n\ - xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ - addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) -#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), 
STR(RCONST)) -#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) -#define RD1D(a,b,c,d,e, n) RD1Ds(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) -#undef RCONST -#define RCONST 0x5A827999 - RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) - RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1C(cx,dx,bp,ax,bx, 8) RD1C(bx,cx,dx,bp,ax, 9) - RD1C(ax,bx,cx,dx,bp,10) RD1C(bp,ax,bx,cx,dx,11) RD1C(dx,bp,ax,bx,cx,12) RD1C(cx,dx,bp,ax,bx,13) RD1C(bx,cx,dx,bp,ax,14) - RD1C(ax,bx,cx,dx,bp,15) RD1D(bp,ax,bx,cx,dx,16) RD1D(dx,bp,ax,bx,cx,17) RD1D(cx,dx,bp,ax,bx,18) RD1D(bx,cx,dx,bp,ax,19) -#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ -"\n\ - loadW "n13", %esi # W[(n+13) & 15] \n\ - xorW "n8", %esi # ^W[(n+8) & 15] \n\ - xorW "n2", %esi # ^W[(n+2) & 15] \n\ - xorW "n", %esi # ^W[n & 15] \n\ - roll %esi # \n\ - storeW %esi, "n" # store to W[n & 15] \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - xorl %e"b", %edi # ^b \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ - addl %edi, %e"e" # e += (c ^ d ^ b) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD2(a,b,c,d,e, n) RD2s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST)) -#undef RCONST -#define RCONST 0x6ED9EBA1 - RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4) - RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9) - RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14) - RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) 
RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19) - -#define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ -"\n\ - movl %e"b", %edi # di: b \n\ - movl %e"b", %esi # si: b \n\ - orl %e"c", %edi # di: b | c \n\ - andl %e"c", %esi # si: b & c \n\ - andl %e"d", %edi # di: (b | c) & d \n\ - orl %esi, %edi # ((b | c) & d) | (b & c) \n\ - loadW "n13", %esi # W[(n+13) & 15] \n\ - xorW "n8", %esi # ^W[(n+8) & 15] \n\ - xorW "n2", %esi # ^W[(n+2) & 15] \n\ - xorW "n", %esi # ^W[n & 15] \n\ - roll %esi # \n\ - storeW %esi, "n" # store to W[n & 15] \n\ - addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD3(a,b,c,d,e, n) RD3s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST)) -#undef RCONST -//#define RCONST 0x8F1BBCDC "out of range for signed 32bit displacement" -#define RCONST -0x70e44324 - RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4) - RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9) - RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14) - RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19) -#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ -"\n\ - loadW "n13", %esi # W[(n+13) & 15] \n\ - xorW "n8", %esi # ^W[(n+8) & 15] \n\ - xorW "n2", %esi # ^W[(n+2) & 15] \n\ - xorW "n", %esi # ^W[n & 15] \n\ - roll %esi # \n\ - storeW %esi, "n" # store to W[n & 15] \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - xorl %e"b", %edi # ^b \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ - addl %edi, %e"e" # 
e += (c ^ d ^ b) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ -"\n\ - loadW "n13", %esi # W[(n+13) & 15] \n\ - xorW "n8", %esi # ^W[(n+8) & 15] \n\ - xorW "n2", %esi # ^W[(n+2) & 15] \n\ - xorW "n", %esi # ^W[n & 15] \n\ - roll %esi # \n\ - #storeW %esi, "n" # store to W[n & 15] elided \n\ - movl %e"c", %edi # c \n\ - xorl %e"d", %edi # ^d \n\ - xorl %e"b", %edi # ^b \n\ - leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ - addl %edi, %e"e" # e += (c ^ d ^ b) \n\ - movl %e"a", %esi # \n\ - roll $5, %esi # rotl32(a,5) \n\ - addl %esi, %e"e" # e += rotl32(a,5) \n\ - rorl $2, %e"b" # b = rotl32(b,30) \n\ -" -#define RD4A(a,b,c,d,e, n) RD4As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) -#define RD4B(a,b,c,d,e, n) RD4Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) -#undef RCONST -//#define RCONST 0xCA62C1D6 "out of range for signed 32bit displacement" -#define RCONST -0x359d3e2a - RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4) - RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9) - RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14) - RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) +/* in hash_md5_sha_x86-64.S */ +struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; +void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM); -"\n\ - popq %rdi # \n\ - addl %eax, 80(%rdi) # ctx->hash[0] += a \n\ - addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\ - 
addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\ - addl %edx, 92(%rdi) # ctx->hash[3] += d \n\ - addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\ - popq %rbx # \n\ - popq %rbp # \n\ - popq %r12 # \n\ - popq %r13 # \n\ - popq %r14 # \n\ - popq %r15 # \n\ -" - ); /* asm */ -#undef RCONST -} # else /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. * It seems further speedup can be achieved by handling more than diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S new file mode 100644 index 000000000..466cd9ae9 --- /dev/null +++ b/libbb/hash_md5_sha_x86-64.S @@ -0,0 +1,1349 @@ +### Generated by hash_md5_sha_x86-64.S.sh ### +#if defined(__GNUC__) && defined(__x86_64__) + .section .text.sha1_process_block64,"ax",@progbits + .globl sha1_process_block64 + .hidden sha1_process_block64 + .type sha1_process_block64, @function +sha1_process_block64: + pushq %r15 # + pushq %r14 # + pushq %r13 # + pushq %r12 # + pushq %rbp # + pushq %rbx # + pushq %rdi # we need ctx at the end + +#Register and stack use: +# eax..edx: a..d +# ebp: e +# esi,edi: temps +# -32+4*n(%rsp),r8...r15: W[0..7,8..15] + + movq 4*8(%rdi), %r8 + bswapq %r8 + movl %r8d, %r9d + shrq $32, %r8 + movq 4*10(%rdi), %r10 + bswapq %r10 + movl %r10d, %r11d + shrq $32, %r10 + movq 4*12(%rdi), %r12 + bswapq %r12 + movl %r12d, %r13d + shrq $32, %r12 + movq 4*14(%rdi), %r14 + bswapq %r14 + movl %r14d, %r15d + shrq $32, %r14 + + movl $3, %eax +1: + movq (%rdi,%rax,8), %rsi + bswapq %rsi + rolq $32, %rsi + movq %rsi, -32(%rsp,%rax,8) + decl %eax + jns 1b + movl 80(%rdi), %eax # a = ctx->hash[0] + movl 84(%rdi), %ebx # b = ctx->hash[1] + movl 88(%rdi), %ecx # c = ctx->hash[2] + movl 92(%rdi), %edx # d = ctx->hash[3] + movl 96(%rdi), %ebp # e = ctx->hash[4] + +# 0 + # W[0], already in %esi + movl %ecx, %edi # c + xorl %edx, %edi # ^d + andl %ebx, %edi # &b + xorl %edx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbp,%rsi),%ebp # e += RCONST + W[n] + addl %edi, %ebp # e += (((c ^ d) & b) ^ d) + movl %eax,
%esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 1 + movl -32+4*1(%rsp), %esi # W[n] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + andl %eax, %edi # &b + xorl %ecx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + addl %edi, %edx # e += (((c ^ d) & b) ^ d) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 2 + movl -32+4*2(%rsp), %esi # W[n] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + andl %ebp, %edi # &b + xorl %ebx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + addl %edi, %ecx # e += (((c ^ d) & b) ^ d) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 3 + movl -32+4*3(%rsp), %esi # W[n] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + andl %edx, %edi # &b + xorl %eax, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] + addl %edi, %ebx # e += (((c ^ d) & b) ^ d) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 4 + movl -32+4*4(%rsp), %esi # W[n] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + andl %ecx, %edi # &b + xorl %ebp, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] + addl %edi, %eax # e += (((c ^ d) & b) ^ d) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 5 + movl -32+4*5(%rsp), %esi # W[n] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + andl %ebx, %edi # &b + xorl %edx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbp,%rsi),%ebp # e += RCONST + W[n] + addl %edi, %ebp # e += (((c ^ d) & b) ^ d) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 6 + movl -32+4*6(%rsp), %esi # W[n] + movl 
%ebx, %edi # c + xorl %ecx, %edi # ^d + andl %eax, %edi # &b + xorl %ecx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + addl %edi, %edx # e += (((c ^ d) & b) ^ d) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 7 + movl -32+4*7(%rsp), %esi # W[n] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + andl %ebp, %edi # &b + xorl %ebx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + addl %edi, %ecx # e += (((c ^ d) & b) ^ d) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 8 + # W[n], in %r8 + movl %ebp, %edi # c + xorl %eax, %edi # ^d + andl %edx, %edi # &b + xorl %eax, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbx,%r8),%ebx # e += RCONST + W[n] + addl %edi, %ebx # e += (((c ^ d) & b) ^ d) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 9 + # W[n], in %r9 + movl %edx, %edi # c + xorl %ebp, %edi # ^d + andl %ecx, %edi # &b + xorl %ebp, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rax,%r9),%eax # e += RCONST + W[n] + addl %edi, %eax # e += (((c ^ d) & b) ^ d) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 10 + # W[n], in %r10 + movl %ecx, %edi # c + xorl %edx, %edi # ^d + andl %ebx, %edi # &b + xorl %edx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbp,%r10),%ebp # e += RCONST + W[n] + addl %edi, %ebp # e += (((c ^ d) & b) ^ d) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 11 + # W[n], in %r11 + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + andl %eax, %edi # &b + xorl %ecx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rdx,%r11),%edx # e += RCONST + W[n] + addl %edi, %edx # e += (((c ^ d) & b) ^ d) + movl %ebp, %esi # 
+ roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 12 + # W[n], in %r12 + movl %eax, %edi # c + xorl %ebx, %edi # ^d + andl %ebp, %edi # &b + xorl %ebx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rcx,%r12),%ecx # e += RCONST + W[n] + addl %edi, %ecx # e += (((c ^ d) & b) ^ d) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 13 + # W[n], in %r13 + movl %ebp, %edi # c + xorl %eax, %edi # ^d + andl %edx, %edi # &b + xorl %eax, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbx,%r13),%ebx # e += RCONST + W[n] + addl %edi, %ebx # e += (((c ^ d) & b) ^ d) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 14 + # W[n], in %r14 + movl %edx, %edi # c + xorl %ebp, %edi # ^d + andl %ecx, %edi # &b + xorl %ebp, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rax,%r14),%eax # e += RCONST + W[n] + addl %edi, %eax # e += (((c ^ d) & b) ^ d) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 15 + # W[n], in %r15 + movl %ecx, %edi # c + xorl %edx, %edi # ^d + andl %ebx, %edi # &b + xorl %edx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbp,%r15),%ebp # e += RCONST + W[n] + addl %edi, %ebp # e += (((c ^ d) & b) ^ d) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 16 + movl %r13d, %esi # W[(n+13) & 15] + xorl %r8d, %esi # ^W[(n+8) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*0(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*0(%rsp) # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + andl %eax, %edi # &b + xorl %ecx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + addl %edi, %edx # e += (((c ^ d) & b) ^ d) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) 
+ addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 17 + movl %r14d, %esi # W[(n+13) & 15] + xorl %r9d, %esi # ^W[(n+8) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*1(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*1(%rsp) # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + andl %ebp, %edi # &b + xorl %ebx, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + addl %edi, %ecx # e += (((c ^ d) & b) ^ d) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 18 + movl %r15d, %esi # W[(n+13) & 15] + xorl %r10d, %esi # ^W[(n+8) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*2(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*2(%rsp) # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + andl %edx, %edi # &b + xorl %eax, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] + addl %edi, %ebx # e += (((c ^ d) & b) ^ d) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 19 + movl -32+4*0(%rsp), %esi # W[(n+13) & 15] + xorl %r11d, %esi # ^W[(n+8) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*3(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*3(%rsp) # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + andl %ecx, %edi # &b + xorl %ebp, %edi # (((c ^ d) & b) ^ d) + leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] + addl %edi, %eax # e += (((c ^ d) & b) ^ d) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 20 + movl -32+4*1(%rsp), %esi # W[(n+13) & 15] + xorl %r12d, %esi # ^W[(n+8) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*4(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*4(%rsp) # store to W[n & 15] + movl %ecx, %edi # c + 
xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 21 + movl -32+4*2(%rsp), %esi # W[(n+13) & 15] + xorl %r13d, %esi # ^W[(n+8) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*5(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*5(%rsp) # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 22 + movl -32+4*3(%rsp), %esi # W[(n+13) & 15] + xorl %r14d, %esi # ^W[(n+8) & 15] + xorl %r8d, %esi # ^W[(n+2) & 15] + xorl -32+4*6(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*6(%rsp) # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 23 + movl -32+4*4(%rsp), %esi # W[(n+13) & 15] + xorl %r15d, %esi # ^W[(n+8) & 15] + xorl %r9d, %esi # ^W[(n+2) & 15] + xorl -32+4*7(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*7(%rsp) # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 24 + movl -32+4*5(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] + xorl %r10d, %esi # ^W[(n+2) & 15] + xorl %r8d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r8d # store 
to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 25 + movl -32+4*6(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] + xorl %r11d, %esi # ^W[(n+2) & 15] + xorl %r9d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r9d # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 26 + movl -32+4*7(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] + xorl %r12d, %esi # ^W[(n+2) & 15] + xorl %r10d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r10d # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 27 + movl %r8d, %esi # W[(n+13) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] + xorl %r13d, %esi # ^W[(n+2) & 15] + xorl %r11d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r11d # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 28 + movl %r9d, %esi # W[(n+13) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] + xorl %r14d, %esi # ^W[(n+2) & 15] + xorl %r12d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r12d # store to W[n & 
15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 29 + movl %r10d, %esi # W[(n+13) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] + xorl %r15d, %esi # ^W[(n+2) & 15] + xorl %r13d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r13d # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 30 + movl %r11d, %esi # W[(n+13) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] + xorl %r14d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r14d # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 31 + movl %r12d, %esi # W[(n+13) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] + xorl %r15d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r15d # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 32 + movl %r13d, %esi # W[(n+13) & 15] + xorl %r8d, %esi # ^W[(n+8) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*0(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*0(%rsp) # store 
to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 33 + movl %r14d, %esi # W[(n+13) & 15] + xorl %r9d, %esi # ^W[(n+8) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*1(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*1(%rsp) # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 34 + movl %r15d, %esi # W[(n+13) & 15] + xorl %r10d, %esi # ^W[(n+8) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*2(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*2(%rsp) # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 35 + movl -32+4*0(%rsp), %esi # W[(n+13) & 15] + xorl %r11d, %esi # ^W[(n+8) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*3(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*3(%rsp) # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 36 + movl -32+4*1(%rsp), %esi # W[(n+13) & 15] + xorl %r12d, %esi # ^W[(n+8) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*4(%rsp), %esi # ^W[n 
& 15] + roll %esi # + movl %esi, -32+4*4(%rsp) # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 37 + movl -32+4*2(%rsp), %esi # W[(n+13) & 15] + xorl %r13d, %esi # ^W[(n+8) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*5(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*5(%rsp) # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 38 + movl -32+4*3(%rsp), %esi # W[(n+13) & 15] + xorl %r14d, %esi # ^W[(n+8) & 15] + xorl %r8d, %esi # ^W[(n+2) & 15] + xorl -32+4*6(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*6(%rsp) # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 39 + movl -32+4*4(%rsp), %esi # W[(n+13) & 15] + xorl %r15d, %esi # ^W[(n+8) & 15] + xorl %r9d, %esi # ^W[(n+2) & 15] + xorl -32+4*7(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*7(%rsp) # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 40 + movl %ebx, %edi # di: b + movl %ebx, %esi # si: b + orl %ecx, %edi # di: b | c + andl %ecx, %esi # 
si: b & c + andl %edx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*5(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] + xorl %r10d, %esi # ^W[(n+2) & 15] + xorl %r8d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r8d # store to W[n & 15] + addl %edi, %ebp # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 41 + movl %eax, %edi # di: b + movl %eax, %esi # si: b + orl %ebx, %edi # di: b | c + andl %ebx, %esi # si: b & c + andl %ecx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*6(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] + xorl %r11d, %esi # ^W[(n+2) & 15] + xorl %r9d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r9d # store to W[n & 15] + addl %edi, %edx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 42 + movl %ebp, %edi # di: b + movl %ebp, %esi # si: b + orl %eax, %edi # di: b | c + andl %eax, %esi # si: b & c + andl %ebx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*7(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] + xorl %r12d, %esi # ^W[(n+2) & 15] + xorl %r10d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r10d # store to W[n & 15] + addl %edi, %ecx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 43 + movl %edx, %edi # di: b + movl %edx, %esi # si: b + orl %ebp, %edi # di: b | c + andl %ebp, %esi # si: b & c + andl %eax, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl 
%r8d, %esi # W[(n+13) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] + xorl %r13d, %esi # ^W[(n+2) & 15] + xorl %r11d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r11d # store to W[n & 15] + addl %edi, %ebx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 44 + movl %ecx, %edi # di: b + movl %ecx, %esi # si: b + orl %edx, %edi # di: b | c + andl %edx, %esi # si: b & c + andl %ebp, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r9d, %esi # W[(n+13) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] + xorl %r14d, %esi # ^W[(n+2) & 15] + xorl %r12d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r12d # store to W[n & 15] + addl %edi, %eax # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 45 + movl %ebx, %edi # di: b + movl %ebx, %esi # si: b + orl %ecx, %edi # di: b | c + andl %ecx, %esi # si: b & c + andl %edx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r10d, %esi # W[(n+13) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] + xorl %r15d, %esi # ^W[(n+2) & 15] + xorl %r13d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r13d # store to W[n & 15] + addl %edi, %ebp # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 46 + movl %eax, %edi # di: b + movl %eax, %esi # si: b + orl %ebx, %edi # di: b | c + andl %ebx, %esi # si: b & c + andl %ecx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r11d, %esi # W[(n+13) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] + 
xorl %r14d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r14d # store to W[n & 15] + addl %edi, %edx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 47 + movl %ebp, %edi # di: b + movl %ebp, %esi # si: b + orl %eax, %edi # di: b | c + andl %eax, %esi # si: b & c + andl %ebx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r12d, %esi # W[(n+13) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] + xorl %r15d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r15d # store to W[n & 15] + addl %edi, %ecx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 48 + movl %edx, %edi # di: b + movl %edx, %esi # si: b + orl %ebp, %edi # di: b | c + andl %ebp, %esi # si: b & c + andl %eax, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r13d, %esi # W[(n+13) & 15] + xorl %r8d, %esi # ^W[(n+8) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*0(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*0(%rsp) # store to W[n & 15] + addl %edi, %ebx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 49 + movl %ecx, %edi # di: b + movl %ecx, %esi # si: b + orl %edx, %edi # di: b | c + andl %edx, %esi # si: b & c + andl %ebp, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r14d, %esi # W[(n+13) & 15] + xorl %r9d, %esi # ^W[(n+8) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*1(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*1(%rsp) # store to W[n & 
15] + addl %edi, %eax # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 50 + movl %ebx, %edi # di: b + movl %ebx, %esi # si: b + orl %ecx, %edi # di: b | c + andl %ecx, %esi # si: b & c + andl %edx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r15d, %esi # W[(n+13) & 15] + xorl %r10d, %esi # ^W[(n+8) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*2(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*2(%rsp) # store to W[n & 15] + addl %edi, %ebp # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 51 + movl %eax, %edi # di: b + movl %eax, %esi # si: b + orl %ebx, %edi # di: b | c + andl %ebx, %esi # si: b & c + andl %ecx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*0(%rsp), %esi # W[(n+13) & 15] + xorl %r11d, %esi # ^W[(n+8) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*3(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*3(%rsp) # store to W[n & 15] + addl %edi, %edx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 52 + movl %ebp, %edi # di: b + movl %ebp, %esi # si: b + orl %eax, %edi # di: b | c + andl %eax, %esi # si: b & c + andl %ebx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*1(%rsp), %esi # W[(n+13) & 15] + xorl %r12d, %esi # ^W[(n+8) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*4(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*4(%rsp) # store to W[n & 15] + addl %edi, %ecx # += ((b | c) & d) | (b & c) + 
leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 53 + movl %edx, %edi # di: b + movl %edx, %esi # si: b + orl %ebp, %edi # di: b | c + andl %ebp, %esi # si: b & c + andl %eax, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*2(%rsp), %esi # W[(n+13) & 15] + xorl %r13d, %esi # ^W[(n+8) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*5(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*5(%rsp) # store to W[n & 15] + addl %edi, %ebx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 54 + movl %ecx, %edi # di: b + movl %ecx, %esi # si: b + orl %edx, %edi # di: b | c + andl %edx, %esi # si: b & c + andl %ebp, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*3(%rsp), %esi # W[(n+13) & 15] + xorl %r14d, %esi # ^W[(n+8) & 15] + xorl %r8d, %esi # ^W[(n+2) & 15] + xorl -32+4*6(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*6(%rsp) # store to W[n & 15] + addl %edi, %eax # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 55 + movl %ebx, %edi # di: b + movl %ebx, %esi # si: b + orl %ecx, %edi # di: b | c + andl %ecx, %esi # si: b & c + andl %edx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*4(%rsp), %esi # W[(n+13) & 15] + xorl %r15d, %esi # ^W[(n+8) & 15] + xorl %r9d, %esi # ^W[(n+2) & 15] + xorl -32+4*7(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*7(%rsp) # store to W[n & 15] + addl %edi, %ebp # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + movl 
%eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 56 + movl %eax, %edi # di: b + movl %eax, %esi # si: b + orl %ebx, %edi # di: b | c + andl %ebx, %esi # si: b & c + andl %ecx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*5(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] + xorl %r10d, %esi # ^W[(n+2) & 15] + xorl %r8d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r8d # store to W[n & 15] + addl %edi, %edx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 57 + movl %ebp, %edi # di: b + movl %ebp, %esi # si: b + orl %eax, %edi # di: b | c + andl %eax, %esi # si: b & c + andl %ebx, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*6(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] + xorl %r11d, %esi # ^W[(n+2) & 15] + xorl %r9d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r9d # store to W[n & 15] + addl %edi, %ecx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 58 + movl %edx, %edi # di: b + movl %edx, %esi # si: b + orl %ebp, %edi # di: b | c + andl %ebp, %esi # si: b & c + andl %eax, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl -32+4*7(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] + xorl %r12d, %esi # ^W[(n+2) & 15] + xorl %r10d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r10d # store to W[n & 15] + addl %edi, %ebx # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # 
b = rotl32(b,30) +# 59 + movl %ecx, %edi # di: b + movl %ecx, %esi # si: b + orl %edx, %edi # di: b | c + andl %edx, %esi # si: b & c + andl %ebp, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) + movl %r8d, %esi # W[(n+13) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] + xorl %r13d, %esi # ^W[(n+2) & 15] + xorl %r11d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r11d # store to W[n & 15] + addl %edi, %eax # += ((b | c) & d) | (b & c) + leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 60 + movl %r9d, %esi # W[(n+13) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] + xorl %r14d, %esi # ^W[(n+2) & 15] + xorl %r12d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r12d # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 61 + movl %r10d, %esi # W[(n+13) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] + xorl %r15d, %esi # ^W[(n+2) & 15] + xorl %r13d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r13d # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 62 + movl %r11d, %esi # W[(n+13) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] + xorl %r14d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r14d # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e 
+= (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 63 + movl %r12d, %esi # W[(n+13) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] + xorl %r15d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r15d # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 64 + movl %r13d, %esi # W[(n+13) & 15] + xorl %r8d, %esi # ^W[(n+8) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*0(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*0(%rsp) # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 65 + movl %r14d, %esi # W[(n+13) & 15] + xorl %r9d, %esi # ^W[(n+8) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*1(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*1(%rsp) # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 66 + movl %r15d, %esi # W[(n+13) & 15] + xorl %r10d, %esi # ^W[(n+8) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*2(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*2(%rsp) # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal -0x359d3e2a(%rdx,%rsi), %edx # e += 
RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 67 + movl -32+4*0(%rsp), %esi # W[(n+13) & 15] + xorl %r11d, %esi # ^W[(n+8) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*3(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*3(%rsp) # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 68 + movl -32+4*1(%rsp), %esi # W[(n+13) & 15] + xorl %r12d, %esi # ^W[(n+8) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*4(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*4(%rsp) # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 69 + movl -32+4*2(%rsp), %esi # W[(n+13) & 15] + xorl %r13d, %esi # ^W[(n+8) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15] + xorl -32+4*5(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*5(%rsp) # store to W[n & 15] + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 70 + movl -32+4*3(%rsp), %esi # W[(n+13) & 15] + xorl %r14d, %esi # ^W[(n+8) & 15] + xorl %r8d, %esi # ^W[(n+2) & 15] + xorl -32+4*6(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*6(%rsp) # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # 
^d + xorl %ebx, %edi # ^b + leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 71 + movl -32+4*4(%rsp), %esi # W[(n+13) & 15] + xorl %r15d, %esi # ^W[(n+8) & 15] + xorl %r9d, %esi # ^W[(n+2) & 15] + xorl -32+4*7(%rsp), %esi # ^W[n & 15] + roll %esi # + movl %esi, -32+4*7(%rsp) # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 72 + movl -32+4*5(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] + xorl %r10d, %esi # ^W[(n+2) & 15] + xorl %r8d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r8d # store to W[n & 15] + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 73 + movl -32+4*6(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] + xorl %r11d, %esi # ^W[(n+2) & 15] + xorl %r9d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r9d # store to W[n & 15] + movl %ebp, %edi # c + xorl %eax, %edi # ^d + xorl %edx, %edi # ^b + leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 74 + movl -32+4*7(%rsp), %esi # W[(n+13) & 15] + xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] + xorl %r12d, %esi # ^W[(n+2) & 15] + xorl %r10d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r10d # store to W[n & 15] + movl %edx, %edi # c + 
xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) +# 75 + movl %r8d, %esi # W[(n+13) & 15] + xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] + xorl %r13d, %esi # ^W[(n+2) & 15] + xorl %r11d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r11d # store to W[n & 15] + movl %ecx, %edi # c + xorl %edx, %edi # ^d + xorl %ebx, %edi # ^b + leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + addl %edi, %ebp # e += (c ^ d ^ b) + movl %eax, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebp # e += rotl32(a,5) + rorl $2, %ebx # b = rotl32(b,30) +# 76 + movl %r9d, %esi # W[(n+13) & 15] + xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] + xorl %r14d, %esi # ^W[(n+2) & 15] + xorl %r12d, %esi # ^W[n & 15] + roll %esi # + movl %esi, %r12d # store to W[n & 15] + movl %ebx, %edi # c + xorl %ecx, %edi # ^d + xorl %eax, %edi # ^b + leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + addl %edi, %edx # e += (c ^ d ^ b) + movl %ebp, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %edx # e += rotl32(a,5) + rorl $2, %eax # b = rotl32(b,30) +# 77 + movl %r10d, %esi # W[(n+13) & 15] + xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] + xorl %r15d, %esi # ^W[(n+2) & 15] + xorl %r13d, %esi # ^W[n & 15] + roll %esi # + # store to W[n & 15] - unused, not done + movl %eax, %edi # c + xorl %ebx, %edi # ^d + xorl %ebp, %edi # ^b + leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + addl %edi, %ecx # e += (c ^ d ^ b) + movl %edx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ecx # e += rotl32(a,5) + rorl $2, %ebp # b = rotl32(b,30) +# 78 + movl %r11d, %esi # W[(n+13) & 15] + xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] + xorl %r14d, %esi # ^W[n & 15] + roll %esi # + # store to W[n & 15] - unused, not done + movl %ebp, %edi # c + xorl 
%eax, %edi # ^d + xorl %edx, %edi # ^b + leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + addl %edi, %ebx # e += (c ^ d ^ b) + movl %ecx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %ebx # e += rotl32(a,5) + rorl $2, %edx # b = rotl32(b,30) +# 79 + movl %r12d, %esi # W[(n+13) & 15] + xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] + xorl %r15d, %esi # ^W[n & 15] + roll %esi # + # store to W[n & 15] - unused, not done + movl %edx, %edi # c + xorl %ebp, %edi # ^d + xorl %ecx, %edi # ^b + leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + addl %edi, %eax # e += (c ^ d ^ b) + movl %ebx, %esi # + roll $5, %esi # rotl32(a,5) + addl %esi, %eax # e += rotl32(a,5) + rorl $2, %ecx # b = rotl32(b,30) + + popq %rdi # + addl %eax, 80(%rdi) # ctx->hash[0] += a + addl %ebx, 84(%rdi) # ctx->hash[1] += b + addl %ecx, 88(%rdi) # ctx->hash[2] += c + addl %edx, 92(%rdi) # ctx->hash[3] += d + addl %ebp, 96(%rdi) # ctx->hash[4] += e + popq %rbx # + popq %rbp # + popq %r12 # + popq %r13 # + popq %r14 # + popq %r15 # + + ret + .size sha1_process_block64, .-sha1_process_block64 +#endif From vda.linux at googlemail.com Mon Jan 3 12:10:30 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 3 Jan 2022 13:10:30 +0100 Subject: [git commit] libbb/sha1: x86_64 version: generate from a script, optimize a bit Message-ID: <20220103120658.7C72782DEC@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=947bef0deaba7b2ce432d515379091dcd4cf747f branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64 3569 3502 -67 Signed-off-by: Denys Vlasenko --- libbb/Config.src | 2 +- libbb/hash_md5_sha_x86-64.S | 472 ++++++++++++++++++----------------------- libbb/hash_md5_sha_x86-64.S.sh | 267 +++++++++++++++++++++++ 3 files changed, 474 insertions(+), 267 deletions(-) diff --git a/libbb/Config.src b/libbb/Config.src index 42a2283aa..c80bee286 100644 --- 
a/libbb/Config.src +++ b/libbb/Config.src @@ -59,7 +59,7 @@ config SHA1_SMALL Trade binary size versus speed for the sha1 algorithm. throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 - 0 367 367 3657 3570 + 0 367 375 3657 3502 1 224 229 654 732 2,3 200 195 358 380 diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index 466cd9ae9..3e1c4b455 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -1,23 +1,27 @@ ### Generated by hash_md5_sha_x86-64.S.sh ### -#if defined(__GNUC__) && defined(__x86_64__) + +#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) .section .text.sha1_process_block64,"ax",@progbits - .globl sha1_process_block64 - .hidden sha1_process_block64 + .globl sha1_process_block64 + .hidden sha1_process_block64 .type sha1_process_block64, @function + + .balign 8 # allow decoders to fetch at least 4 first insns sha1_process_block64: - pushq %r15 # - pushq %r14 # - pushq %r13 # - pushq %r12 # - pushq %rbp # - pushq %rbx # - pushq %rdi # we need ctx at the end + pushq %r15 # + pushq %r14 # + pushq %r13 # + pushq %r12 # + pushq %rbp # + pushq %rbx # + pushq %rdi # we need ctx at the end #Register and stack use: # eax..edx: a..d # ebp: e # esi,edi: temps # -32+4*n(%rsp),r8...r15: W[0..7,8..15] +# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?)
movq 4*8(%rdi), %r8 bswapq %r8 @@ -253,7 +257,7 @@ sha1_process_block64: xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (((c ^ d) & b) ^ d) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -270,7 +274,7 @@ sha1_process_block64: xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (((c ^ d) & b) ^ d) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -287,7 +291,7 @@ sha1_process_block64: xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] + leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (((c ^ d) & b) ^ d) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -304,7 +308,7 @@ sha1_process_block64: xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (((c ^ d) & b) ^ d) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -320,7 +324,7 @@ sha1_process_block64: movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -336,7 +340,7 @@ sha1_process_block64: movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -352,7 +356,7 @@ sha1_process_block64: movl 
%eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -368,135 +372,119 @@ sha1_process_block64: movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 24 - movl -32+4*5(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] - xorl %r10d, %esi # ^W[(n+2) & 15] - xorl %r8d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r8d # store to W[n & 15] + xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] + xorl %r10d, %r8d # ^W[(n+2) & 15] + roll %r8d # movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 25 - movl -32+4*6(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] - xorl %r11d, %esi # ^W[(n+2) & 15] - xorl %r9d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r9d # store to W[n & 15] + xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] + xorl %r11d, %r9d # ^W[(n+2) & 15] + roll %r9d # movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) rorl $2, 
%ebx # b = rotl32(b,30) # 26 - movl -32+4*7(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] - xorl %r12d, %esi # ^W[(n+2) & 15] - xorl %r10d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r10d # store to W[n & 15] + xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] + xorl %r12d, %r10d # ^W[(n+2) & 15] + roll %r10d # movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 27 - movl %r8d, %esi # W[(n+13) & 15] - xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] - xorl %r13d, %esi # ^W[(n+2) & 15] - xorl %r11d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r11d # store to W[n & 15] + xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] + xorl %r13d, %r11d # ^W[(n+2) & 15] + roll %r11d # movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 28 - movl %r9d, %esi # W[(n+13) & 15] - xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] - xorl %r14d, %esi # ^W[(n+2) & 15] - xorl %r12d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r12d # store to W[n & 15] + xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] + xorl %r14d, %r12d # ^W[(n+2) & 15] + roll %r12d # movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll 
$5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 29 - movl %r10d, %esi # W[(n+13) & 15] - xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] - xorl %r15d, %esi # ^W[(n+2) & 15] - xorl %r13d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r13d # store to W[n & 15] + xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] + xorl %r15d, %r13d # ^W[(n+2) & 15] + roll %r13d # movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 30 - movl %r11d, %esi # W[(n+13) & 15] - xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] - xorl %r14d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r14d # store to W[n & 15] + xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] + roll %r14d # movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 31 - movl %r12d, %esi # W[(n+13) & 15] - xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] - xorl %r15d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r15d # store to W[n & 15] + xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] + roll %r15d # movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rdx,%r15), 
%edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -512,7 +500,7 @@ sha1_process_block64: movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -528,7 +516,7 @@ sha1_process_block64: movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -544,7 +532,7 @@ sha1_process_block64: movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -560,7 +548,7 @@ sha1_process_block64: movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -576,7 +564,7 @@ sha1_process_block64: movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -592,7 +580,7 @@ sha1_process_block64: movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -608,7 +596,7 @@ sha1_process_block64: movl %ebp, %edi 
# c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -624,7 +612,7 @@ sha1_process_block64: movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W + leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -637,14 +625,12 @@ sha1_process_block64: andl %ecx, %esi # si: b & c andl %edx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*5(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] - xorl %r10d, %esi # ^W[(n+2) & 15] - xorl %r8d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r8d # store to W[n & 15] + xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] + xorl %r10d, %r8d # ^W[(n+2) & 15] + roll %r8d # addl %edi, %ebp # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15] movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) @@ -656,14 +642,12 @@ sha1_process_block64: andl %ebx, %esi # si: b & c andl %ecx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*6(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] - xorl %r11d, %esi # ^W[(n+2) & 15] - xorl %r9d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r9d # store to W[n & 15] + xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] + xorl %r11d, %r9d # ^W[(n+2) & 15] + roll %r9d # addl %edi, %edx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15] movl %ebp, %esi # roll 
$5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) @@ -675,14 +659,12 @@ sha1_process_block64: andl %eax, %esi # si: b & c andl %ebx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*7(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] - xorl %r12d, %esi # ^W[(n+2) & 15] - xorl %r10d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r10d # store to W[n & 15] + xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] + xorl %r12d, %r10d # ^W[(n+2) & 15] + roll %r10d # addl %edi, %ecx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15] movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) @@ -694,14 +676,12 @@ sha1_process_block64: andl %ebp, %esi # si: b & c andl %eax, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r8d, %esi # W[(n+13) & 15] - xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] - xorl %r13d, %esi # ^W[(n+2) & 15] - xorl %r11d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r11d # store to W[n & 15] + xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] + xorl %r13d, %r11d # ^W[(n+2) & 15] + roll %r11d # addl %edi, %ebx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15] movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) @@ -713,14 +693,12 @@ sha1_process_block64: andl %edx, %esi # si: b & c andl %ebp, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r9d, %esi # W[(n+13) & 15] - xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] - xorl %r14d, %esi # ^W[(n+2) & 15] - xorl %r12d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r12d # store to W[n & 15] + xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 
15] + xorl %r14d, %r12d # ^W[(n+2) & 15] + roll %r12d # addl %edi, %eax # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15] movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) @@ -732,14 +710,12 @@ sha1_process_block64: andl %ecx, %esi # si: b & c andl %edx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r10d, %esi # W[(n+13) & 15] - xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] - xorl %r15d, %esi # ^W[(n+2) & 15] - xorl %r13d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r13d # store to W[n & 15] + xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] + xorl %r15d, %r13d # ^W[(n+2) & 15] + roll %r13d # addl %edi, %ebp # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15] movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) @@ -751,14 +727,12 @@ sha1_process_block64: andl %ebx, %esi # si: b & c andl %ecx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r11d, %esi # W[(n+13) & 15] - xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] - xorl %r14d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r14d # store to W[n & 15] + xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] + roll %r14d # addl %edi, %edx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15] movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) @@ -770,14 +744,12 @@ sha1_process_block64: andl %eax, %esi # si: b & c andl %ebx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r12d, %esi # W[(n+13) & 15] - xorl 
-32+4*7(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] - xorl %r15d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r15d # store to W[n & 15] + xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] + roll %r15d # addl %edi, %ecx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15] movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) @@ -796,7 +768,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*0(%rsp) # store to W[n & 15] addl %edi, %ebx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) @@ -815,7 +787,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*1(%rsp) # store to W[n & 15] addl %edi, %eax # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) @@ -834,7 +806,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*2(%rsp) # store to W[n & 15] addl %edi, %ebp # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) @@ -853,7 +825,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*3(%rsp) # store to W[n & 15] addl %edi, %edx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15] movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) @@ -872,7 +844,7 @@ 
sha1_process_block64: roll %esi # movl %esi, -32+4*4(%rsp) # store to W[n & 15] addl %edi, %ecx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) @@ -891,7 +863,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*5(%rsp) # store to W[n & 15] addl %edi, %ebx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) @@ -910,7 +882,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*6(%rsp) # store to W[n & 15] addl %edi, %eax # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) @@ -929,7 +901,7 @@ sha1_process_block64: roll %esi # movl %esi, -32+4*7(%rsp) # store to W[n & 15] addl %edi, %ebp # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) @@ -941,14 +913,12 @@ sha1_process_block64: andl %ebx, %esi # si: b & c andl %ecx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*5(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] - xorl %r10d, %esi # ^W[(n+2) & 15] - xorl %r8d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r8d # store to W[n & 15] + xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] + xorl %r10d, %r8d # ^W[(n+2) & 15] + roll %r8d # addl %edi, %edx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal 
-0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15] movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) @@ -960,14 +930,12 @@ sha1_process_block64: andl %eax, %esi # si: b & c andl %ebx, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*6(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] - xorl %r11d, %esi # ^W[(n+2) & 15] - xorl %r9d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r9d # store to W[n & 15] + xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] + xorl %r11d, %r9d # ^W[(n+2) & 15] + roll %r9d # addl %edi, %ecx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15] movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) @@ -979,14 +947,12 @@ sha1_process_block64: andl %ebp, %esi # si: b & c andl %eax, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl -32+4*7(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] - xorl %r12d, %esi # ^W[(n+2) & 15] - xorl %r10d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r10d # store to W[n & 15] + xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] + xorl %r12d, %r10d # ^W[(n+2) & 15] + roll %r10d # addl %edi, %ebx # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15] movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) @@ -998,77 +964,67 @@ sha1_process_block64: andl %edx, %esi # si: b & c andl %ebp, %edi # di: (b | c) & d orl %esi, %edi # ((b | c) & d) | (b & c) - movl %r8d, %esi # W[(n+13) & 15] - xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] - xorl %r13d, %esi # ^W[(n+2) & 15] - xorl %r11d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r11d # store to W[n & 15] + 
xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] + xorl %r13d, %r11d # ^W[(n+2) & 15] + roll %r11d # addl %edi, %eax # += ((b | c) & d) | (b & c) - leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15] movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 60 - movl %r9d, %esi # W[(n+13) & 15] - xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] - xorl %r14d, %esi # ^W[(n+2) & 15] - xorl %r12d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r12d # store to W[n & 15] + xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] + xorl %r14d, %r12d # ^W[(n+2) & 15] + roll %r12d # movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 61 - movl %r10d, %esi # W[(n+13) & 15] - xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] - xorl %r15d, %esi # ^W[(n+2) & 15] - xorl %r13d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r13d # store to W[n & 15] + xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] + xorl %r15d, %r13d # ^W[(n+2) & 15] + roll %r13d # movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 62 - movl %r11d, %esi # W[(n+13) & 15] - xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] - xorl %r14d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r14d # store 
to W[n & 15] + xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] + roll %r14d # movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 63 - movl %r12d, %esi # W[(n+13) & 15] - xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] - xorl %r15d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r15d # store to W[n & 15] + xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] + roll %r15d # movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -1084,7 +1040,7 @@ sha1_process_block64: movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -1100,7 +1056,7 @@ sha1_process_block64: movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -1116,7 +1072,7 @@ sha1_process_block64: movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] addl %edi, %edx # 
e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -1132,7 +1088,7 @@ sha1_process_block64: movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -1148,7 +1104,7 @@ sha1_process_block64: movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -1164,7 +1120,7 @@ sha1_process_block64: movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -1180,7 +1136,7 @@ sha1_process_block64: movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -1196,135 +1152,119 @@ sha1_process_block64: movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 72 - movl -32+4*5(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] - xorl %r10d, %esi # ^W[(n+2) & 15] - xorl %r8d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r8d # store to W[n & 15] + xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] + 
xorl %r10d, %r8d # ^W[(n+2) & 15] + roll %r8d # movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 73 - movl -32+4*6(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] - xorl %r11d, %esi # ^W[(n+2) & 15] - xorl %r9d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r9d # store to W[n & 15] + xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] + xorl %r11d, %r9d # ^W[(n+2) & 15] + roll %r9d # movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 74 - movl -32+4*7(%rsp), %esi # W[(n+13) & 15] - xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] - xorl %r12d, %esi # ^W[(n+2) & 15] - xorl %r10d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r10d # store to W[n & 15] + xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] + xorl %r12d, %r10d # ^W[(n+2) & 15] + roll %r10d # movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 75 - movl %r8d, %esi # W[(n+13) & 15] - xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] - xorl %r13d, %esi # ^W[(n+2) & 15] - xorl %r11d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r11d # store to W[n & 15] + xorl %r8d, 
%r11d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] + xorl %r13d, %r11d # ^W[(n+2) & 15] + roll %r11d # movl %ecx, %edi # c xorl %edx, %edi # ^d xorl %ebx, %edi # ^b - leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W + leal -0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15] addl %edi, %ebp # e += (c ^ d ^ b) movl %eax, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 76 - movl %r9d, %esi # W[(n+13) & 15] - xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] - xorl %r14d, %esi # ^W[(n+2) & 15] - xorl %r12d, %esi # ^W[n & 15] - roll %esi # - movl %esi, %r12d # store to W[n & 15] + xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] + xorl %r14d, %r12d # ^W[(n+2) & 15] + roll %r12d # movl %ebx, %edi # c xorl %ecx, %edi # ^d xorl %eax, %edi # ^b - leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W + leal -0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15] addl %edi, %edx # e += (c ^ d ^ b) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 77 - movl %r10d, %esi # W[(n+13) & 15] - xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] - xorl %r15d, %esi # ^W[(n+2) & 15] - xorl %r13d, %esi # ^W[n & 15] - roll %esi # - # store to W[n & 15] - unused, not done + xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] + xorl %r15d, %r13d # ^W[(n+2) & 15] + roll %r13d # movl %eax, %edi # c xorl %ebx, %edi # ^d xorl %ebp, %edi # ^b - leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W + leal -0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15] addl %edi, %ecx # e += (c ^ d ^ b) movl %edx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 78 - movl %r11d, %esi # W[(n+13) & 15] - xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] - xorl %r14d, %esi # ^W[n & 
15] - roll %esi # - # store to W[n & 15] - unused, not done + xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] + xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] + roll %r14d # movl %ebp, %edi # c xorl %eax, %edi # ^d xorl %edx, %edi # ^b - leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W + leal -0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15] addl %edi, %ebx # e += (c ^ d ^ b) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) addl %esi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 79 - movl %r12d, %esi # W[(n+13) & 15] - xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] - xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] - xorl %r15d, %esi # ^W[n & 15] - roll %esi # - # store to W[n & 15] - unused, not done + xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] + xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] + xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] + roll %r15d # movl %edx, %edi # c xorl %ebp, %edi # ^d xorl %ecx, %edi # ^b - leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W + leal -0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15] addl %edi, %eax # e += (c ^ d ^ b) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh new file mode 100755 index 000000000..931c0f0fd --- /dev/null +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -0,0 +1,267 @@ +#!/bin/sh + +# We don't regenerate it on every "make" invocation - only by hand. +# The reason is that the changes to generated code are difficult +# to visualize by looking only at this script, it helps when the commit +# also contains the diff of the generated file. 
+exec >hash_md5_sha_x86-64.S + +echo \ +'### Generated by hash_md5_sha_x86-64.S.sh ### + +#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) + .section .text.sha1_process_block64,"ax",@progbits + .globl sha1_process_block64 + .hidden sha1_process_block64 + .type sha1_process_block64, @function + + .balign 8 # allow decoders to fetch at least 4 first insns +sha1_process_block64: + pushq %r15 # + pushq %r14 # + pushq %r13 # + pushq %r12 # + pushq %rbp # + pushq %rbx # + pushq %rdi # we need ctx at the end + +#Register and stack use: +# eax..edx: a..d +# ebp: e +# esi,edi: temps +# -32+4*n(%rsp),r8...r15: W[0..7,8..15] +# (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) + + movq 4*8(%rdi), %r8 + bswapq %r8 + movl %r8d, %r9d + shrq $32, %r8 + movq 4*10(%rdi), %r10 + bswapq %r10 + movl %r10d, %r11d + shrq $32, %r10 + movq 4*12(%rdi), %r12 + bswapq %r12 + movl %r12d, %r13d + shrq $32, %r12 + movq 4*14(%rdi), %r14 + bswapq %r14 + movl %r14d, %r15d + shrq $32, %r14 + + movl $3, %eax +1: + movq (%rdi,%rax,8), %rsi + bswapq %rsi + rolq $32, %rsi + movq %rsi, -32(%rsp,%rax,8) + decl %eax + jns 1b + movl 80(%rdi), %eax # a = ctx->hash[0] + movl 84(%rdi), %ebx # b = ctx->hash[1] + movl 88(%rdi), %ecx # c = ctx->hash[2] + movl 92(%rdi), %edx # d = ctx->hash[3] + movl 96(%rdi), %ebp # e = ctx->hash[4] +' +W32() { +test "$1" || exit 1 +test "$1" -lt 0 && exit 1 +test "$1" -gt 15 && exit 1 +test "$1" -lt 8 && echo "-32+4*$1(%rsp)" +test "$1" -ge 8 && echo "%r${1}d" +} + +RD1A() { +local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 +local n=$(($6)) +echo "# $n" +test $n = 0 && echo " + # W[0], already in %esi +";test $n != 0 && test $n -lt 8 && echo " + movl `W32 $n`, %esi # W[n] +";test $n -ge 8 && echo " + # W[n], in %r$n +";echo " + movl %e$c, %edi # c + xorl %e$d, %edi # ^d + andl %e$b, %edi # &b + xorl %e$d, %edi # (((c ^ d) & b) ^ d) +";test $n -lt 8 && echo " + leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n] +";test $n
-ge 8 && echo " + leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n] +";echo " + addl %edi, %e$e # e += (((c ^ d) & b) ^ d) + movl %e$a, %esi # + roll \$5, %esi # rotl32(a,5) + addl %esi, %e$e # e += rotl32(a,5) + rorl \$2, %e$b # b = rotl32(b,30) +" +} +RD1B() { +local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 +local n=$(($6)) +local n13=$(((n+13) & 15)) +local n8=$(((n+8) & 15)) +local n2=$(((n+2) & 15)) +local n0=$(((n+0) & 15)) +echo " +# $n +";test $n0 -lt 8 && echo " + movl `W32 $n13`, %esi # W[(n+13) & 15] + xorl `W32 $n8`, %esi # ^W[(n+8) & 15] + xorl `W32 $n2`, %esi # ^W[(n+2) & 15] + xorl `W32 $n0`, %esi # ^W[n & 15] + roll %esi # + movl %esi, `W32 $n0` # store to W[n & 15] +";test $n0 -ge 8 && echo " + xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] + xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] + xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] + roll `W32 $n0` # +"; echo " + movl %e$c, %edi # c + xorl %e$d, %edi # ^d + andl %e$b, %edi # &b + xorl %e$d, %edi # (((c ^ d) & b) ^ d) +";test $n0 -lt 8 && echo " + leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] +";test $n0 -ge 8 && echo " + leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] +";echo " + addl %edi, %e$e # e += (((c ^ d) & b) ^ d) + movl %e$a, %esi # + roll \$5, %esi # rotl32(a,5) + addl %esi, %e$e # e += rotl32(a,5) + rorl \$2, %e$b # b = rotl32(b,30) +" +} +{ +RCONST=0x5A827999 +RD1A ax bx cx dx bp 0; RD1A bp ax bx cx dx 1; RD1A dx bp ax bx cx 2; RD1A cx dx bp ax bx 3; RD1A bx cx dx bp ax 4 +RD1A ax bx cx dx bp 5; RD1A bp ax bx cx dx 6; RD1A dx bp ax bx cx 7; RD1A cx dx bp ax bx 8; RD1A bx cx dx bp ax 9 +RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14 +RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19 +} | grep -v '^$' + +RD2() { +local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 +local n=$(($6)) +local n13=$(((n+13) & 15)) 
+local n8=$(((n+8) & 15)) +local n2=$(((n+2) & 15)) +local n0=$(((n+0) & 15)) +echo " +# $n +";test $n0 -lt 8 && echo " + movl `W32 $n13`, %esi # W[(n+13) & 15] + xorl `W32 $n8`, %esi # ^W[(n+8) & 15] + xorl `W32 $n2`, %esi # ^W[(n+2) & 15] + xorl `W32 $n0`, %esi # ^W[n & 15] + roll %esi # + movl %esi, `W32 $n0` # store to W[n & 15] +";test $n0 -ge 8 && echo " + xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] + xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] + xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] + roll `W32 $n0` # +"; echo " + movl %e$c, %edi # c + xorl %e$d, %edi # ^d + xorl %e$b, %edi # ^b +";test $n0 -lt 8 && echo " + leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] +";test $n0 -ge 8 && echo " + leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] +";echo " + addl %edi, %e$e # e += (c ^ d ^ b) + movl %e$a, %esi # + roll \$5, %esi # rotl32(a,5) + addl %esi, %e$e # e += rotl32(a,5) + rorl \$2, %e$b # b = rotl32(b,30) +" +} +{ +RCONST=0x6ED9EBA1 +RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24 +RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29 +RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34 +RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39 +} | grep -v '^$' + +RD3() { +local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 +local n=$(($6)) +local n13=$(((n+13) & 15)) +local n8=$(((n+8) & 15)) +local n2=$(((n+2) & 15)) +local n0=$(((n+0) & 15)) +echo " +# $n + movl %e$b, %edi # di: b + movl %e$b, %esi # si: b + orl %e$c, %edi # di: b | c + andl %e$c, %esi # si: b & c + andl %e$d, %edi # di: (b | c) & d + orl %esi, %edi # ((b | c) & d) | (b & c) +";test $n0 -lt 8 && echo " + movl `W32 $n13`, %esi # W[(n+13) & 15] + xorl `W32 $n8`, %esi # ^W[(n+8) & 15] + xorl `W32 $n2`, %esi # ^W[(n+2) & 
15] + xorl `W32 $n0`, %esi # ^W[n & 15] + roll %esi # + movl %esi, `W32 $n0` # store to W[n & 15] +";test $n0 -ge 8 && echo " + xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] + xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] + xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] + roll `W32 $n0` # +"; echo " + addl %edi, %e$e # += ((b | c) & d) | (b & c) +";test $n0 -lt 8 && echo " + leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] +";test $n0 -ge 8 && echo " + leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] +";echo " + movl %e$a, %esi # + roll \$5, %esi # rotl32(a,5) + addl %esi, %e$e # e += rotl32(a,5) + rorl \$2, %e$b # b = rotl32(b,30) +" +} +{ +#RCONST=0x8F1BBCDC "out of range for signed 32bit displacement" +RCONST=-0x70E44324 +RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44 +RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49 +RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54 +RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59 +} | grep -v '^$' + +# Round 4 has the same logic as round 2, only n and RCONST are different +{ +#RCONST=0xCA62C1D6 "out of range for signed 32bit displacement" +RCONST=-0x359D3E2A +RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64 +RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69 +RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74 +RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79 +} | grep -v '^$' + +echo " + popq %rdi # + addl %eax, 80(%rdi) # ctx->hash[0] += a + addl %ebx, 84(%rdi) # ctx->hash[1] += b + addl %ecx, 
88(%rdi) # ctx->hash[2] += c + addl %edx, 92(%rdi) # ctx->hash[3] += d + addl %ebp, 96(%rdi) # ctx->hash[4] += e + popq %rbx # + popq %rbp # + popq %r12 # + popq %r13 # + popq %r14 # + popq %r15 # + + ret + .size sha1_process_block64, .-sha1_process_block64 +#endif" From vda.linux at googlemail.com Mon Jan 3 12:14:09 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 3 Jan 2022 13:14:09 +0100 Subject: [git commit] typo fix Message-ID: <20220103120818.2E2C182B8D@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=4387077f8e69c26ce5ce4a8119c225cc1c461f88 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S | 2 +- libbb/hash_md5_sha_x86-64.S.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index 3e1c4b455..ec4e63765 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -21,7 +21,7 @@ sha1_process_block64: # ebp: e # esi,edi: temps # -32+4*n(%rsp),r8...r15: W[0..7,8..15] -# (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) +# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) movq 4*8(%rdi), %r8 bswapq %r8 diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index 931c0f0fd..5f09546b2 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -30,7 +30,7 @@ sha1_process_block64: # ebp: e # esi,edi: temps # -32+4*n(%rsp),r8...r15: W[0..7,8..15] -# (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) +# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) 
movq 4*8(%rdi), %r8 bswapq %r8 From vda.linux at googlemail.com Mon Jan 3 16:02:48 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 3 Jan 2022 17:02:48 +0100 Subject: [git commit] libbb/sha1: x86_64 version: tidying up, no code changes Message-ID: <20220103155838.15FE682BC5@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=7abb2bb96e0cd584f44dd8b219ad16d0232a6485 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S | 32 ++++++++++++++++---------------- libbb/hash_md5_sha_x86-64.S.sh | 33 ++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index ec4e63765..95b85d80a 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -60,7 +60,7 @@ sha1_process_block64: xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbp,%rsi),%ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n] addl %edi, %ebp # e += (((c ^ d) & b) ^ d) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -72,7 +72,7 @@ sha1_process_block64: xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n] addl %edi, %edx # e += (((c ^ d) & b) ^ d) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -84,7 +84,7 @@ sha1_process_block64: xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n] addl %edi, %ecx # e += (((c ^ d) & b) ^ d) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -96,7 +96,7 @@ sha1_process_block64: xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + 
W[n] + leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n] addl %edi, %ebx # e += (((c ^ d) & b) ^ d) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -108,7 +108,7 @@ sha1_process_block64: xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n] addl %edi, %eax # e += (((c ^ d) & b) ^ d) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -120,7 +120,7 @@ sha1_process_block64: xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbp,%rsi),%ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n] addl %edi, %ebp # e += (((c ^ d) & b) ^ d) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -132,7 +132,7 @@ sha1_process_block64: xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n] addl %edi, %edx # e += (((c ^ d) & b) ^ d) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -144,7 +144,7 @@ sha1_process_block64: xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n] addl %edi, %ecx # e += (((c ^ d) & b) ^ d) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -156,7 +156,7 @@ sha1_process_block64: xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbx,%r8),%ebx # e += RCONST + W[n] + leal 0x5A827999(%rbx,%r8), %ebx # e += RCONST + W[n] addl %edi, %ebx # e += (((c ^ d) & b) ^ d) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -168,7 +168,7 @@ sha1_process_block64: xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rax,%r9),%eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%r9), %eax # e += RCONST + W[n] addl %edi, 
%eax # e += (((c ^ d) & b) ^ d) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -180,7 +180,7 @@ sha1_process_block64: xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbp,%r10),%ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%r10), %ebp # e += RCONST + W[n] addl %edi, %ebp # e += (((c ^ d) & b) ^ d) movl %eax, %esi # roll $5, %esi # rotl32(a,5) @@ -192,7 +192,7 @@ sha1_process_block64: xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rdx,%r11),%edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%r11), %edx # e += RCONST + W[n] addl %edi, %edx # e += (((c ^ d) & b) ^ d) movl %ebp, %esi # roll $5, %esi # rotl32(a,5) @@ -204,7 +204,7 @@ sha1_process_block64: xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rcx,%r12),%ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%r12), %ecx # e += RCONST + W[n] addl %edi, %ecx # e += (((c ^ d) & b) ^ d) movl %edx, %esi # roll $5, %esi # rotl32(a,5) @@ -216,7 +216,7 @@ sha1_process_block64: xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbx,%r13),%ebx # e += RCONST + W[n] + leal 0x5A827999(%rbx,%r13), %ebx # e += RCONST + W[n] addl %edi, %ebx # e += (((c ^ d) & b) ^ d) movl %ecx, %esi # roll $5, %esi # rotl32(a,5) @@ -228,7 +228,7 @@ sha1_process_block64: xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rax,%r14),%eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%r14), %eax # e += RCONST + W[n] addl %edi, %eax # e += (((c ^ d) & b) ^ d) movl %ebx, %esi # roll $5, %esi # rotl32(a,5) @@ -240,7 +240,7 @@ sha1_process_block64: xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) - leal 0x5A827999(%rbp,%r15),%ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%r15), %ebp # e += RCONST + W[n] addl %edi, %ebp # e += (((c ^ d) & b) ^ d) movl %eax, %esi # roll $5, %esi # 
rotl32(a,5) diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index 5f09546b2..c5f0ef504 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -74,22 +74,24 @@ test "$1" -ge 8 && echo "%r${1}d" RD1A() { local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 local n=$(($6)) -echo "# $n" -test $n = 0 && echo " +local n0=$(((n+0) & 15)) +echo " +# $n +";test $n0 = 0 && echo " # W[0], already in %esi -";test $n != 0 && test $n -lt 8 && echo " - movl `W32 $n`, %esi # W[n] -";test $n -ge 8 && echo " - # W[n], in %r$n +";test $n0 != 0 && test $n0 -lt 8 && echo " + movl `W32 $n0`, %esi # W[n] +";test $n0 -ge 8 && echo " + # W[n], in %r$n0 ";echo " movl %e$c, %edi # c xorl %e$d, %edi # ^d andl %e$b, %edi # &b xorl %e$d, %edi # (((c ^ d) & b) ^ d) -";test $n -lt 8 && echo " - leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n] -";test $n -ge 8 && echo " - leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n] +";test $n0 -lt 8 && echo " + leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n] +";test $n0 -ge 8 && echo " + leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n] ";echo " addl %edi, %e$e # e += (((c ^ d) & b) ^ d) movl %e$a, %esi # @@ -119,7 +121,7 @@ echo " xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] roll `W32 $n0` # -"; echo " +";echo " movl %e$c, %edi # c xorl %e$d, %edi # ^d andl %e$b, %edi # &b @@ -165,7 +167,7 @@ echo " xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] roll `W32 $n0` # -"; echo " +";echo " movl %e$c, %edi # c xorl %e$d, %edi # ^d xorl %e$b, %edi # ^b @@ -216,7 +218,7 @@ echo " xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] roll `W32 $n0` # -"; echo " +";echo " addl %edi, %e$e # += ((b | c) & d) | (b & c) ";test $n0 -lt 8 && echo " leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] @@ -246,6 +248,11 @@ RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx 
dx b RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69 RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74 RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79 +# Note: new W[n&15] values generated in last 3 iterations +# (W[13,14,15]) are unused after each of these iterations. +# Since we use r8..r15 for W[8..15], this does not matter. +# If we switch to e.g. using r8..r15 for W[0..7], then saving of W[13,14,15] +# (the "movl %esi, `W32 $n0`" insn) is a dead store and can be removed. } | grep -v '^$' echo " From vda.linux at googlemail.com Mon Jan 3 23:51:04 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 00:51:04 +0100 Subject: [git commit] md5/shaXsum: use FEATURE_COPYBUF_KB to size the buffer instead of fixed 4k Message-ID: <20220103234841.617F382BCC@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=1fc520ed286f815cae1da1e9f8014cb18a256744 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta md5_sha1_sum_main 536 565 +29 hash_file 419 401 -18 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 29/-18) Total: 11 bytes In my test, for unrolled sha1, COPYBUF_KB=64 increases throughput from 367 MB/s to 457 MB/s. Signed-off-by: Denys Vlasenko --- coreutils/md5_sha1_sum.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/coreutils/md5_sha1_sum.c b/coreutils/md5_sha1_sum.c index 4efa23061..3b389cb6b 100644 --- a/coreutils/md5_sha1_sum.c +++ b/coreutils/md5_sha1_sum.c @@ -151,10 +151,12 @@ static unsigned char *hash_bin_to_hex(unsigned char *hash_value, return (unsigned char *)hex_value; } +#define BUFSZ (CONFIG_FEATURE_COPYBUF_KB < 4 ? 
4096 : CONFIG_FEATURE_COPYBUF_KB * 1024) + #if !ENABLE_SHA3SUM -# define hash_file(f,w) hash_file(f) +# define hash_file(b,f,w) hash_file(b,f) #endif -static uint8_t *hash_file(const char *filename, unsigned sha3_width) +static uint8_t *hash_file(unsigned char *in_buf, const char *filename, unsigned sha3_width) { int src_fd, hash_len, count; union _ctx_ { @@ -227,8 +229,7 @@ static uint8_t *hash_file(const char *filename, unsigned sha3_width) } { - RESERVE_CONFIG_UBUFFER(in_buf, 4096); - while ((count = safe_read(src_fd, in_buf, 4096)) > 0) { + while ((count = safe_read(src_fd, in_buf, BUFSZ)) > 0) { update(&context, in_buf, count); } hash_value = NULL; @@ -238,7 +239,6 @@ static uint8_t *hash_file(const char *filename, unsigned sha3_width) final(&context, in_buf); hash_value = hash_bin_to_hex(in_buf, hash_len); } - RELEASE_CONFIG_BUFFER(in_buf); } if (src_fd != STDIN_FILENO) { @@ -251,6 +251,7 @@ static uint8_t *hash_file(const char *filename, unsigned sha3_width) int md5_sha1_sum_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int md5_sha1_sum_main(int argc UNUSED_PARAM, char **argv) { + unsigned char *in_buf; int return_value = EXIT_SUCCESS; unsigned flags; #if ENABLE_SHA3SUM @@ -279,6 +280,12 @@ int md5_sha1_sum_main(int argc UNUSED_PARAM, char **argv) if (!*argv) *--argv = (char*)"-"; + /* The buffer is not alloc/freed for each input file: + * for big values of COPYBUF_KB, this helps to keep its pages + * pre-faulted and possibly even fully cached on local CPU. 
+ */ + in_buf = xmalloc(BUFSZ); + do { if (ENABLE_FEATURE_MD5_SHA1_SUM_CHECK && (flags & FLAG_CHECK)) { FILE *pre_computed_stream; @@ -310,7 +317,7 @@ int md5_sha1_sum_main(int argc UNUSED_PARAM, char **argv) *filename_ptr = '\0'; filename_ptr += 2; - hash_value = hash_file(filename_ptr, sha3_width); + hash_value = hash_file(in_buf, filename_ptr, sha3_width); if (hash_value && (strcmp((char*)hash_value, line) == 0)) { if (!(flags & FLAG_SILENT)) @@ -339,7 +346,7 @@ int md5_sha1_sum_main(int argc UNUSED_PARAM, char **argv) } fclose_if_not_stdin(pre_computed_stream); } else { - uint8_t *hash_value = hash_file(*argv, sha3_width); + uint8_t *hash_value = hash_file(in_buf, *argv, sha3_width); if (hash_value == NULL) { return_value = EXIT_FAILURE; } else { From vda.linux at googlemail.com Tue Jan 4 00:45:52 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 01:45:52 +0100 Subject: [git commit] libbb/sha1: x86_64 version: reorder prologue/epilogue insns Message-ID: <20220104004139.5BA45831C6@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=c3cfcc92422f6e525073226cdbfdcb00ab1e7dc7 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Not clear exactly why, but this increases hashing speed on Skylake from 454 MB/s to 464 MB/s. 
Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S | 60 ++++++++++++++++++------------------- libbb/hash_md5_sha_x86-64.S.sh | 67 +++++++++++++++++++++++------------------- 2 files changed, 67 insertions(+), 60 deletions(-) diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index 95b85d80a..ff78fc049 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -6,14 +6,14 @@ .hidden sha1_process_block64 .type sha1_process_block64, @function - .balign 8 # allow decoders to fetch at least 4 first insns + .balign 8 # allow decoders to fetch at least 5 first insns sha1_process_block64: - pushq %r15 # - pushq %r14 # - pushq %r13 # - pushq %r12 # - pushq %rbp # - pushq %rbx # + pushq %rbp # 1 byte insn + pushq %rbx # 1 byte insn + pushq %r15 # 2 byte insn + pushq %r14 # 2 byte insn + pushq %r13 # 2 byte insn + pushq %r12 # 2 byte insn pushq %rdi # we need ctx at the end #Register and stack use: @@ -22,24 +22,6 @@ sha1_process_block64: # esi,edi: temps # -32+4*n(%rsp),r8...r15: W[0..7,8..15] # (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) 
- - movq 4*8(%rdi), %r8 - bswapq %r8 - movl %r8d, %r9d - shrq $32, %r8 - movq 4*10(%rdi), %r10 - bswapq %r10 - movl %r10d, %r11d - shrq $32, %r10 - movq 4*12(%rdi), %r12 - bswapq %r12 - movl %r12d, %r13d - shrq $32, %r12 - movq 4*14(%rdi), %r14 - bswapq %r14 - movl %r14d, %r15d - shrq $32, %r14 - movl $3, %eax 1: movq (%rdi,%rax,8), %rsi @@ -48,12 +30,30 @@ sha1_process_block64: movq %rsi, -32(%rsp,%rax,8) decl %eax jns 1b + movl 80(%rdi), %eax # a = ctx->hash[0] movl 84(%rdi), %ebx # b = ctx->hash[1] movl 88(%rdi), %ecx # c = ctx->hash[2] movl 92(%rdi), %edx # d = ctx->hash[3] movl 96(%rdi), %ebp # e = ctx->hash[4] + movq 4*8(%rdi), %r8 + movq 4*10(%rdi), %r10 + bswapq %r8 + bswapq %r10 + movq 4*12(%rdi), %r12 + movq 4*14(%rdi), %r14 + bswapq %r12 + bswapq %r14 + movl %r8d, %r9d + shrq $32, %r8 + movl %r10d, %r11d + shrq $32, %r10 + movl %r12d, %r13d + shrq $32, %r12 + movl %r14d, %r15d + shrq $32, %r14 + # 0 # W[0], already in %esi movl %ecx, %edi # c @@ -1272,17 +1272,17 @@ sha1_process_block64: rorl $2, %ecx # b = rotl32(b,30) popq %rdi # + popq %r12 # addl %eax, 80(%rdi) # ctx->hash[0] += a + popq %r13 # addl %ebx, 84(%rdi) # ctx->hash[1] += b + popq %r14 # addl %ecx, 88(%rdi) # ctx->hash[2] += c + popq %r15 # addl %edx, 92(%rdi) # ctx->hash[3] += d - addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbx # + addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbp # - popq %r12 # - popq %r13 # - popq %r14 # - popq %r15 # ret .size sha1_process_block64, .-sha1_process_block64 diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index c5f0ef504..7e50b64fb 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -15,14 +15,14 @@ echo \ .hidden sha1_process_block64 .type sha1_process_block64, @function - .balign 8 # allow decoders to fetch at least 4 first insns + .balign 8 # allow decoders to fetch at least 5 first insns sha1_process_block64: - pushq %r15 # - pushq %r14 # - pushq %r13 # - pushq %r12 # - pushq %rbp # - pushq 
%rbx # + pushq %rbp # 1 byte insn + pushq %rbx # 1 byte insn + pushq %r15 # 2 byte insn + pushq %r14 # 2 byte insn + pushq %r13 # 2 byte insn + pushq %r12 # 2 byte insn pushq %rdi # we need ctx at the end #Register and stack use: @@ -31,24 +31,6 @@ sha1_process_block64: # esi,edi: temps # -32+4*n(%rsp),r8...r15: W[0..7,8..15] # (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) - - movq 4*8(%rdi), %r8 - bswapq %r8 - movl %r8d, %r9d - shrq $32, %r8 - movq 4*10(%rdi), %r10 - bswapq %r10 - movl %r10d, %r11d - shrq $32, %r10 - movq 4*12(%rdi), %r12 - bswapq %r12 - movl %r12d, %r13d - shrq $32, %r12 - movq 4*14(%rdi), %r14 - bswapq %r14 - movl %r14d, %r15d - shrq $32, %r14 - movl $3, %eax 1: movq (%rdi,%rax,8), %rsi @@ -57,11 +39,29 @@ sha1_process_block64: movq %rsi, -32(%rsp,%rax,8) decl %eax jns 1b + movl 80(%rdi), %eax # a = ctx->hash[0] movl 84(%rdi), %ebx # b = ctx->hash[1] movl 88(%rdi), %ecx # c = ctx->hash[2] movl 92(%rdi), %edx # d = ctx->hash[3] movl 96(%rdi), %ebp # e = ctx->hash[4] + + movq 4*8(%rdi), %r8 + movq 4*10(%rdi), %r10 + bswapq %r8 + bswapq %r10 + movq 4*12(%rdi), %r12 + movq 4*14(%rdi), %r14 + bswapq %r12 + bswapq %r14 + movl %r8d, %r9d + shrq $32, %r8 + movl %r10d, %r11d + shrq $32, %r10 + movl %r12d, %r13d + shrq $32, %r12 + movl %r14d, %r15d + shrq $32, %r14 ' W32() { test "$1" || exit 1 @@ -71,6 +71,13 @@ test "$1" -lt 8 && echo "-32+4*$1(%rsp)" test "$1" -ge 8 && echo "%r${1}d" } +# It's possible to interleave insns in rounds to mostly eliminate +# dependency chains, but this likely to only help old Pentium-based +# CPUs (ones without OOO, which can only simultaneously execute a pair +# of _adjacent_ insns). +# Testing on old-ish Silvermont CPU (which has OOO window of only +# about ~8 insns) shows very small (~1%) speedup. 
+ RD1A() { local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 local n=$(($6)) @@ -257,17 +264,17 @@ RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx b echo " popq %rdi # + popq %r12 # addl %eax, 80(%rdi) # ctx->hash[0] += a + popq %r13 # addl %ebx, 84(%rdi) # ctx->hash[1] += b + popq %r14 # addl %ecx, 88(%rdi) # ctx->hash[2] += c + popq %r15 # addl %edx, 92(%rdi) # ctx->hash[3] += d - addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbx # + addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbp # - popq %r12 # - popq %r13 # - popq %r14 # - popq %r15 # ret .size sha1_process_block64, .-sha1_process_block64 From vda.linux at googlemail.com Tue Jan 4 13:32:41 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 14:32:41 +0100 Subject: [git commit] build system: detect if build host has no bzip2 Message-ID: <20220104132703.A6D0A82BCC@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=ed2af2e82dbcfccb7392e9fbc3f837de1594c103 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- scripts/bb_release | 6 +++--- scripts/embedded_scripts | 6 ++++++ scripts/mkconfigs | 11 +++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/scripts/bb_release b/scripts/bb_release index 545440d3a..180ad8f2e 100755 --- a/scripts/bb_release +++ b/scripts/bb_release @@ -17,7 +17,7 @@ VERSION=`ls busybox-*.tar.gz | sed 's/busybox-\(.*\)\.tar\.gz/\1/'` zcat busybox-$VERSION.tar.gz | bzip2 > busybox-$VERSION.tar.bz2 for releasefile in busybox-$VERSION.tar.gz busybox-$VERSION.tar.bz2; do - test -f $releasefile || { echo "no $releasefile"; exit 1; } - gpg --detach-sign $releasefile - sha256sum $releasefile > $releasefile.sha256 + test -f $releasefile || { echo "no $releasefile"; exit 1; } + gpg --detach-sign $releasefile + sha256sum $releasefile > $releasefile.sha256 done diff --git a/scripts/embedded_scripts b/scripts/embedded_scripts index aa7bf3e8a..205ac591a 
100755 --- a/scripts/embedded_scripts +++ b/scripts/embedded_scripts @@ -23,6 +23,12 @@ if test $? != 0; then exit 1 fi +bzip2 </dev/null >/dev/null +if test $? != 0; then + echo 'bzip2 is not installed' + exit 1 +fi + custom_scripts="" if [ -d "$custom_loc" ] then diff --git a/scripts/mkconfigs b/scripts/mkconfigs index 6a26fe1dd..1bbf10c3a 100755 --- a/scripts/mkconfigs +++ b/scripts/mkconfigs @@ -28,6 +28,17 @@ config=.config +od -v -b </dev/null >/dev/null +if test $? != 0; then + echo 'od tool is not installed or cannot accept "-v -b" options' + exit 1 +fi +bzip2 </dev/null >/dev/null +if test $? != 0; then + echo 'bzip2 is not installed' + exit 1 +fi + { echo "\ #ifndef _BBCONFIGOPTS_H From vda.linux at googlemail.com Tue Jan 4 18:42:36 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 19:42:36 +0100 Subject: [git commit] sed: correctly handle 'w FILE' commands writing to the same file Message-ID: <20220104183721.9BDE383245@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=286b33721d5f6afd615f752ea83bbd72658c6bb9 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sed_xfopen_w - 84 +84 Signed-off-by: Denys Vlasenko --- editors/sed.c | 31 +++++++++++++++++++++++++++++-- testsuite/sed.tests | 9 +++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index e8c82ac63..48b0dbf67 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -97,6 +97,12 @@ enum { OPT_in_place = 1 << 0, }; +struct sed_FILE { + struct sed_FILE *next; /* Next (linked list, NULL terminated) */ + const char *fname; + FILE *fp; +}; + /* Each sed command turns into one of these structures. */ typedef struct sed_cmd_s { /* Ordered by alignment requirements: currently 36 bytes on x86 */ @@ -151,6 +157,11 @@ struct globals { /* linked list of append lines */ llist_t *append_head; + /* linked list of FILEs opened for 'w' and s///w'.
+ * Needed to handle duplicate fnames: sed '/a/w F;/b/w F' + */ + struct sed_FILE *FILE_head; + char *add_cmd_line; struct pipeline { @@ -211,6 +222,22 @@ static void sed_free_and_close_stuff(void) void sed_free_and_close_stuff(void); #endif +static FILE *sed_xfopen_w(const char *fname) +{ + struct sed_FILE **pp = &G.FILE_head; + struct sed_FILE *cur; + while ((cur = *pp) != NULL) { + if (strcmp(cur->fname, fname) == 0) + return cur->fp; + pp = &cur->next; + } + *pp = cur = xzalloc(sizeof(*cur)); + /*cur->next = NULL; - already is */ + cur->fname = xstrdup(fname); + cur->fp = xfopen_for_write(fname); + return cur->fp; +} + /* If something bad happens during -i operation, delete temp file */ static void cleanup_outname(void) @@ -446,7 +473,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) { char *fname; idx += parse_file_cmd(/*sed_cmd,*/ substr+idx+1, &fname); - sed_cmd->sw_file = xfopen_for_write(fname); + sed_cmd->sw_file = sed_xfopen_w(fname); sed_cmd->sw_last_char = '\n'; free(fname); break; @@ -561,7 +588,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) } cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); if (sed_cmd->cmd == 'w') { - sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); + sed_cmd->sw_file = sed_xfopen_w(sed_cmd->string); sed_cmd->sw_last_char = '\n'; } } diff --git a/testsuite/sed.tests b/testsuite/sed.tests index 2b78c9b12..e62b839f7 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -405,6 +405,15 @@ testing "sed ^ OR not^" \ "" \ "abca\n" +# This only works if file name is exactly the same. +# For example, w FILE; w ./FILE won't work. 
+testing "sed understands duplicate file name" \ + "sed -n -e '/a/w sed.output' -e '/c/w sed.output' 2>&1 && cat sed.output && rm sed.output" \ + "a\nc\n" \ + "" \ + "a\nb\nc\n" + + # testing "description" "commands" "result" "infile" "stdin" exit $FAILCOUNT From vda.linux at googlemail.com Tue Jan 4 22:31:58 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 23:31:58 +0100 Subject: [git commit] libbb: factor out fflush_stdout_and_exit(EXIT_SUCCESS) Message-ID: <20220104222628.3BC54822DF@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=31f45c1b369bee73843f7d791313423997618448 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta fflush_stdout_and_exit_SUCCESS - 7 +7 xxd_main 890 888 -2 vlock_main 353 351 -2 uuencode_main 318 316 -2 uniq_main 427 425 -2 uname_main 250 248 -2 sort_main 853 851 -2 shuf_main 500 498 -2 route_main 238 236 -2 readlink_main 113 111 -2 nice_main 156 154 -2 last_main 957 955 -2 ipcs_main 960 958 -2 env_main 209 207 -2 chrt_main 464 462 -2 cal_main 921 919 -2 baseNUM_main 650 648 -2 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/16 up/down: 7/-32) Total: -25 bytes Signed-off-by: Denys Vlasenko --- coreutils/env.c | 2 +- coreutils/nice.c | 2 +- coreutils/readlink.c | 2 +- coreutils/shuf.c | 2 +- coreutils/sort.c | 2 +- coreutils/uname.c | 2 +- coreutils/uniq.c | 2 +- coreutils/uudecode.c | 2 +- coreutils/uuencode.c | 2 +- include/libbb.h | 1 + libbb/fflush_stdout_and_exit.c | 5 +++++ loginutils/vlock.c | 2 +- networking/route.c | 2 +- sysklogd/logread.c | 2 +- util-linux/cal.c | 2 +- util-linux/chrt.c | 2 +- util-linux/hexdump_xxd.c | 2 +- util-linux/ipcs.c | 8 ++++---- util-linux/last.c | 2 +- util-linux/last_fancy.c | 2 +- 20 files changed, 27 insertions(+), 21 deletions(-) diff --git a/coreutils/env.c b/coreutils/env.c index a0ea4dd27..6eafd06ef 100644 --- a/coreutils/env.c +++ 
b/coreutils/env.c @@ -100,7 +100,7 @@ int env_main(int argc UNUSED_PARAM, char **argv) } } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } /* diff --git a/coreutils/nice.c b/coreutils/nice.c index 28591ac61..e70da5d2b 100644 --- a/coreutils/nice.c +++ b/coreutils/nice.c @@ -33,7 +33,7 @@ int nice_main(int argc UNUSED_PARAM, char **argv) if (!*++argv) { /* No args, so (GNU) output current nice value. */ printf("%d\n", old_priority); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } adjustment = 10; /* Set default adjustment. */ diff --git a/coreutils/readlink.c b/coreutils/readlink.c index 09d69df2b..b2e867883 100644 --- a/coreutils/readlink.c +++ b/coreutils/readlink.c @@ -96,5 +96,5 @@ int readlink_main(int argc UNUSED_PARAM, char **argv) printf((opt & 2) ? "%s" : "%s\n", buf); free(buf); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/coreutils/shuf.c b/coreutils/shuf.c index 3def3d80f..337366b45 100644 --- a/coreutils/shuf.c +++ b/coreutils/shuf.c @@ -171,5 +171,5 @@ int shuf_main(int argc, char **argv) printf("%s%c", lines[i], eol); } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/coreutils/sort.c b/coreutils/sort.c index 32a06e40a..0cbb6f597 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -644,5 +644,5 @@ int sort_main(int argc UNUSED_PARAM, char **argv) printf("%s%c", lines[i], ch); } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/coreutils/uname.c b/coreutils/uname.c index da785ab4c..6c0bdf096 100644 --- a/coreutils/uname.c +++ b/coreutils/uname.c @@ -209,5 +209,5 @@ int uname_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) #endif } - fflush_stdout_and_exit(EXIT_SUCCESS); /* coreutils-6.9 compat */ + fflush_stdout_and_exit_SUCCESS(); /* coreutils-6.9 compat */ } diff --git a/coreutils/uniq.c b/coreutils/uniq.c index a3058ac07..06c57f750 100644 --- 
a/coreutils/uniq.c +++ b/coreutils/uniq.c @@ -139,5 +139,5 @@ int uniq_main(int argc UNUSED_PARAM, char **argv) die_if_ferror(stdin, input_filename); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/coreutils/uudecode.c b/coreutils/uudecode.c index e90902f52..63a8d4d48 100644 --- a/coreutils/uudecode.c +++ b/coreutils/uudecode.c @@ -352,7 +352,7 @@ int baseNUM_main(int argc UNUSED_PARAM, char **argv) #undef src_buf } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } #endif diff --git a/coreutils/uuencode.c b/coreutils/uuencode.c index db49ec80a..f096e3122 100644 --- a/coreutils/uuencode.c +++ b/coreutils/uuencode.c @@ -78,5 +78,5 @@ int uuencode_main(int argc UNUSED_PARAM, char **argv) } printf(tbl == bb_uuenc_tbl_std ? "\n`\nend\n" : "\n====\n"); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/include/libbb.h b/include/libbb.h index a48782832..8308d6259 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1054,6 +1054,7 @@ void die_if_ferror(FILE *file, const char *msg) FAST_FUNC; void die_if_ferror_stdout(void) FAST_FUNC; int fflush_all(void) FAST_FUNC; void fflush_stdout_and_exit(int retval) NORETURN FAST_FUNC; +void fflush_stdout_and_exit_SUCCESS(void) NORETURN FAST_FUNC; int fclose_if_not_stdin(FILE *file) FAST_FUNC; FILE* xfopen(const char *filename, const char *mode) FAST_FUNC; /* Prints warning to stderr and returns NULL on failure: */ diff --git a/libbb/fflush_stdout_and_exit.c b/libbb/fflush_stdout_and_exit.c index 5df74170e..5a13ebcf8 100644 --- a/libbb/fflush_stdout_and_exit.c +++ b/libbb/fflush_stdout_and_exit.c @@ -20,3 +20,8 @@ void FAST_FUNC fflush_stdout_and_exit(int retval) * but use xfunc_die() */ xfunc_die(); } + +void FAST_FUNC fflush_stdout_and_exit_SUCCESS(void) +{ + fflush_stdout_and_exit(EXIT_SUCCESS); +} diff --git a/loginutils/vlock.c b/loginutils/vlock.c index 334b7d2ad..720835c4b 100644 --- a/loginutils/vlock.c +++ 
b/loginutils/vlock.c @@ -128,5 +128,5 @@ int vlock_main(int argc UNUSED_PARAM, char **argv) ioctl(STDIN_FILENO, VT_SETMODE, &ovtm); #endif tcsetattr_stdin_TCSANOW(&oterm); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/networking/route.c b/networking/route.c index ff5daa8a7..26146f8e9 100644 --- a/networking/route.c +++ b/networking/route.c @@ -702,7 +702,7 @@ int route_main(int argc UNUSED_PARAM, char **argv) #endif bb_displayroutes(noresolve, opt & ROUTE_OPT_e); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } /* Check verb. At the moment, must be add, del, or delete. */ diff --git a/sysklogd/logread.c b/sysklogd/logread.c index d5f8ca0a2..e6cfcf4a7 100644 --- a/sysklogd/logread.c +++ b/sysklogd/logread.c @@ -226,5 +226,5 @@ int logread_main(int argc UNUSED_PARAM, char **argv) /* shmdt(shbuf); - on Linux, shmdt is not mandatory on exit */ - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/util-linux/cal.c b/util-linux/cal.c index 6ba6ebf98..522ab3476 100644 --- a/util-linux/cal.c +++ b/util-linux/cal.c @@ -233,7 +233,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv) } } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } /* diff --git a/util-linux/chrt.c b/util-linux/chrt.c index 6799abb2d..be96fa426 100644 --- a/util-linux/chrt.c +++ b/util-linux/chrt.c @@ -110,7 +110,7 @@ int chrt_main(int argc UNUSED_PARAM, char **argv) show_min_max(SCHED_RR); show_min_max(SCHED_BATCH); show_min_max(SCHED_IDLE); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } //if (opt & OPT_r) // policy = SCHED_RR; - default, already set diff --git a/util-linux/hexdump_xxd.c b/util-linux/hexdump_xxd.c index 76dada983..4372ac770 100644 --- a/util-linux/hexdump_xxd.c +++ b/util-linux/hexdump_xxd.c @@ -150,7 +150,7 @@ static void reverse(unsigned opt, const char *filename) free(buf); } //fclose(fp); - 
fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } static void print_C_style(const char *p, const char *hdr) diff --git a/util-linux/ipcs.c b/util-linux/ipcs.c index ef2529c05..5973cbf57 100644 --- a/util-linux/ipcs.c +++ b/util-linux/ipcs.c @@ -600,15 +600,15 @@ int ipcs_main(int argc UNUSED_PARAM, char **argv) id = xatoi(opt_i); if (opt & flag_shm) { print_shm(id); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } if (opt & flag_sem) { print_sem(id); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } if (opt & flag_msg) { print_msg(id); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } bb_show_usage(); } @@ -633,5 +633,5 @@ int ipcs_main(int argc UNUSED_PARAM, char **argv) do_sem(format); bb_putchar('\n'); } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/util-linux/last.c b/util-linux/last.c index 24ce7a8d8..63751ca45 100644 --- a/util-linux/last.c +++ b/util-linux/last.c @@ -162,5 +162,5 @@ int last_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) xlseek(file, pos, SEEK_SET); } - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } diff --git a/util-linux/last_fancy.c b/util-linux/last_fancy.c index e56e0ba85..648236229 100644 --- a/util-linux/last_fancy.c +++ b/util-linux/last_fancy.c @@ -296,5 +296,5 @@ int last_main(int argc UNUSED_PARAM, char **argv) if (ENABLE_FEATURE_CLEAN_UP) close(file); - fflush_stdout_and_exit(EXIT_SUCCESS); + fflush_stdout_and_exit_SUCCESS(); } From vda.linux at googlemail.com Tue Jan 4 22:36:16 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 23:36:16 +0100 Subject: [git commit] libbb: fflush_stdout_and_exit(0) still exits with _error_ (not 0!) 
if fflush fails Message-ID: <20220104223134.2FCC5832B4@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=dfd8aafcf59c88662516a534a4334b3f08f58c88 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta fflush_stdout_and_exit 36 40 +4 Signed-off-by: Denys Vlasenko --- libbb/fflush_stdout_and_exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libbb/fflush_stdout_and_exit.c b/libbb/fflush_stdout_and_exit.c index 5a13ebcf8..33e28ae34 100644 --- a/libbb/fflush_stdout_and_exit.c +++ b/libbb/fflush_stdout_and_exit.c @@ -13,9 +13,9 @@ */ void FAST_FUNC fflush_stdout_and_exit(int retval) { - xfunc_error_retval = retval; if (fflush(stdout)) bb_simple_perror_msg_and_die(bb_msg_standard_output); + xfunc_error_retval = retval; /* In case we are in NOFORK applet. Do not exit() directly, * but use xfunc_die() */ xfunc_die(); From vda.linux at googlemail.com Tue Jan 4 22:53:21 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 4 Jan 2022 23:53:21 +0100 Subject: [git commit] sort: fix -s -r interaction: 'stable' order is not affected by -r Message-ID: <20220104224804.ECE20832B4@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=cc7d2e21780c28608b00a4faf0fed297527bcbf4 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta compare_keys 818 820 +2 Signed-off-by: Denys Vlasenko --- coreutils/sort.c | 4 +++- testsuite/sort.tests | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/coreutils/sort.c b/coreutils/sort.c index 0cbb6f597..9ff777851 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -380,7 +380,9 @@ static int compare_keys(const void *xarg, const void *yarg) /* If x > y, 1, else -1 */ retval = (x32 > y32) * 2 - 1; - } else + /* Here, -r has no effect! 
*/ + return retval; + } if (!(option_mask32 & FLAG_no_tie_break)) { /* fallback sort */ flags = option_mask32; diff --git a/testsuite/sort.tests b/testsuite/sort.tests index c51a8e475..5375f93de 100755 --- a/testsuite/sort.tests +++ b/testsuite/sort.tests @@ -175,6 +175,19 @@ testing "sort file in place" \ 111 " "" +testing "sort -sr (stable and reverse) does NOT reverse 'stable' ordering" \ +"sort -k2 -r -s input" "\ +b 2 +d 2 +a 1 +c 1 +" "\ +a 1 +b 2 +c 1 +d 2 +" "" + # testing "description" "command(s)" "result" "infile" "stdin" exit $FAILCOUNT From bugzilla at busybox.net Tue Jan 4 22:54:28 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 04 Jan 2022 22:54:28 +0000 Subject: [Bug 14496] 'sort' is not consistent with its coreutils/toybox counterpart In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14496 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Denys Vlasenko --- Thank you for the report. Fixed in git. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Tue Jan 4 23:07:56 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 04 Jan 2022 23:07:56 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #1 from Denys Vlasenko --- Can you specify the value of PS1 which does not work for you? -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Wed Jan 5 00:13:58 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 00:13:58 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #2 from rkitover at gmail.com --- Yes, it's in my project that I linked here: https://raw.githubusercontent.com/rkitover/sh-prompt-simple/master/prompt.sh Just do: . ./prompt.sh I don't know what specifically in my prompt causes the bug, and I can't paste it here because it has ANSI color codes. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Wed Jan 5 11:05:55 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 5 Jan 2022 12:05:55 +0100 Subject: [git commit] sort: support -h Message-ID: <20220105110006.3F32C82DFE@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=34e0bb3931b595e7a48061255692ec4ff29499c5 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta static.scale_suffix - 62 +62 .rodata 104304 104336 +32 compare_keys 820 848 +28 packed_usage 34159 34184 +25 static.suffix - 9 +9 sort_opt_str 37 38 +1 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 4/0 up/down: 157/0) Total: 157 bytes Signed-off-by: Denys Vlasenko --- coreutils/sort.c | 95 +++++++++++++++++++++++++++++++++++----------------- testsuite/sort.tests | 29 ++++++++++++++++ 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/coreutils/sort.c b/coreutils/sort.c index 9ff777851..9aac656fe 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -18,7 +18,7 @@ //config: sort is used to sort lines of text in specified files. 
//config: //config:config FEATURE_SORT_BIG -//config: bool "Full SuSv3 compliant sort (support -ktcbdfiogM)" +//config: bool "Full SuSv3 compliant sort (support -ktcbdfioghM)" //config: default y //config: depends on SORT //config: help @@ -43,7 +43,7 @@ //usage:#define sort_trivial_usage //usage: "[-nru" -//usage: IF_FEATURE_SORT_BIG("gMcszbdfiokt] [-o FILE] [-k START[.OFS][OPTS][,END[.OFS][OPTS]] [-t CHAR") +//usage: IF_FEATURE_SORT_BIG("ghMcszbdfiokt] [-o FILE] [-k START[.OFS][OPTS][,END[.OFS][OPTS]] [-t CHAR") //usage: "] [FILE]..." //usage:#define sort_full_usage "\n\n" //usage: "Sort lines of text\n" @@ -59,6 +59,7 @@ //usage: "\n -n Sort numbers" //usage: IF_FEATURE_SORT_BIG( //usage: "\n -g General numerical sort" +//usage: "\n -h Sort human readable numbers (2K 1G)" //usage: "\n -M Sort month" //usage: "\n -V Sort version" //usage: "\n -t CHAR Field separator" @@ -94,31 +95,32 @@ enum { FLAG_n = 1 << 0, /* Numeric sort */ FLAG_g = 1 << 1, /* Sort using strtod() */ - FLAG_M = 1 << 2, /* Sort date */ - FLAG_V = 1 << 3, /* Sort version */ + FLAG_h = 1 << 2, /* Sort using strtod(), plus KMGT suffixes */ + FLAG_M = 1 << 3, /* Sort date */ + FLAG_V = 1 << 4, /* Sort version */ /* ucsz apply to root level only, not keys. 
b at root level implies bb */ - FLAG_u = 1 << 4, /* Unique */ - FLAG_c = 1 << 5, /* Check: no output, exit(!ordered) */ - FLAG_s = 1 << 6, /* Stable sort, no ascii fallback at end */ - FLAG_z = 1 << 7, /* Input and output is NUL terminated, not \n */ + FLAG_u = 1 << 5, /* Unique */ + FLAG_c = 1 << 6, /* Check: no output, exit(!ordered) */ + FLAG_s = 1 << 7, /* Stable sort, no ascii fallback at end */ + FLAG_z = 1 << 8, /* Input and output is NUL terminated, not \n */ /* These can be applied to search keys, the previous four can't */ - FLAG_b = 1 << 8, /* Ignore leading blanks */ - FLAG_r = 1 << 9, /* Reverse */ - FLAG_d = 1 << 10, /* Ignore !(isalnum()|isspace()) */ - FLAG_f = 1 << 11, /* Force uppercase */ - FLAG_i = 1 << 12, /* Ignore !isprint() */ - FLAG_m = 1 << 13, /* ignored: merge already sorted files; do not sort */ - FLAG_S = 1 << 14, /* ignored: -S, --buffer-size=SIZE */ - FLAG_T = 1 << 15, /* ignored: -T, --temporary-directory=DIR */ - FLAG_o = 1 << 16, - FLAG_k = 1 << 17, - FLAG_t = 1 << 18, + FLAG_b = 1 << 9, /* Ignore leading blanks */ + FLAG_r = 1 << 10, /* Reverse */ + FLAG_d = 1 << 11, /* Ignore !(isalnum()|isspace()) */ + FLAG_f = 1 << 12, /* Force uppercase */ + FLAG_i = 1 << 13, /* Ignore !isprint() */ + FLAG_m = 1 << 14, /* ignored: merge already sorted files; do not sort */ + FLAG_S = 1 << 15, /* ignored: -S, --buffer-size=SIZE */ + FLAG_T = 1 << 16, /* ignored: -T, --temporary-directory=DIR */ + FLAG_o = 1 << 17, + FLAG_k = 1 << 18, + FLAG_t = 1 << 19, FLAG_bb = 0x80000000, /* Ignore trailing blanks */ FLAG_no_tie_break = 0x40000000, }; static const char sort_opt_str[] ALIGN1 = "^" - "ngMVucszbrdfimS:T:o:k:*t:" + "nghMVucszbrdfimS:T:o:k:*t:" "\0" "o--o:t--t"/*-t, -o: at most one of each*/; /* * OPT_STR must not be string literal, needs to have stable address: @@ -253,6 +255,25 @@ static struct sort_key *add_key(void) #define GET_LINE(fp) xmalloc_fgetline(fp) #endif +#if ENABLE_FEATURE_SORT_BIG +static int scale_suffix(const char *tail) +{ + 
static const char suffix[] ALIGN1 = "kmgtpezy"; + const char *s; + int n; + + if (!tail[0]) + return -1; + s = strchr(suffix, tail[0] | 0x20); + if (!s) + return -1; + n = s - suffix; + if (n != 0 && tail[0] >= 'a') + return -1; /* mg... not accepted, only MG... */ + return n; +} +#endif + /* Iterate through keys list and perform comparisons */ static int compare_keys(const void *xarg, const void *yarg) { @@ -275,7 +296,7 @@ static int compare_keys(const void *xarg, const void *yarg) y = *(char **)yarg; #endif /* Perform actual comparison */ - switch (flags & (FLAG_n | FLAG_g | FLAG_M | FLAG_V)) { + switch (flags & (FLAG_n | FLAG_g | FLAG_h | FLAG_M | FLAG_V)) { default: bb_simple_error_msg_and_die("unknown sort type"); break; @@ -293,7 +314,8 @@ static int compare_keys(const void *xarg, const void *yarg) #endif break; #if ENABLE_FEATURE_SORT_BIG - case FLAG_g: { + case FLAG_g: + case FLAG_h: { char *xx, *yy; //TODO: needs setlocale(LC_NUMERIC, "C")? double dx = strtod(x, &xx); @@ -308,16 +330,26 @@ static int compare_keys(const void *xarg, const void *yarg) retval = (dy != dy) ? 0 : -1; else if (dy != dy) retval = 1; - /* Check for infinity. Could underflow, but it avoids libm. */ - else if (1.0 / dx == 0.0) { - if (dx < 0) - retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; + else { + if (flags & FLAG_h) { + int xs = scale_suffix(xx); + int ys = scale_suffix(yy); + if (xs != ys) { + retval = xs - ys; + break; + } + } + /* Check for infinity. Could underflow, but it avoids libm. */ + if (1.0 / dx == 0.0) { + if (dx < 0) + retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; + else + retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; + } else if (1.0 / dy == 0.0) + retval = (dy < 0) ? 1 : -1; else - retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; - } else if (1.0 / dy == 0.0) - retval = (dy < 0) ? 1 : -1; - else - retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + retval = (dx > dy) ? 1 : ((dx < dy) ? 
-1 : 0); + } break; } case FLAG_M: { @@ -476,6 +508,7 @@ int sort_main(int argc UNUSED_PARAM, char **argv) FLAG_allowed_for_k = FLAG_n | /* Numeric sort */ FLAG_g | /* Sort using strtod() */ + FLAG_h | /* Sort using strtod(), plus KMGT suffixes */ FLAG_M | /* Sort date */ FLAG_b | /* Ignore leading blanks */ FLAG_r | /* Reverse */ diff --git a/testsuite/sort.tests b/testsuite/sort.tests index 5375f93de..ff33e21b4 100755 --- a/testsuite/sort.tests +++ b/testsuite/sort.tests @@ -188,6 +188,35 @@ c 1 d 2 " "" +testing "sort -h" \ +"sort -h input" "\ +3e +4m +5y +1023 +1024 +1025 +3000 +2K +3k +1M +2E +1Y +" "\ +1Y +5y +1M +2E +3k +3e +2K +4m +1023 +1025 +3000 +1024 +" "" + # testing "description" "command(s)" "result" "infile" "stdin" exit $FAILCOUNT From bugzilla at busybox.net Wed Jan 5 11:06:41 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 11:06:41 +0000 Subject: [Bug 14491] Support sort -h In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14491 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Denys Vlasenko --- Fixed in git -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 5 11:11:04 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 11:11:04 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #3 from Denys Vlasenko --- "Please run my obscure 210-line shell script on your system. I promise, nothing untoward will happen". Er. How about "no"? Add echo "$PS1" | hexdump -vC in a relevant part of your script, run it, and post the value of PS1. -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Wed Jan 5 11:46:40 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 11:46:40 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #4 from rkitover at gmail.com --- The prompt has embedded function calls to functions in that script. Here you go: 00000000 60 5f 53 50 53 5f 63 6d 64 5f 73 74 61 74 75 73 |`_SPS_cmd_status| 00000010 60 20 60 5f 53 50 53 5f 65 6e 76 60 20 60 5f 53 |` `_SPS_env` `_S| 00000020 50 53 5f 63 77 64 60 20 60 5f 53 50 53 5f 67 69 |PS_cwd` `_SPS_gi| 00000030 74 5f 62 61 72 60 0a 1b 5b 33 38 3b 32 3b 31 34 |t_bar`..[38;2;14| 00000040 30 3b 32 30 36 3b 32 35 30 6d 72 6b 69 74 6f 76 |0;206;250mrkitov| 00000050 65 72 1b 5b 31 3b 39 37 6d 40 1b 5b 30 6d 1b 5b |er.[1;97m@.[0m.[| 00000060 33 38 3b 32 3b 31 34 30 3b 32 30 36 3b 32 35 30 |38;2;140;206;250| 00000070 6d 63 6f 6d 70 6c 79 20 1b 5b 33 38 3b 32 3b 32 |mcomply .[38;2;2| 00000080 32 30 3b 32 30 3b 36 30 6d 3e 1b 5b 30 6d 20 0a |20;20;60m>.[0m .| 00000090 -- You are receiving this mail because: You are on the CC list for the bug.
From bugzilla at busybox.net Wed Jan 5 12:03:39 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 12:03:39 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #5 from rkitover at gmail.com --- Here is the prompt with all of the functions expanded once, it still exhibits the same behavior: 00000000 1b 5b 30 3b 33 32 6d 76 20 1b 5b 30 3b 39 35 6d |.[0;32mv .[0;95m| 00000010 53 55 53 45 20 1b 5b 33 33 6d 7e 20 0a 1b 5b 33 |SUSE .[33m~ ..[3| 00000020 38 3b 32 3b 31 34 30 3b 32 30 36 3b 32 35 30 6d |8;2;140;206;250m| 00000030 72 6b 69 74 6f 76 65 72 1b 5b 31 3b 39 37 6d 40 |rkitover.[1;97m@| 00000040 1b 5b 30 6d 1b 5b 33 38 3b 32 3b 31 34 30 3b 32 |.[0m.[38;2;140;2| 00000050 30 36 3b 32 35 30 6d 63 6f 6d 70 6c 79 20 1b 5b |06;250mcomply .[| 00000060 33 38 3b 32 3b 32 32 30 3b 32 30 3b 36 30 6d 3e |38;2;220;20;60m>| 00000070 1b 5b 30 6d 20 0a |.[0m .|00000076 -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 5 15:40:15 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 15:40:15 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #6 from Denys Vlasenko --- Tried to reproduce with this: e=`printf '\033'` n=$'\n' PS1="$e[0;32m""v ""$e[0;95m""SUSE ""$e[33m""~ ""$n$e[38;2;140;206;250m""rkitover""$e[1;97m""@""$e[0m$e[38;2;140;206;250m""comply ""$e[38;2;220;20;60m"">""$e[0m " but it works for me: completions do not "eat" previous screen row. What is your version of busybox? Please attach your .config. -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Wed Jan 5 16:10:00 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 16:10:00 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #7 from rkitover at gmail.com --- Same result for me with your code. BusyBox v1.34.1 () multi-call binary. This is from OpenSUSE Tumbleweed, the package is: busybox-1.34.1-2.1.x86_64 This is the config they use: https://build.opensuse.org/package/view_file/Base:System/busybox/busybox.config?expand=1 -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 5 17:04:23 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 17:04:23 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #8 from Denys Vlasenko --- lineedit.c: * lineedit does not know that the terminal escape sequences do not * take up space on the screen. The redisplay code assumes, unless * told otherwise, that each character in the prompt is a printable * character that takes up one character position on the screen. * You need to tell lineedit that some sequences of characters * in the prompt take up no screen space. Compatibly with readline, * use the \[ escape to begin a sequence of non-printing characters, * and the \] escape to signal the end of such a sequence. Example: * * PS1='\[\033[01;32m\]\u@\h\[\033[01;34m\] \w \$\[\033[00m\] ' Your PS1 has no such annotations, and this is what makes the line editing code incorrectly conclude that the line has wrapped when it did not. The very same PS1 also misbehaves in bash. (This depends on terminal width, which is why it did not happen to me in a wider terminal).
This PS1 would work: PS1="\[$e[0;32m\]""v ""\[$e[0;95m\]""SUSE ""\[$e[33m\]""~ ""\n\[$e[38;2;140;206;250m\]""rkitover""\[$e[1;97m\]""@""\[$e[0m$e[38;2;140;206;250m\]""comply ""\[$e[38;2;220;20;60m\]"">""\[$e[0m\] " ...but their .config has this: # CONFIG_FEATURE_EDITING_FANCY_PROMPT is not set which disables the \[ \] feature (and all other useful magic for PS1, such as \u \w). It needs to be enabled... -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 5 17:15:11 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 17:15:11 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #9 from rkitover at gmail.com --- In bash I use the \001 and \002 escape sequences for this, so my prompt works there. Do the \[ and \] sequences map to a character? How can I detect busybox ash as opposed to ksh or something like that? I will get the suse package fixed. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 5 17:35:59 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 17:35:59 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |INVALID -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Wed Jan 5 17:37:24 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 05 Jan 2022 17:37:24 +0000 Subject: [Bug 14486] completion broken with multi-line prompts In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14486 --- Comment #10 from Denys Vlasenko --- bash supports \[ \] too. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Wed Jan 5 21:04:21 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 5 Jan 2022 22:04:21 +0100 Subject: [git commit] less: code shrink Message-ID: <20220105211615.D89C382BA0@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=076f5e064fa7b6cc2c03b030abcf2cbd60514180 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta restore_tty - 29 +29 less_main 2107 2105 -2 getch_nowait 253 251 -2 buffer_print 614 612 -2 less_exit 51 12 -39 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/4 up/down: 29/-45) Total: -16 bytes Signed-off-by: Denys Vlasenko --- miscutils/less.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/miscutils/less.c b/miscutils/less.c index 26983f40d..6825e5577 100644 --- a/miscutils/less.c +++ b/miscutils/less.c @@ -325,15 +325,18 @@ static void print_statusline(const char *str) } /* Exit the program gracefully */ -static void less_exit(int code) +static void restore_tty(void) { set_tty_cooked(); if (!(G.kbd_fd_orig_flags & O_NONBLOCK)) ndelay_off(kbd_fd); clear_line(); - if (code < 0) - kill_myself_with_sig(- code); /* does not return */ - exit(code); +} + +static void less_exit(void) +{ + restore_tty(); + exit(EXIT_SUCCESS); } #if (ENABLE_FEATURE_LESS_DASHCMD && ENABLE_FEATURE_LESS_LINENUMS) \ @@ -913,7 +916,7 @@ static void buffer_print(void) ) { i = option_mask32 & FLAG_F ? 
0 : cur_fline; if (max_fline - i <= max_displayed_line) - less_exit(EXIT_SUCCESS); + less_exit(); } status_print(); } @@ -1146,7 +1149,7 @@ static int64_t getch_nowait(void) goto again; } /* EOF/error (ssh session got killed etc) */ - less_exit(EXIT_SUCCESS); + less_exit(); } set_tty_cooked(); return key64; @@ -1297,7 +1300,7 @@ static void colon_process(void) change_file(-1); break; case 'q': - less_exit(EXIT_SUCCESS); + less_exit(); break; case 'x': change_file(0); @@ -1715,7 +1718,7 @@ static void keypress_process(int keypress) buffer_line(cur_fline); break; case 'q': case 'Q': - less_exit(EXIT_SUCCESS); + less_exit(); break; #if ENABLE_FEATURE_LESS_MARKS case 'm': @@ -1793,7 +1796,8 @@ static void keypress_process(int keypress) static void sig_catcher(int sig) { - less_exit(- sig); + restore_tty(); + kill_myself_with_sig(sig); /* does not return */ } #if ENABLE_FEATURE_LESS_WINCH From vda.linux at googlemail.com Wed Jan 5 21:16:06 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 5 Jan 2022 22:16:06 +0100 Subject: [git commit] libbb: code shrink: introduce and use [_]exit_SUCCESS() Message-ID: <20220105211616.053F682BA0@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=db5546ca101846f18294a43b39883bc4ff53613a branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta exit_SUCCESS - 7 +7 _exit_SUCCESS - 7 +7 run_pipe 1562 1567 +5 pseudo_exec_argv 399 400 +1 finish 86 87 +1 start_stop_daemon_main 1109 1107 -2 shutdown_on_signal 38 36 -2 runsv_main 1662 1660 -2 redirect 1070 1068 -2 read_line 79 77 -2 pause_and_low_level_reboot 54 52 -2 list_i2c_busses_and_exit 483 481 -2 less_exit 12 10 -2 identify 4123 4121 -2 grep_file 1161 1159 -2 getty_main 1519 1517 -2 fsck_minix_main 2681 2679 -2 free_session 132 130 -2 fdisk_main 4739 4737 -2 clean_up_and_exit 53 51 -2 bsd_select 1566 1564 -2 bb_daemonize_or_rexec 198 196 -2 ------------------------------------------------------------------------------ 
(add/remove: 2/0 grow/shrink: 3/17 up/down: 21/-34) Total: -13 bytes Signed-off-by: Denys Vlasenko --- debianutils/start_stop_daemon.c | 4 ++-- findutils/grep.c | 2 +- include/libbb.h | 2 ++ init/init.c | 6 +++--- libbb/vfork_daemon_rexec.c | 4 ++-- libbb/xfuncs.c | 10 ++++++++++ loginutils/getty.c | 6 +++--- loginutils/login.c | 2 +- miscutils/devfsd.c | 4 ++-- miscutils/hdparm.c | 2 +- miscutils/i2c_tools.c | 2 +- miscutils/less.c | 4 ++-- miscutils/watchdog.c | 2 +- modutils/modprobe-small.c | 2 +- networking/arping.c | 2 +- networking/inetd.c | 2 +- networking/nc.c | 2 +- networking/telnetd.c | 2 +- runit/runsv.c | 2 +- shell/ash.c | 2 +- shell/hush.c | 4 ++-- util-linux/fdisk.c | 4 ++-- util-linux/fdisk_osf.c | 4 ++-- util-linux/fsck_minix.c | 2 +- 24 files changed, 45 insertions(+), 33 deletions(-) diff --git a/debianutils/start_stop_daemon.c b/debianutils/start_stop_daemon.c index 68df44ae9..3e5dd9faa 100644 --- a/debianutils/start_stop_daemon.c +++ b/debianutils/start_stop_daemon.c @@ -519,7 +519,7 @@ int start_stop_daemon_main(int argc UNUSED_PARAM, char **argv) /* why _exit? 
the child may have changed the stack, * so "return 0" may do bad things */ - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } /* Child */ setsid(); /* detach from controlling tty */ @@ -531,7 +531,7 @@ int start_stop_daemon_main(int argc UNUSED_PARAM, char **argv) */ pid = xvfork(); if (pid != 0) - _exit(EXIT_SUCCESS); /* Parent */ + _exit_SUCCESS(); /* Parent */ } if (opt & OPT_MAKEPID) { /* User wants _us_ to make the pidfile */ diff --git a/findutils/grep.c b/findutils/grep.c index 8600d72fa..0b72812f1 100644 --- a/findutils/grep.c +++ b/findutils/grep.c @@ -470,7 +470,7 @@ static int grep_file(FILE *file) * "exit immediately with zero status * if any match is found, * even if errors were detected" */ - exit(EXIT_SUCCESS); + exit_SUCCESS(); } /* -l "print filenames with matches": stop after the first match */ if (option_mask32 & OPT_l) { diff --git a/include/libbb.h b/include/libbb.h index 8308d6259..c93058f6d 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1278,6 +1278,8 @@ void set_task_comm(const char *comm) FAST_FUNC; # define re_execed_comm() 0 # define set_task_comm(name) ((void)0) #endif +void exit_SUCCESS(void) NORETURN FAST_FUNC; +void _exit_SUCCESS(void) NORETURN FAST_FUNC; /* Helpers for daemonization. * diff --git a/init/init.c b/init/init.c index efab5dcb4..785a3b460 100644 --- a/init/init.c +++ b/init/init.c @@ -744,7 +744,7 @@ static void pause_and_low_level_reboot(unsigned magic) pid = vfork(); if (pid == 0) { /* child */ reboot(magic); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } /* Used to have "while (1) sleep(1)" here. 
* However, in containers reboot() call is ignored, and with that loop @@ -752,7 +752,7 @@ static void pause_and_low_level_reboot(unsigned magic) */ waitpid(pid, NULL, 0); sleep1(); /* paranoia */ - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } static void run_shutdown_and_kill_processes(void) @@ -942,7 +942,7 @@ static void reload_inittab(void) for (a = G.init_action_list; a; a = a->next) if (a->action_type == 0 && a->pid != 0) kill(a->pid, SIGKILL); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } } #endif diff --git a/libbb/vfork_daemon_rexec.c b/libbb/vfork_daemon_rexec.c index 31e97051f..79141936a 100644 --- a/libbb/vfork_daemon_rexec.c +++ b/libbb/vfork_daemon_rexec.c @@ -308,7 +308,7 @@ void FAST_FUNC bb_daemonize_or_rexec(int flags, char **argv) /* fflush_all(); - add it in fork_or_rexec() if necessary */ if (fork_or_rexec(argv)) - _exit(EXIT_SUCCESS); /* parent */ + _exit_SUCCESS(); /* parent */ /* if daemonizing, detach from stdio & ctty */ setsid(); dup2(fd, 0); @@ -320,7 +320,7 @@ void FAST_FUNC bb_daemonize_or_rexec(int flags, char **argv) // * Prevent this: stop being a session leader. 
// */ // if (fork_or_rexec(argv)) -// _exit(EXIT_SUCCESS); /* parent */ +// _exit_SUCCESS(); /* parent */ // } } while (fd > 2) { diff --git a/libbb/xfuncs.c b/libbb/xfuncs.c index c40dcb706..465e5366c 100644 --- a/libbb/xfuncs.c +++ b/libbb/xfuncs.c @@ -423,3 +423,13 @@ int FAST_FUNC wait4pid(pid_t pid) return WTERMSIG(status) + 0x180; return 0; } + +void FAST_FUNC exit_SUCCESS(void) +{ + exit(EXIT_SUCCESS); +} + +void FAST_FUNC _exit_SUCCESS(void) +{ + _exit(EXIT_SUCCESS); +} diff --git a/loginutils/getty.c b/loginutils/getty.c index 6c6d409f4..cd6378d80 100644 --- a/loginutils/getty.c +++ b/loginutils/getty.c @@ -484,7 +484,7 @@ static char *get_logname(void) if (read(STDIN_FILENO, &c, 1) < 1) { finalize_tty_attrs(); if (errno == EINTR || errno == EIO) - exit(EXIT_SUCCESS); + exit_SUCCESS(); bb_simple_perror_msg_and_die(bb_msg_read_error); } @@ -511,7 +511,7 @@ static char *get_logname(void) case CTL('C'): case CTL('D'): finalize_tty_attrs(); - exit(EXIT_SUCCESS); + exit_SUCCESS(); case '\0': /* BREAK. 
If we have speeds to try, * return NULL (will switch speeds and return here) */ @@ -538,7 +538,7 @@ static char *get_logname(void) static void alarm_handler(int sig UNUSED_PARAM) { finalize_tty_attrs(); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } static void sleep10(void) diff --git a/loginutils/login.c b/loginutils/login.c index ce87e318a..569053c12 100644 --- a/loginutils/login.c +++ b/loginutils/login.c @@ -312,7 +312,7 @@ static void alarm_handler(int sig UNUSED_PARAM) /* unix API is brain damaged regarding O_NONBLOCK, * we should undo it, or else we can affect other processes */ ndelay_off(STDOUT_FILENO); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } int login_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; diff --git a/miscutils/devfsd.c b/miscutils/devfsd.c index e5bb8a2d8..839d00fd0 100644 --- a/miscutils/devfsd.c +++ b/miscutils/devfsd.c @@ -453,7 +453,7 @@ int devfsd_main(int argc, char **argv) DEVFSD_PROTOCOL_REVISION_DAEMON, bb_msg_proto_rev, proto_rev); if (DEVFSD_PROTOCOL_REVISION_DAEMON != proto_rev) bb_error_msg_and_die("%s mismatch!", bb_msg_proto_rev); - exit(EXIT_SUCCESS); /* -v */ + exit_SUCCESS(); /* -v */ } /* Tell kernel we are special(i.e. 
we get to see hidden entries) */ xioctl(fd, DEVFSDIOC_SET_EVENT_MASK, 0); @@ -474,7 +474,7 @@ int devfsd_main(int argc, char **argv) dir_operation(SERVICE, mount_point, 0, NULL); if (ENABLE_DEVFSD_FG_NP && no_polling) - exit(EXIT_SUCCESS); + exit_SUCCESS(); if (ENABLE_DEVFSD_VERBOSE || ENABLE_DEBUG) logmode = LOGMODE_BOTH; diff --git a/miscutils/hdparm.c b/miscutils/hdparm.c index 01b4e8e2e..d8d8f6166 100644 --- a/miscutils/hdparm.c +++ b/miscutils/hdparm.c @@ -1271,7 +1271,7 @@ static void identify(uint16_t *val) } } - exit(EXIT_SUCCESS); + exit_SUCCESS(); } #endif diff --git a/miscutils/i2c_tools.c b/miscutils/i2c_tools.c index b25d49792..e3741eeba 100644 --- a/miscutils/i2c_tools.c +++ b/miscutils/i2c_tools.c @@ -1212,7 +1212,7 @@ static void NORETURN list_i2c_busses_and_exit(void) } } - exit(EXIT_SUCCESS); + exit_SUCCESS(); } static void NORETURN no_support(const char *cmd) diff --git a/miscutils/less.c b/miscutils/less.c index 6825e5577..82c4b21f0 100644 --- a/miscutils/less.c +++ b/miscutils/less.c @@ -333,10 +333,10 @@ static void restore_tty(void) clear_line(); } -static void less_exit(void) +static NOINLINE void less_exit(void) { restore_tty(); - exit(EXIT_SUCCESS); + exit_SUCCESS(); } #if (ENABLE_FEATURE_LESS_DASHCMD && ENABLE_FEATURE_LESS_LINENUMS) \ diff --git a/miscutils/watchdog.c b/miscutils/watchdog.c index d8e9c78f5..9f5a4b849 100644 --- a/miscutils/watchdog.c +++ b/miscutils/watchdog.c @@ -76,7 +76,7 @@ static void shutdown_on_signal(int sig UNUSED_PARAM) { remove_pidfile_std_path_and_ext("watchdog"); shutdown_watchdog(); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } static void watchdog_open(const char* device) diff --git a/modutils/modprobe-small.c b/modutils/modprobe-small.c index db44a2ed0..b61651621 100644 --- a/modutils/modprobe-small.c +++ b/modutils/modprobe-small.c @@ -415,7 +415,7 @@ static FAST_FUNC int fileAction(struct recursive_state *state, /* Load was successful, there is nothing else to do. 
* This can happen ONLY for "top-level" module load, * not a dep, because deps don't do dirscan. */ - exit(EXIT_SUCCESS); + exit_SUCCESS(); } } diff --git a/networking/arping.c b/networking/arping.c index d44d7d697..86f0221ed 100644 --- a/networking/arping.c +++ b/networking/arping.c @@ -159,7 +159,7 @@ static void finish(void) if (option_mask32 & DAD) exit(!!received); if (option_mask32 & UNSOLICITED) - exit(EXIT_SUCCESS); + exit_SUCCESS(); exit(!received); } diff --git a/networking/inetd.c b/networking/inetd.c index e5352a555..e71be51c3 100644 --- a/networking/inetd.c +++ b/networking/inetd.c @@ -1208,7 +1208,7 @@ static void clean_up_and_exit(int sig UNUSED_PARAM) close(sep->se_fd); } remove_pidfile_std_path_and_ext("inetd"); - exit(EXIT_SUCCESS); + exit_SUCCESS(); } int inetd_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; diff --git a/networking/nc.c b/networking/nc.c index d351bf72a..ab1316339 100644 --- a/networking/nc.c +++ b/networking/nc.c @@ -268,7 +268,7 @@ int nc_main(int argc, char **argv) nread = safe_read(pfds[fdidx].fd, iobuf, COMMON_BUFSIZE); if (fdidx != 0) { if (nread < 1) - exit(EXIT_SUCCESS); + exit_SUCCESS(); ofd = STDOUT_FILENO; } else { if (nread < 1) { diff --git a/networking/telnetd.c b/networking/telnetd.c index 581da1924..0805e464f 100644 --- a/networking/telnetd.c +++ b/networking/telnetd.c @@ -582,7 +582,7 @@ free_session(struct tsession *ts) struct tsession *t; if (option_mask32 & OPT_INETD) - exit(EXIT_SUCCESS); + exit_SUCCESS(); /* Unlink this telnet session from the session list */ t = G.sessions; diff --git a/runit/runsv.c b/runit/runsv.c index a4b8af494..6ad6bf46e 100644 --- a/runit/runsv.c +++ b/runit/runsv.c @@ -700,7 +700,7 @@ int runsv_main(int argc UNUSED_PARAM, char **argv) if (svd[0].sd_want == W_EXIT && svd[0].state == S_DOWN) { if (svd[1].pid == 0) - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); if (svd[1].sd_want != W_EXIT) { svd[1].sd_want = W_EXIT; /* stopservice(&svd[1]); */ diff --git a/shell/ash.c b/shell/ash.c 
index 827643808..4a8ec0c03 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -5505,7 +5505,7 @@ openhere(union node *redir) ignoresig(SIGTSTP); //signal(SIGTSTP, SIG_IGN); signal(SIGPIPE, SIG_DFL); xwrite(pip[1], p, len); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } out: close(pip[1]); diff --git a/shell/hush.c b/shell/hush.c index 6a27b1634..982fc356a 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -8587,7 +8587,7 @@ static NOINLINE void pseudo_exec_argv(nommu_save_t *nommu_save, * expand_assignments(): think about ... | var=`sleep 1` | ... */ free_strings(new_env); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } sv_shadowed = G.shadowed_vars_pp; @@ -8768,7 +8768,7 @@ static void pseudo_exec(nommu_save_t *nommu_save, /* Case when we are here: ... | >file */ debug_printf_exec("pseudo_exec'ed null command\n"); - _exit(EXIT_SUCCESS); + _exit_SUCCESS(); } #if ENABLE_HUSH_JOB diff --git a/util-linux/fdisk.c b/util-linux/fdisk.c index 1c2a7d683..9c393b8fc 100644 --- a/util-linux/fdisk.c +++ b/util-linux/fdisk.c @@ -665,7 +665,7 @@ read_line(const char *prompt) sz = read_line_input(NULL, prompt, line_buffer, sizeof(line_buffer)); if (sz <= 0) - exit(EXIT_SUCCESS); /* Ctrl-D or Ctrl-C */ + exit_SUCCESS(); /* Ctrl-D or Ctrl-C */ if (line_buffer[sz-1] == '\n') line_buffer[--sz] = '\0'; @@ -2855,7 +2855,7 @@ xselect(void) if (ENABLE_FEATURE_CLEAN_UP) close_dev_fd(); bb_putchar('\n'); - exit(EXIT_SUCCESS); + exit_SUCCESS(); case 'r': return; case 's': diff --git a/util-linux/fdisk_osf.c b/util-linux/fdisk_osf.c index 765740ff1..6c66c130d 100644 --- a/util-linux/fdisk_osf.c +++ b/util-linux/fdisk_osf.c @@ -383,7 +383,7 @@ bsd_select(void) if (xbsd_readlabel(NULL) == 0) if (xbsd_create_disklabel() == 0) - exit(EXIT_SUCCESS); + exit_SUCCESS(); #endif @@ -411,7 +411,7 @@ bsd_select(void) case 'q': if (ENABLE_FEATURE_CLEAN_UP) close_dev_fd(); - exit(EXIT_SUCCESS); + exit_SUCCESS(); case 'r': return; case 's': diff --git a/util-linux/fsck_minix.c b/util-linux/fsck_minix.c index 
40b86d01b..dd2265c32 100644 --- a/util-linux/fsck_minix.c +++ b/util-linux/fsck_minix.c @@ -423,7 +423,7 @@ static void check_mount(void) cont = ask("Do you really want to continue", 0); if (!cont) { puts("Check aborted"); - exit(EXIT_SUCCESS); + exit_SUCCESS(); } } } From vda.linux at googlemail.com Wed Jan 5 22:03:54 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 5 Jan 2022 23:03:54 +0100 Subject: [git commit] libbb: change xstrndup, xmemdup to take size_t as size parameter Message-ID: <20220105215808.4F56182B7B@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=6062c0d19bc201cbeb61b8875598cdd7a14a5ae0 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Also, remove entirely usually-disabled paranoia check (was also using wrong config option to enable itself). Signed-off-by: Denys Vlasenko --- include/libbb.h | 4 ++-- libbb/xfuncs_printf.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/libbb.h b/include/libbb.h index c93058f6d..daa310776 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -429,8 +429,8 @@ void *xrealloc(void *old, size_t size) FAST_FUNC; xrealloc_vector_helper((vector), (sizeof((vector)[0]) << 8) + (shift), (idx)) void* xrealloc_vector_helper(void *vector, unsigned sizeof_and_shift, int idx) FAST_FUNC; char *xstrdup(const char *s) FAST_FUNC RETURNS_MALLOC; -char *xstrndup(const char *s, int n) FAST_FUNC RETURNS_MALLOC; -void *xmemdup(const void *s, int n) FAST_FUNC RETURNS_MALLOC; +char *xstrndup(const char *s, size_t n) FAST_FUNC RETURNS_MALLOC; +void *xmemdup(const void *s, size_t n) FAST_FUNC RETURNS_MALLOC; void *mmap_read(int fd, size_t size) FAST_FUNC; void *mmap_anon(size_t size) FAST_FUNC; void *xmmap_anon(size_t size) FAST_FUNC; diff --git a/libbb/xfuncs_printf.c b/libbb/xfuncs_printf.c index d29acebcd..fc630d176 100644 --- a/libbb/xfuncs_printf.c +++ b/libbb/xfuncs_printf.c @@ -91,13 +91,10 @@ char* FAST_FUNC xstrdup(const char *s) // Die if we 
can't allocate n+1 bytes (space for the null terminator) and copy // the (possibly truncated to length n) string into it. -char* FAST_FUNC xstrndup(const char *s, int n) +char* FAST_FUNC xstrndup(const char *s, size_t n) { char *t; - if (ENABLE_DEBUG && s == NULL) - bb_simple_error_msg_and_die("xstrndup bug"); - t = strndup(s, n); if (t == NULL) @@ -106,7 +103,7 @@ char* FAST_FUNC xstrndup(const char *s, int n) return t; } -void* FAST_FUNC xmemdup(const void *s, int n) +void* FAST_FUNC xmemdup(const void *s, size_t n) { return memcpy(xmalloc(n), s, n); } From vda.linux at googlemail.com Thu Jan 6 09:55:53 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 6 Jan 2022 10:55:53 +0100 Subject: [git commit] fdisk: recognize EBBR protective partitions Message-ID: <20220106095422.9942382B4D@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=a93668cc4277b14eaff07fcfdef9693c990ec824 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master The MBR partition type 0xF8 is used by the Arm EBBR specification[1] for protective partitions over fixed-location firmware images. [1]: https://github.com/ARM-software/ebbr Signed-off-by: Vincent Stehlé 
Signed-off-by: Denys Vlasenko --- util-linux/fdisk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/util-linux/fdisk.c b/util-linux/fdisk.c index 9c393b8fc..20e7d56fa 100644 --- a/util-linux/fdisk.c +++ b/util-linux/fdisk.c @@ -355,6 +355,7 @@ static const char *const i386_sys_types[] ALIGN_PTR = { "\xef" "EFI (FAT-12/16/32)", /* Intel EFI System Partition */ "\xf0" "Linux/PA-RISC boot", /* Linux/PA-RISC boot loader */ "\xf2" "DOS secondary", /* DOS 3.3+ secondary */ + "\xf8" "EBBR protective", /* Arm EBBR firmware protective partition */ "\xfd" "Linux raid autodetect", /* New (2.2.x) raid partition with autodetect using persistent superblock */ From vda.linux at googlemail.com Thu Jan 6 23:43:59 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Fri, 7 Jan 2022 00:43:59 +0100 Subject: [git commit] libbb/sha1: optional x86-64 hardware accelerates hashing Message-ID: <20220106233840.5801C82BA5@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=711e20ecb85d13f98ba3e2bdcb344ee7534829c4 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64_shaNI - 510 +510 sha1_begin 52 107 +55 .rodata 108285 108301 +16 static.shaNI - 1 +1 ------------------------------------------------------------------------------ (add/remove: 4/0 grow/shrink: 2/0 up/down: 582/0) Total: 582 bytes Signed-off-by: Denys Vlasenko --- libbb/Config.src | 7 ++ libbb/Kbuild.src | 1 + libbb/hash_md5_sha.c | 38 ++++++- libbb/hash_md5_sha_x86-64_shaNI.S | 225 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 270 insertions(+), 1 deletion(-) diff --git a/libbb/Config.src b/libbb/Config.src index c80bee286..708d3b0c8 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -63,6 +63,13 @@ config SHA1_SMALL 1 224 229 654 732 2,3 200 195 358 380 +config SHA1_HWACCEL + bool "SHA1: Use hardware accelerated instructions if possible" + default y + help + On x86, this adds ~590 bytes of code. 
Throughput + is about twice as fast as fully-unrolled generic code. + config SHA3_SMALL int "SHA3: Trade bytes for speed (0:fast, 1:slow)" default 1 # all "fast or small" options default to small diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index 19b8aad60..a3db02b6f 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src @@ -57,6 +57,7 @@ lib-y += make_directory.o lib-y += makedev.o lib-y += hash_md5_sha.o lib-y += hash_md5_sha_x86-64.o +lib-y += hash_md5_sha_x86-64_shaNI.o # Alternative (disabled) MD5 implementation #lib-y += hash_md5prime.o lib-y += messages.o diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index ee19c1cb7..4c6904b48 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -699,7 +699,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) /* in hash_md5_sha_x86-64.S */ struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; -void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM); +void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx); # else /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. 
@@ -1142,6 +1142,28 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx) } #endif /* NEED_SHA512 */ +#if ENABLE_SHA1_HWACCEL +# if defined(__GNUC__) && defined(__x86_64__) +static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) +{ + asm ( + "cpuid\n" + : "=a"(*eax), /* Output */ + "=b"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(*eax), /* Input */ + "1"(*ebx), + "2"(*ecx), + "3"(*edx) + /* No clobbered registers */ + ); +} +struct ASM_expects_80_shaNI { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; +void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx); +# endif +#endif + void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) { ctx->hash[0] = 0x67452301; @@ -1151,6 +1173,20 @@ void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) ctx->hash[4] = 0xc3d2e1f0; ctx->total64 = 0; ctx->process_block = sha1_process_block64; +#if ENABLE_SHA1_HWACCEL +# if defined(__GNUC__) && defined(__x86_64__) + { + static smallint shaNI; + if (!shaNI) { + unsigned eax = 7, ebx = ebx, ecx = 0, edx = edx; + cpuid(&eax, &ebx, &ecx, &edx); + shaNI = ((ebx >> 28) & 2) - 1; + } + if (shaNI > 0) + ctx->process_block = sha1_process_block64_shaNI; + } +# endif +#endif } static const uint32_t init256[] ALIGN4 = { diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S new file mode 100644 index 000000000..473b472f1 --- /dev/null +++ b/libbb/hash_md5_sha_x86-64_shaNI.S @@ -0,0 +1,225 @@ +#if ENABLE_SHA1_HWACCEL && defined(__GNUC__) && defined(__x86_64__) +/* The code is adapted from Linux kernel's source */ + +// We use shorter insns, even though they are for "wrong" +// data type (fp, not int). +// For Intel, there is no penalty for doing it at all +// (CPUs which do have such penalty do not support SHA1 insns). +// For AMD, the penalty is one extra cycle +// (allegedly: I failed to find measurable difference). 
+ +//#define mova128 movdqa +#define mova128 movaps +//#define movu128 movdqu +#define movu128 movups +//#define xor128 pxor +#define xor128 xorps +//#define shuf128_32 pshufd +#define shuf128_32 shufps + +#define extr128_32 pextrd +//#define extr128_32 extractps # not shorter + + .section .text.sha1_process_block64_shaNI,"ax",@progbits + .globl sha1_process_block64_shaNI + .hidden sha1_process_block64_shaNI + .type sha1_process_block64_shaNI, @function + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + .balign 8 # allow decoders to fetch at least 2 first insns +sha1_process_block64_shaNI: + /* load initial hash values */ + + xor128 E0, E0 + movu128 80(%rdi), ABCD + pinsrd $3, 80+4*4(%rdi), E0 # load to upper 32-bit word + shuf128_32 $0x1B, ABCD, ABCD # 00011011: bswap + + mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + + /* Save hash values for addition after rounds */ + mova128 E0, %xmm9 + mova128 ABCD, %xmm8 + + /* Rounds 0-3 */ + movu128 0*16(%rdi), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + mova128 ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movu128 1*16(%rdi), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movu128 2*16(%rdi), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 12-15 */ + movu128 3*16(%rdi), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, 
MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + xor128 MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 
76-79 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte %xmm9, E0 + paddd %xmm8, ABCD + + /* Write hash values back in the correct order */ + shuf128_32 $0x1B, ABCD, ABCD + movu128 ABCD, 80(%rdi) + extr128_32 $3, E0, 80+4*4(%rdi) + + ret + .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f + +#endif From vda.linux at googlemail.com Fri Jan 7 00:33:46 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Fri, 7 Jan 2022 01:33:46 +0100 Subject: [git commit] powertop: fix cpuid asm: ebx saving/restoring is properly done by gcc Message-ID: <20220107002821.6E5F7810AF@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=89092c61bc37ed61bfacf10433d52b9bc4299116 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta print_intel_cstates 481 477 -4 Signed-off-by: Denys Vlasenko --- procps/powertop.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/procps/powertop.c b/procps/powertop.c index 24c2b320f..18ddaa3ec 100644 --- a/procps/powertop.c +++ b/procps/powertop.c @@ -506,12 +506,9 @@ static void cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, { /* EAX value specifies what information to return */ asm ( - " pushl %%ebx\n" /* Save EBX */ " cpuid\n" - " movl %%ebx, %1\n" /* Save content of EBX */ - " popl %%ebx\n" /* Restore EBX */ : "=a"(*eax), /* Output */ - "=r"(*ebx), + "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(*eax), /* Input */ From vda.linux at googlemail.com Fri Jan 7 00:32:13 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Fri, 7 Jan 2022 01:32:13 +0100 Subject: [git commit] libbb/sha1: optional i686 hardware accelerates hashing Message-ID: <20220107002821.666FA81476@busybox.osuosl.org> commit: 
https://git.busybox.net/busybox/commit/?id=a96ccbefe417aaac6a2ce59c788e01fc0f83902f branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_process_block64_shaNI - 524 +524 sha1_begin 57 114 +57 .rodata 104353 104369 +16 static.shaNI - 1 +1 ------------------------------------------------------------------------------ (add/remove: 4/0 grow/shrink: 2/0 up/down: 598/0) Total: 598 bytes Signed-off-by: Denys Vlasenko --- libbb/Kbuild.src | 1 + libbb/hash_md5_sha.c | 21 +++- libbb/hash_md5_sha_x86-32_shaNI.S | 231 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 252 insertions(+), 1 deletion(-) diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index a3db02b6f..e8bb24f6d 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src @@ -58,6 +58,7 @@ lib-y += makedev.o lib-y += hash_md5_sha.o lib-y += hash_md5_sha_x86-64.o lib-y += hash_md5_sha_x86-64_shaNI.o +lib-y += hash_md5_sha_x86-32_shaNI.o # Alternative (disabled) MD5 implementation #lib-y += hash_md5prime.o lib-y += messages.o diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 4c6904b48..0b3af723a 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -1143,6 +1143,25 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx) #endif /* NEED_SHA512 */ #if ENABLE_SHA1_HWACCEL +# if defined(__GNUC__) && defined(__i386__) +static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) +{ + asm ( + " cpuid\n" + : "=a"(*eax), /* Output */ + "=b"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(*eax), /* Input */ + "1"(*ebx), + "2"(*ecx), + "3"(*edx) + /* No clobbered registers */ + ); +} +struct ASM_expects_76_shaNI { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 76)]; }; +void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx); +# endif # if defined(__GNUC__) && defined(__x86_64__) static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) { @@ -1174,7 +1193,7 @@ void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) ctx->total64 = 0; 
ctx->process_block = sha1_process_block64; #if ENABLE_SHA1_HWACCEL -# if defined(__GNUC__) && defined(__x86_64__) +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) { static smallint shaNI; if (!shaNI) { diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S new file mode 100644 index 000000000..7202c7673 --- /dev/null +++ b/libbb/hash_md5_sha_x86-32_shaNI.S @@ -0,0 +1,231 @@ +#if ENABLE_SHA1_HWACCEL && defined(__GNUC__) && defined(__i386__) +/* The code is adapted from Linux kernel's source */ + +// We use shorter insns, even though they are for "wrong" +// data type (fp, not int). +// For Intel, there is no penalty for doing it at all +// (CPUs which do have such penalty do not support SHA1 insns). +// For AMD, the penalty is one extra cycle +// (allegedly: I failed to find measurable difference). + +//#define mova128 movdqa +#define mova128 movaps +//#define movu128 movdqu +#define movu128 movups +//#define xor128 pxor +#define xor128 xorps +//#define shuf128_32 pshufd +#define shuf128_32 shufps + +#define extr128_32 pextrd +//#define extr128_32 extractps # not shorter + + .section .text.sha1_process_block64_shaNI,"ax",@progbits + .globl sha1_process_block64_shaNI + .hidden sha1_process_block64_shaNI + .type sha1_process_block64_shaNI, @function + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + .balign 8 # allow decoders to fetch at least 2 first insns +sha1_process_block64_shaNI: + pushl %ebp + movl %esp, %ebp + subl $32, %esp + andl $~0xF, %esp # paddd needs aligned memory operand + + /* load initial hash values */ + xor128 E0, E0 + movu128 76(%eax), ABCD + pinsrd $3, 76+4*4(%eax), E0 # load to upper 32-bit word + shuf128_32 $0x1B, ABCD, ABCD # 00011011: bswap + + mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK + + /* Save hash values for addition after rounds */ 
+ movu128 E0, 16(%esp) + movu128 ABCD, (%esp) + + /* Rounds 0-3 */ + movu128 0*16(%eax), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + mova128 ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movu128 1*16(%eax), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movu128 2*16(%eax), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 12-15 */ + movu128 3*16(%eax), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + mova128 
ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + xor128 MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + xor128 MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + xor128 MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + mova128 ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + xor128 MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + mova128 ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + xor128 MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + mova128 ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + mova128 ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte 16(%esp), E0 + paddd (%esp), ABCD + + /* Write hash values back in the correct order */ + shuf128_32 $0x1B, ABCD, ABCD + movu128 ABCD, 76(%eax) + extr128_32 $3, E0, 76+4*4(%eax) + + movl %ebp, %esp + popl %ebp + ret + .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f + +#endif From vda.linux at googlemail.com Fri Jan 7 13:55:31 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Fri, 7 Jan 2022 14:55:31 +0100 Subject: [git commit] libbb/sha1: tweak comments Message-ID: <20220107134955.3431381EAD@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=0bab5da37e6c2351936bf3fade2eb1a6d75bb171 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- 
libbb/hash_md5_sha_x86-32_shaNI.S | 6 +++--- libbb/hash_md5_sha_x86-64_shaNI.S | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S index 7202c7673..6b12d1462 100644 --- a/libbb/hash_md5_sha_x86-32_shaNI.S +++ b/libbb/hash_md5_sha_x86-32_shaNI.S @@ -34,7 +34,7 @@ #define MSG3 %xmm6 #define SHUF_MASK %xmm7 - .balign 8 # allow decoders to fetch at least 2 first insns + .balign 8 # allow decoders to fetch at least 3 first insns sha1_process_block64_shaNI: pushl %ebp movl %esp, %ebp @@ -44,8 +44,8 @@ sha1_process_block64_shaNI: /* load initial hash values */ xor128 E0, E0 movu128 76(%eax), ABCD - pinsrd $3, 76+4*4(%eax), E0 # load to upper 32-bit word - shuf128_32 $0x1B, ABCD, ABCD # 00011011: bswap + pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word + shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S index 473b472f1..e2e5357e0 100644 --- a/libbb/hash_md5_sha_x86-64_shaNI.S +++ b/libbb/hash_md5_sha_x86-64_shaNI.S @@ -40,8 +40,8 @@ sha1_process_block64_shaNI: xor128 E0, E0 movu128 80(%rdi), ABCD - pinsrd $3, 80+4*4(%rdi), E0 # load to upper 32-bit word - shuf128_32 $0x1B, ABCD, ABCD # 00011011: bswap + pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word + shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK From vda.linux at googlemail.com Fri Jan 7 23:41:09 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 00:41:09 +0100 Subject: [git commit] whitespace fix Message-ID: <20220107233603.B60DE81D36@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=805ececa615606c02a5ca8f37f84dbbc527221eb branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-32_shaNI.S | 8 ++++---- libbb/hash_md5_sha_x86-64.S 
| 14 +++++++------- libbb/hash_md5_sha_x86-64.S.sh | 14 +++++++------- libbb/hash_md5_sha_x86-64_shaNI.S | 8 ++++---- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S index 6b12d1462..166cfd38a 100644 --- a/libbb/hash_md5_sha_x86-32_shaNI.S +++ b/libbb/hash_md5_sha_x86-32_shaNI.S @@ -18,11 +18,11 @@ #define shuf128_32 shufps #define extr128_32 pextrd -//#define extr128_32 extractps # not shorter +//#define extr128_32 extractps # not shorter .section .text.sha1_process_block64_shaNI,"ax", at progbits - .globl sha1_process_block64_shaNI - .hidden sha1_process_block64_shaNI + .globl sha1_process_block64_shaNI + .hidden sha1_process_block64_shaNI .type sha1_process_block64_shaNI, @function #define ABCD %xmm0 @@ -221,7 +221,7 @@ sha1_process_block64_shaNI: movl %ebp, %esp popl %ebp ret - .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI + .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 .align 16 diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index ff78fc049..87fb616a1 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -2,8 +2,8 @@ #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) .section .text.sha1_process_block64,"ax", at progbits - .globl sha1_process_block64 - .hidden sha1_process_block64 + .globl sha1_process_block64 + .hidden sha1_process_block64 .type sha1_process_block64, @function .balign 8 # allow decoders to fetch at least 5 first insns @@ -1273,15 +1273,15 @@ sha1_process_block64: popq %rdi # popq %r12 # - addl %eax, 80(%rdi) # ctx->hash[0] += a + addl %eax, 80(%rdi) # ctx->hash[0] += a popq %r13 # - addl %ebx, 84(%rdi) # ctx->hash[1] += b + addl %ebx, 84(%rdi) # ctx->hash[1] += b popq %r14 # - addl %ecx, 88(%rdi) # ctx->hash[2] += c + addl %ecx, 88(%rdi) # ctx->hash[2] += c popq %r15 # - addl %edx, 92(%rdi) # 
ctx->hash[3] += d + addl %edx, 92(%rdi) # ctx->hash[3] += d popq %rbx # - addl %ebp, 96(%rdi) # ctx->hash[4] += e + addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbp # ret diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index 7e50b64fb..eef009590 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -11,8 +11,8 @@ echo \ #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) .section .text.sha1_process_block64,"ax", at progbits - .globl sha1_process_block64 - .hidden sha1_process_block64 + .globl sha1_process_block64 + .hidden sha1_process_block64 .type sha1_process_block64, @function .balign 8 # allow decoders to fetch at least 5 first insns @@ -265,15 +265,15 @@ RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx b echo " popq %rdi # popq %r12 # - addl %eax, 80(%rdi) # ctx->hash[0] += a + addl %eax, 80(%rdi) # ctx->hash[0] += a popq %r13 # - addl %ebx, 84(%rdi) # ctx->hash[1] += b + addl %ebx, 84(%rdi) # ctx->hash[1] += b popq %r14 # - addl %ecx, 88(%rdi) # ctx->hash[2] += c + addl %ecx, 88(%rdi) # ctx->hash[2] += c popq %r15 # - addl %edx, 92(%rdi) # ctx->hash[3] += d + addl %edx, 92(%rdi) # ctx->hash[3] += d popq %rbx # - addl %ebp, 96(%rdi) # ctx->hash[4] += e + addl %ebp, 96(%rdi) # ctx->hash[4] += e popq %rbp # ret diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S index e2e5357e0..33cc3bf7f 100644 --- a/libbb/hash_md5_sha_x86-64_shaNI.S +++ b/libbb/hash_md5_sha_x86-64_shaNI.S @@ -18,11 +18,11 @@ #define shuf128_32 shufps #define extr128_32 pextrd -//#define extr128_32 extractps # not shorter +//#define extr128_32 extractps # not shorter .section .text.sha1_process_block64_shaNI,"ax", at progbits - .globl sha1_process_block64_shaNI - .hidden sha1_process_block64_shaNI + .globl sha1_process_block64_shaNI + .hidden sha1_process_block64_shaNI .type sha1_process_block64_shaNI, @function #define ABCD %xmm0 @@ -215,7 +215,7 @@ 
sha1_process_block64_shaNI: extr128_32 $3, E0, 80+4*4(%rdi) ret - .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI + .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 .align 16 From vda.linux at googlemail.com Sat Jan 8 00:25:23 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 01:25:23 +0100 Subject: [git commit] libbb/sha1: simplify cpuid code Message-ID: <20220108001951.CFF1583459@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=e7ff29402d23e1c265769dbe809cf2d329a75ec2 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sha1_begin 107 106 -1 Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha.c | 42 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 0b3af723a..a23db5152 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -1143,43 +1143,21 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx) #endif /* NEED_SHA512 */ #if ENABLE_SHA1_HWACCEL -# if defined(__GNUC__) && defined(__i386__) +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) { - asm ( - " cpuid\n" - : "=a"(*eax), /* Output */ - "=b"(*ebx), - "=c"(*ecx), - "=d"(*edx) - : "0"(*eax), /* Input */ - "1"(*ebx), - "2"(*ecx), - "3"(*edx) - /* No clobbered registers */ + asm ("cpuid" + : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) + : "0"(*eax), "1"(*ebx), "2"(*ecx), "3"(*edx) ); } -struct ASM_expects_76_shaNI { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 76)]; }; void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx); -# endif -# if defined(__GNUC__) && defined(__x86_64__) -static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) -{ - asm ( - "cpuid\n" - : "=a"(*eax), /* Output 
*/ - "=b"(*ebx), - "=c"(*ecx), - "=d"(*edx) - : "0"(*eax), /* Input */ - "1"(*ebx), - "2"(*ecx), - "3"(*edx) - /* No clobbered registers */ - ); -} +# if defined(__i386__) +struct ASM_expects_76_shaNI { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 76)]; }; +# endif +# if defined(__x86_64__) struct ASM_expects_80_shaNI { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; -void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx); +# endif # endif #endif @@ -1199,7 +1177,7 @@ void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) if (!shaNI) { unsigned eax = 7, ebx = ebx, ecx = 0, edx = edx; cpuid(&eax, &ebx, &ecx, &edx); - shaNI = ((ebx >> 28) & 2) - 1; + shaNI = ((ebx >> 29) << 1) - 1; } if (shaNI > 0) ctx->process_block = sha1_process_block64_shaNI; From vda.linux at googlemail.com Sat Jan 8 16:16:00 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 17:16:00 +0100 Subject: [git commit] XXXsum: handle binary sums with " " in the path Message-ID: <20220108161037.5B87A831C3@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=84a1305888ffcd0f4e47cfc4c6fc57918b97bdda branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master If a line specifies a binary checksum whose path contains two adjacent spaces, when checking digests with -c the two spaces will be used as the separator between the digest and the pathname instead of " *", as shown: $ echo foo > "/tmp/two spaces" $ md5sum -b "/tmp/two spaces" # This is GNU md5sum d3b07384d113edec49eaa6238ad5ff00 */tmp/two spaces $ md5sum -b "/tmp/two spaces" | ./busybox md5sum -c md5sum: can't open 'spaces': No such file or directory spaces: FAILED md5sum: WARNING: 1 of 1 computed checksums did NOT match function old new delta md5_sha1_sum_main 503 496 -7 Signed-off-by: Emanuele Giacomelli Signed-off-by: Denys Vlasenko --- coreutils/md5_sha1_sum.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/coreutils/md5_sha1_sum.c b/coreutils/md5_sha1_sum.c index 
3b389cb6b..0e57673f1 100644 --- a/coreutils/md5_sha1_sum.c +++ b/coreutils/md5_sha1_sum.c @@ -300,12 +300,10 @@ int md5_sha1_sum_main(int argc UNUSED_PARAM, char **argv) char *filename_ptr; count_total++; - filename_ptr = strstr(line, " "); - /* handle format for binary checksums */ - if (filename_ptr == NULL) { - filename_ptr = strstr(line, " *"); - } - if (filename_ptr == NULL) { + filename_ptr = strchr(line, ' '); + if (filename_ptr == NULL + || (filename_ptr[1] != ' ' && filename_ptr[1] != '*') + ) { if (flags & FLAG_WARN) { bb_simple_error_msg("invalid format"); } From vda.linux at googlemail.com Sat Jan 8 21:42:35 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 22:42:35 +0100 Subject: [git commit] awk: input numbers are never octal or hex (only program consts can be) Message-ID: <20220108213656.72FEC8345E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=e2952dfaff67f3641d3a6d3226753356170ff808 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta next_token 825 930 +105 getvar_i 114 129 +15 nextchar 49 53 +4 my_strtod 138 - -138 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 3/0 up/down: 124/-138) Total: -14 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 29 ++++++++++++++++++++++------- testsuite/awk.tests | 12 +++++++++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index f6314ac72..079d0bde5 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -848,7 +848,7 @@ static char *nextword(char **s) static char nextchar(char **s) { char c, *pps; - + again: c = *(*s)++; pps = *s; if (c == '\\') @@ -859,8 +859,11 @@ static char nextchar(char **s) */ if (c == '\\' && *s == pps) { /* unrecognized \z? */ c = *(*s); /* yes, fetch z */ - if (c) - (*s)++; /* advance unless z = NUL */ + if (c) { /* advance unless z = NUL */ + (*s)++; + if (c == '\n') /* \? 
eat it */ + goto again; + } } return c; } @@ -882,7 +885,13 @@ static ALWAYS_INLINE int isalnum_(int c) static double my_strtod(char **pp) { char *cp = *pp; - if (ENABLE_DESKTOP && cp[0] == '0') { + return strtod(cp, pp); +} +#if ENABLE_DESKTOP +static double my_strtod_or_hexoct(char **pp) +{ + char *cp = *pp; + if (cp[0] == '0') { /* Might be hex or octal integer: 0x123abc or 07777 */ char c = (cp[1] | 0x20); if (c == 'x' || isdigit(cp[1])) { @@ -901,6 +910,9 @@ static double my_strtod(char **pp) } return strtod(cp, pp); } +#else +# define my_strtod_or_hexoct(p) my_strtod(p) +#endif /* -------- working with variables (set/get/copy/etc) -------- */ @@ -1014,6 +1026,7 @@ static double getvar_i(var *v) if (s && *s) { debug_printf_eval("getvar_i: '%s'->", s); v->number = my_strtod(&s); + /* ^^^ hex/oct NOT allowed here! */ debug_printf_eval("%f (s:'%s')\n", v->number, s); if (v->type & VF_USER) { //TODO: skip_spaces() also skips backslash+newline, is it intended here? @@ -1125,10 +1138,10 @@ static uint32_t next_token(uint32_t expected) if (*p == '\0') { tc = TC_EOF; debug_printf_parse("%s: token found: TC_EOF\n", __func__); - } else if (*p == '\"') { + } else if (*p == '"') { /* it's a string */ char *s = t_string = ++p; - while (*p != '\"') { + while (*p != '"') { char *pp; if (*p == '\0' || *p == '\n') syntax_error(EMSG_UNEXP_EOS); @@ -1166,7 +1179,8 @@ static uint32_t next_token(uint32_t expected) } else if (*p == '.' 
|| isdigit(*p)) { /* it's a number */ char *pp = p; - t_double = my_strtod(&pp); + t_double = my_strtod_or_hexoct(&pp); + /* ^^^ awk only allows hex/oct consts in _program_, not in _input_ */ p = pp; if (*p == '.') syntax_error(EMSG_UNEXP_TOKEN); @@ -3503,6 +3517,7 @@ static var *evaluate(node *op, var *res) i = (Ld == 0); break; } + debug_printf_eval("COMPARE result: %d\n", (i == 0) ^ (opn & 1)); setvar_i(res, (i == 0) ^ (opn & 1)); break; } diff --git a/testsuite/awk.tests b/testsuite/awk.tests index bcaafe8fd..93e25d8c1 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -102,9 +102,13 @@ testing "awk unused function args are evaluated" \ optional DESKTOP testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" testing "awk hex const 2" "awk '{ print or(0x80000000,1) }'" "2147483649\n" "" "\n" -testing "awk oct const" "awk '{ print or(01234,1) }'" "669\n" "" "\n" +testing "awk oct const" "awk '{ print or(01234,1) }'" "669\n" "" "\n" SKIP= +# check that "hex/oct integer" heuristic doesn't kick in on input +# (must be done only when parsing program text) +testing "awk input is never oct" "awk '{ print \$1, \$1+1 }'" "011 12\n" "" "011\n" + # check that "hex/oct integer" heuristic doesn't kick in on 00NN.NNN testing "awk floating const with leading zeroes" \ "awk '{ printf \"%f %f\n\", \"000.123\", \"009.123\" }'" \ @@ -469,4 +473,10 @@ testing 'awk printf %% prints one %' \ "%\n" \ '' '' +testing 'awk backslash+newline eaten with no trace' \ + "awk 'BEGIN { printf \"Hello\\ + world\n\" }'" \ + "Hello world\n" \ + '' '' + exit $FAILCOUNT From vda.linux at googlemail.com Sat Jan 8 21:43:24 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 22:43:24 +0100 Subject: [git commit] libbb/sha1: add a comment Message-ID: <20220108213803.A5E328345E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=143356876b5712c28b8af61ea9144959a4dc6a5b branch: 
https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index eef009590..901896e6e 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -6,6 +6,28 @@ # also contains the diff of the generated file. exec >hash_md5_sha_x86-64.S +# There is a way to use XMM registers (which always exist for x86-64!) for W[] +# For example, if we load W as follows: +# %xmm0: w[0x0] w[0x1] w[0x2] w[0x3] +# %xmm4: w[0x4] w[0x5] w[0x6] w[0x7] +# %xmm8: w[0x8] w[0x9] w[0xa] w[0xb] +# %xmm12: w[0xc] w[0xd] w[0xe] w[0xf] +# then the xor'ing operation to generate next W[0..3] is: +# movaps %xmm0, %xmmT2 +# palignr $0x8, %xmm4, %xmmT2 # form (w[0x2],w[0x3],w[0x4],w[0x5]) +# # Right-shifts xmm4:xmmT2 by 8 bytes. Writes shifted result to xmmT2. SSSE3 insn. +# movaps %xmm0, %xmmT13 +# palignr $0x4,%xmm0,%xmmT13 # form (w[0xd],w[0xe],w[0xf],w[0x0]) +# xmm0 = xmm0 ^ t2 ^ xmm8 ^ t13 +# xmm0 = rol32(xmm0,1) # no such insn, have to use pslld+psrld+or +# and then results can be extracted for use: +# movd %xmm0, %esi # new W[0] +# pextrd $1, %xmm0, %esi # new W[1] +# # SSE4.1 insn. Can use EXTRACTPS (also SSE4.1) +# pextrd $2, %xmm0, %esi # new W[2] +# pextrd $3, %xmm0, %esi # new W[3] +# ... but this requires SSE4.1 and SSSE3, which are not universally available on x86-64. 
+ echo \ '### Generated by hash_md5_sha_x86-64.S.sh ### From bugzilla at busybox.net Sat Jan 8 21:44:29 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sat, 08 Jan 2022 21:44:29 +0000 Subject: [Bug 14381] busybox awk '$2 == var' can fail to give only lines with given search string In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14381 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #2 from Denys Vlasenko --- Fixed in git. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sat Jan 8 21:59:49 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sat, 8 Jan 2022 22:59:49 +0100 Subject: [git commit] ls: implement ls -sh (human-readable allocated blocks) Message-ID: <20220108215418.1CBF183455@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=ff8fda848284e82d97299806b31c196651b372a5 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta display_single 979 1018 +39 Signed-off-by: Denys Vlasenko --- coreutils/ls.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/coreutils/ls.c b/coreutils/ls.c index 48f5eb482..b69b80460 100644 --- a/coreutils/ls.c +++ b/coreutils/ls.c @@ -498,9 +498,16 @@ static NOINLINE unsigned display_single(const struct dnode *dn) if (opt & OPT_i) /* show inode# */ column += printf("%7llu ", (long long) dn->dn_ino); -//TODO: -h should affect -s too: - if (opt & OPT_s) /* show allocated blocks */ - column += printf("%6"OFF_FMT"u ", (off_t) (dn->dn_blocks >> 1)); + if (opt & OPT_s) { /* show allocated blocks */ + if (opt & OPT_h) { + column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ", + /* print size, show one fractional, use suffixes */ + make_human_readable_str((off_t)dn->dn_blocks << 9, 1, 0) + ); + } else { + column += 
printf("%6"OFF_FMT"u ", (off_t)(dn->dn_blocks >> 1)); + } + } if (opt & OPT_l) { /* long listing: show mode */ char modestr[12]; From bugzilla at busybox.net Sat Jan 8 22:00:40 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sat, 08 Jan 2022 22:00:40 +0000 Subject: [Bug 14386] ls -sh does not show human readable size In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14386 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Denys Vlasenko --- Fixed in git. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Mon Jan 10 01:23:56 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 10 Jan 2022 01:23:56 +0000 Subject: [Bug 14446] gzip: Please add --rsyncable support In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14446 --- Comment #1 from Denys Vlasenko --- Do you have a patch? -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Mon Jan 10 20:52:43 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 10 Jan 2022 20:52:43 +0000 Subject: [Bug 14506] New: "makesave" returns "0" if a device_table file does not exist Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14506 Bug ID: 14506 Summary: "makesave" returns "0" if a device_table file does not exist Product: Busybox Version: unspecified Hardware: All OS: Linux Status: NEW Severity: normal Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: 4yg_4mm6p8ifiten at byom.de CC: busybox-cvs at busybox.net Target Milestone: --- BusyBox version v1.34.1, which could not be selected "makedevs" returns "0" if it fails because a device_table file does not exist return code should be >0: $ makedevs -d /does/not/exist /tmp ; echo ret:$? 
rootdir=/tmp table='/does/not/exist' makedevs: /does/not/exist: No such file or directory ret:0 return code as expected >0: $ makedevs -d /does/not/exist /does/not/exist ; echo ret:$? makedevs: can't change directory to '/does/not/exist': No such file or directory ret:1 -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Mon Jan 10 20:54:16 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 10 Jan 2022 20:54:16 +0000 Subject: [Bug 14506] "makedevs" returns "0" if a device_table file does not exist In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14506 4yg_4mm6p8ifiten at byom.de changed: What |Removed |Added ---------------------------------------------------------------------------- Summary|"makesave" returns "0" if a |"makedevs" returns "0" if a |device_table file does not |device_table file does not |exist |exist -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Tue Jan 11 13:23:53 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 11 Jan 2022 13:23:53 +0000 Subject: [Bug 14326] [PATCH] pkill: add -e to display the name and PID of the process being killed In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14326 --- Comment #1 from Denys Vlasenko --- pkill from procps-ng-3.3.15 has no such option. Do you propose it as an extension? -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Tue Jan 11 14:12:00 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 11 Jan 2022 14:12:00 +0000 Subject: [Bug 14511] New: when login or run passwd as root it comes out with id 0 unknown. Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14511 Bug ID: 14511 Summary: when login or run passwd as root it comes out with id 0 unknown. 
Product: Busybox Version: 1.33.x Hardware: All OS: Linux Status: NEW Severity: normal Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: georgimirzoyan at gmail.com CC: busybox-cvs at busybox.net Target Milestone: --- It seems that it's a parsing issue. When my /etc/passwd is root::0:0:root:/root:/bin/bash the error is there. When I manually put x in /etc/passwd like this root:x:0:0:root:/root:/bin/bash then it works fine and I am able to change the password. When I revert the commit c9fc15359ef8fe5aa98ab0308c1563d9bcf99bb8 it works fine. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Tue Jan 11 14:14:10 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 11 Jan 2022 14:14:10 +0000 Subject: [Bug 14511] when login or run passwd as root it comes out with id 0 unknown. In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14511 --- Comment #1 from Georgi --- My config is: grep PASSWD .config CONFIG_FEATURE_SHADOWPASSWDS=y CONFIG_CHPASSWD=y CONFIG_FEATURE_DEFAULT_PASSWD_ALGO="des" CONFIG_MKPASSWD=y CONFIG_PASSWD=y CONFIG_FEATURE_PASSWD_WEAK_CHECK=y -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Tue Jan 11 15:11:51 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 11 Jan 2022 15:11:51 +0000 Subject: [Bug 14326] [PATCH] pkill: add -e to display the name and PID of the process being killed In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14326 --- Comment #2 from Louis Sautier --- This seems weird to me, this option was added years ago in https://gitlab.com/procps-ng/procps/-/commit/bc974b3c335c354192e510b34928877dad206198 I tried on Debian Buster which has the version you mentioned: root@9dce64e0e00d:/# pkill -h |& grep echo -e, --echo display what is killed root@9dce64e0e00d:/# pkill --version pkill from procps-ng 3.3.15 -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Tue Jan 11 18:32:16 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 11 Jan 2022 19:32:16 +0100 Subject: [git commit] nmeter: %[md] %[mw] - dirty file-backed pages, writeback pages Message-ID: <20220111182654.810FF80236@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=524fecf7c3496e25cca235506b2a3328cee63de8 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta collect_mem 333 387 +54 .rodata 104369 104380 +11 packed_usage 34184 34175 -9 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 65/-9) Total: 56 bytes Signed-off-by: Denys Vlasenko --- procps/nmeter.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/procps/nmeter.c b/procps/nmeter.c index f08938654..2310e9844 100644 --- a/procps/nmeter.c +++ b/procps/nmeter.c @@ -27,6 +27,8 @@ //usage: "\n (displays: S:system U:user N:niced D:iowait I:irq i:softirq)" //usage: "\n %[nINTERFACE] Network INTERFACE" //usage: "\n %m Allocated memory" +//usage: "\n %[md] Dirty file-backed memory" 
+//usage: "\n %[mw] Memory being written to storage" //usage: "\n %[mf] Free memory" //usage: "\n %[mt] Total memory" //usage: "\n %s Allocated swap" @@ -208,7 +210,7 @@ enum conv_type { // Reads decimal values from line. Values start after key, for example: // "cpu 649369 0 341297 4336769..." - key is "cpu" here. // Values are stored in vec[]. -// posbits is a bit lit of positions we are interested in. +// posbits is a bit list of positions we are interested in. // for example: 00100110 - we want 1st, 2nd and 5th value. // posbits.bit0 encodes conversion type. static int rdval(const char* p, const char* key, ullong *vec, long posbits) @@ -661,13 +663,31 @@ S_STAT_END(mem_stat) //Hugepagesize: 4096 kB static void FAST_FUNC collect_mem(mem_stat *s) { - ullong m_total = 0; - ullong m_free = 0; - ullong m_bufs = 0; - ullong m_cached = 0; - ullong m_slab = 0; + ullong m_total; + ullong m_free; + ullong m_bufs; + ullong m_cached; + ullong m_slab; - if (rdval(get_file(&proc_meminfo), "MemTotal:", &m_total, 1 << 1)) { + const char *meminfo = get_file(&proc_meminfo); + + if (s->opt == 'd' /* dirty page cache */ + || s->opt == 'w' /* under writeback */ + ) { + m_total = 0; /* temporary reuse m_total */ + if (rdval(meminfo, + (s->opt == 'd' ? 
"Dirty:" : "Writeback:"), + &m_total, 1 << 1) + ) { + put_question_marks(4); + return; + } + scale(m_total << 10); + return; + } + + m_total = 0; + if (rdval(meminfo, "MemTotal:", &m_total, 1 << 1)) { put_question_marks(4); return; } @@ -676,10 +696,14 @@ static void FAST_FUNC collect_mem(mem_stat *s) return; } - if (rdval(proc_meminfo.file, "MemFree:", &m_free , 1 << 1) - || rdval(proc_meminfo.file, "Buffers:", &m_bufs , 1 << 1) - || rdval(proc_meminfo.file, "Cached:", &m_cached, 1 << 1) - || rdval(proc_meminfo.file, "Slab:", &m_slab , 1 << 1) + m_free = 0; + m_bufs = 0; + m_cached = 0; + m_slab = 0; + if (rdval(meminfo, "MemFree:", &m_free , 1 << 1) + || rdval(meminfo, "Buffers:", &m_bufs , 1 << 1) + || rdval(meminfo, "Cached:", &m_cached, 1 << 1) + || rdval(meminfo, "Slab:", &m_slab , 1 << 1) ) { put_question_marks(4); return; From vda.linux at googlemail.com Wed Jan 12 16:21:14 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 12 Jan 2022 17:21:14 +0100 Subject: [git commit] reset: better --help text Message-ID: <20220112161511.E45DB8348E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=004cefa918483513a9eca13e7701c74dff160e95 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta packed_usage 34175 34233 +58 Signed-off-by: Denys Vlasenko --- console-tools/reset.c | 2 +- shell/ash.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/console-tools/reset.c b/console-tools/reset.c index 113bb5c76..b3acf69f8 100644 --- a/console-tools/reset.c +++ b/console-tools/reset.c @@ -21,7 +21,7 @@ //usage:#define reset_trivial_usage //usage: "" //usage:#define reset_full_usage "\n\n" -//usage: "Reset the screen" +//usage: "Reset terminal (ESC codes) and termios (signals, buffering, echo)" /* "Standard" version of this tool is in ncurses package */ diff --git a/shell/ash.c b/shell/ash.c index 4a8ec0c03..4c5dd1298 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -11358,7 +11358,7 
@@ options(int *login_sh) int val; int c; - if (login_sh) + if (login_sh != NULL) /* if we came from startup code */ minusc = NULL; while ((p = *argptr) != NULL) { c = *p++; @@ -11369,7 +11369,7 @@ options(int *login_sh) if (c == '-') { val = 1; if (p[0] == '\0' || LONE_DASH(p)) { - if (!login_sh) { + if (login_sh == NULL) { /* we came from setcmd() */ /* "-" means turn off -x and -v */ if (p[0] == '\0') xflag = vflag = 0; @@ -11382,7 +11382,7 @@ options(int *login_sh) } /* first char was + or - */ while ((c = *p++) != '\0') { - if (login_sh) { + if (login_sh != NULL) { /* if we came from startup code */ /* bash 3.2 indeed handles -c CMD and +c CMD the same */ if (c == 'c') { minusc = p; /* command is after shell args */ @@ -11406,6 +11406,9 @@ options(int *login_sh) if (strcmp(p, "login") == 0) { *login_sh = 1; } +/* TODO: --noprofile: e.g. if I want to run emergency shell from sulogin, + * I want minimal/no shell init scripts - but it insists on running it as "-ash"... + */ break; } } From bugzilla at busybox.net Wed Jan 12 16:24:39 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 12 Jan 2022 16:24:39 +0000 Subject: [Bug 14516] New: sh: possible wrong behaviour with patterns using a quoted ^ at the start of a bracket expression Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14516 Bug ID: 14516 Summary: sh: possible wrong behaviour with patterns using a quoted ^ at the start of a bracket expression Product: Busybox Version: 1.30.x Hardware: All OS: All Status: NEW Severity: major Priority: P5 Component: Standard Compliance Assignee: unassigned at busybox.net Reporter: calestyo at scientia.org CC: busybox-cvs at busybox.net Target Milestone: --- Hey. I recently got some big deal of help from the people at the help-bash mailing list when I've tried to understand what POSIX mandates with respect to pattern matching (that is in the sense of [0], not Basic/Extended Regular Expressions). 
I'm still not so sure whether I understand it exactly by the wording of POSIX itself (which seems a bit odd to me), but what people explained[1] to me at help-bash - and I hope I explain it correctly - is: In the patterns, even in a bracket expression in a pattern, there may be quoting (with double and single quotes), and - and this is key - anything that is quoted is already taken literally with respect to the pattern. So when one has e.g. a case compound command: case $foo in (['*?']) ... is already the literal * and ? within a pattern's bracket expression. Further, POSIX says: "If an open bracket introduces a bracket expression as in XBD RE Bracket Expression, except that the <exclamation-mark> character ( '!' ) shall replace the <circumflex> character ( '^' ) in its role in a non-matching list in the regular expression notation, it shall introduce a pattern bracket expression. A bracket expression starting with an unquoted <circumflex> character produces unspecified results. Otherwise, '[' shall match the character itself." I found now the following probably wrong behaviour of dash and busybox' sh: $ cat circumflex-test case "$1" in (['^.a']) echo match ;; (*) echo else esac $ cat exclamation-test case "$1" in (['!.a']) echo match ;; (*) echo else esac $ cat run-circumflex echo dash: dash circumflex-test ^ dash circumflex-test . dash circumflex-test a echo busybox-sh: busybox sh circumflex-test ^ busybox sh circumflex-test . busybox sh circumflex-test a echo bash: bash circumflex-test ^ bash circumflex-test . bash circumflex-test a echo klibc-sh: /usr/lib/klibc/bin/sh circumflex-test ^ /usr/lib/klibc/bin/sh circumflex-test . /usr/lib/klibc/bin/sh circumflex-test a $ cat run-exlamation echo dash: dash exclamation-test '!' dash exclamation-test . dash exclamation-test a echo busybox-sh: busybox sh exclamation-test '!' busybox sh exclamation-test . busybox sh exclamation-test a echo bash: bash exclamation-test '!' bash exclamation-test . 
bash exclamation-test a echo klibc-sh: /usr/lib/klibc/bin/sh exclamation-test '!' /usr/lib/klibc/bin/sh exclamation-test . /usr/lib/klibc/bin/sh exclamation-test a When run: $ sh run-circumflex | paste - - - - | column -t dash: match else else busybox-sh: match else else bash: match match match klibc-sh: match match match $ ^ . a $ sh run-exlamation | paste - - - - | column -t dash: match match match busybox-sh: match match match bash: match match match klibc-sh: match match match $ ! . a The results for the run-circumflex seem pretty odd. Apparently, the ^ is taken literally, but the other two are negated. $ dash circumflex-test b match $ busybox sh circumflex-test b match match again (which, AFAIU, they should not). While POSIX does say: "A bracket expression starting with an unquoted <circumflex> character produces unspecified results." ... the circumflex *is* quoted above... I haven't verified any further unusual patterns like ['$var'] vs. ["$var"], so maybe an eye should be kept open, whether there could be any issues as well. Thanks, Chris. [0] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 [1] https://lists.gnu.org/archive/html/help-bash/2022-01/msg00000.html -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 12 16:42:51 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 12 Jan 2022 16:42:51 +0000 Subject: [Bug 14516] sh: possible wrong behaviour with patterns using a quoted ^ at the start of a bracket expression In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14516 --- Comment #1 from Christoph Anton Mitterer --- For the record, the bug over at dash's mailing list: https://lore.kernel.org/dash/09fd70edcf08c75b4f9f2e158e3e6c0832d35d90.camel at scientia.org/T/#u -- You are receiving this mail because: You are on the CC list for the bug. 
From vda.linux at googlemail.com Wed Jan 12 22:19:11 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Wed, 12 Jan 2022 23:19:11 +0100 Subject: [git commit] sulogin: increase util-linux compatibility Message-ID: <20220112221510.507F782B5A@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=d162a7b978a98b910e410dc10a40d5de12db0419 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Change to root's HOME. Set some envvars. Steal ctty if necessary and possible. function old new delta sulogin_main 240 340 +100 setup_environment 225 233 +8 su_main 479 474 -5 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 108/-5) Total: 103 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 18 ++++++++++-------- libbb/setup_environment.c | 8 ++++---- loginutils/sulogin.c | 23 +++++++++++++++++++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/include/libbb.h b/include/libbb.h index daa310776..a0ffbef62 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1726,15 +1726,16 @@ extern void selinux_or_die(void) FAST_FUNC; /* setup_environment: - * if chdir pw->pw_dir: ok: else if to_tmp == 1: goto /tmp else: goto / or die - * if clear_env = 1: cd(pw->pw_dir), clear environment, then set + * if !SETUP_ENV_NO_CHDIR: + * if cd(pw->pw_dir): ok: else if SETUP_ENV_TO_TMP: cd(/tmp) else: cd(/) or die + * if SETUP_ENV_CLEARENV: cd(pw->pw_dir), clear environment, then set * TERM=(old value) * USER=pw->pw_name, LOGNAME=pw->pw_name * PATH=bb_default_[root_]path * HOME=pw->pw_dir * SHELL=shell - * else if change_env = 1: - * if not root (if pw->pw_uid != 0): + * else if SETUP_ENV_CHANGEENV: + * if not root (if pw->pw_uid != 0) or if SETUP_ENV_CHANGEENV_LOGNAME: * USER=pw->pw_name, LOGNAME=pw->pw_name * HOME=pw->pw_dir * SHELL=shell @@ -1743,10 +1744,11 @@ extern void selinux_or_die(void) FAST_FUNC; * NB: CHANGEENV and CLEARENV use setenv() - this leaks 
memory! * If setup_environment() is used is vforked child, this leaks memory _in parent too_! */ -#define SETUP_ENV_CHANGEENV (1 << 0) -#define SETUP_ENV_CLEARENV (1 << 1) -#define SETUP_ENV_TO_TMP (1 << 2) -#define SETUP_ENV_NO_CHDIR (1 << 4) +#define SETUP_ENV_CHANGEENV (1 << 0) +#define SETUP_ENV_CHANGEENV_LOGNAME (1 << 1) +#define SETUP_ENV_CLEARENV (1 << 2) +#define SETUP_ENV_TO_TMP (1 << 3) +#define SETUP_ENV_NO_CHDIR (1 << 4) void setup_environment(const char *shell, int flags, const struct passwd *pw) FAST_FUNC; void nuke_str(char *str) FAST_FUNC; #if ENABLE_FEATURE_SECURETTY && !ENABLE_PAM diff --git a/libbb/setup_environment.c b/libbb/setup_environment.c index f8de44967..df2983958 100644 --- a/libbb/setup_environment.c +++ b/libbb/setup_environment.c @@ -54,15 +54,15 @@ void FAST_FUNC setup_environment(const char *shell, int flags, const struct pass xsetenv("TERM", term); xsetenv("PATH", (pw->pw_uid ? bb_default_path : bb_default_root_path)); goto shortcut; - // No, gcc (4.2.1) is not clever enougn to do it itself. + // No, gcc (4.2.1) is not clever enough to do it itself. //xsetenv("USER", pw->pw_name); //xsetenv("LOGNAME", pw->pw_name); //xsetenv("HOME", pw->pw_dir); //xsetenv("SHELL", shell); } else if (flags & SETUP_ENV_CHANGEENV) { - /* Set HOME, SHELL, and if not becoming a super-user, - * USER and LOGNAME. */ - if (pw->pw_uid) { + /* Set HOME, SHELL, and if not becoming a super-user + * or if SETUP_ENV_CHANGEENV_LOGNAME, USER and LOGNAME. 
*/ + if ((flags & SETUP_ENV_CHANGEENV_LOGNAME) || pw->pw_uid != 0) { shortcut: xsetenv("USER", pw->pw_name); xsetenv("LOGNAME", pw->pw_name); diff --git a/loginutils/sulogin.c b/loginutils/sulogin.c index 69d8b5ec7..5f1c1178f 100644 --- a/loginutils/sulogin.c +++ b/loginutils/sulogin.c @@ -28,6 +28,7 @@ int sulogin_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int sulogin_main(int argc UNUSED_PARAM, char **argv) { + int tsid; int timeout = 0; struct passwd *pwd; const char *shell; @@ -88,6 +89,28 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) if (!shell) shell = pwd->pw_shell; + /* util-linux 2.36.1 compat: cd to root's HOME, set a few envvars */ + setup_environment(shell, SETUP_ENV_CHANGEENV | SETUP_ENV_CHANGEENV_LOGNAME, pwd); + // no SETUP_ENV_CLEARENV + // SETUP_ENV_CHANGEENV[+LOGNAME] - set HOME, SHELL, USER,and LOGNAME + // no SETUP_ENV_NO_CHDIR - IOW: cd to $HOME + + /* util-linux 2.36.1 compat: steal ctty if we don't have it yet + * (yes, util-linux uses force=1) */ + tsid = tcgetsid(STDIN_FILENO); + if (tsid < 0 || getpid() != tsid) { + if (ioctl(STDIN_FILENO, TIOCSCTTY, /*force:*/ (long)1) != 0) { +// bb_perror_msg("TIOCSCTTY1 tsid:%d", tsid); + if (setsid() > 0) { +// bb_error_msg("done setsid()"); + /* If it still does not work, ignore */ + if (ioctl(STDIN_FILENO, TIOCSCTTY, /*force:*/ (long)1) != 0) { +// bb_perror_msg("TIOCSCTTY2 tsid:%d", tsid); + } + } + } + } + /* Exec login shell with no additional parameters. Never returns. 
*/ exec_login_shell(shell); } From vda.linux at googlemail.com Thu Jan 13 00:15:10 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 13 Jan 2022 01:15:10 +0100 Subject: [git commit] ash: ^C with SIG_INGed SIGINT should not exit the shell Message-ID: <20220113000936.87583832DB@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=68b402ee51f12f8c3b11638b042f57e025359faf branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta __pgetc 501 522 +21 Signed-off-by: Denys Vlasenko --- loginutils/login.c | 4 +++- loginutils/sulogin.c | 5 +++++ shell/ash.c | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/loginutils/login.c b/loginutils/login.c index 569053c12..cac4349b2 100644 --- a/loginutils/login.c +++ b/loginutils/login.c @@ -608,7 +608,9 @@ int login_main(int argc UNUSED_PARAM, char **argv) * But without this, bash 3.0 will not enable ctrl-c either. * Maybe bash is buggy? * Need to find out what standards say about /bin/login - - * should we leave SIGINT etc enabled or disabled? */ + * should we leave SIGINT etc enabled or disabled? + * Also note: sulogin does not do it! Why? + */ signal(SIGINT, SIG_DFL); /* Exec login shell with no additional parameters */ diff --git a/loginutils/sulogin.c b/loginutils/sulogin.c index 5f1c1178f..2f87c77c0 100644 --- a/loginutils/sulogin.c +++ b/loginutils/sulogin.c @@ -111,6 +111,11 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) } } + /* + * Note: login does this (should we do it too?): + */ + /*signal(SIGINT, SIG_DFL);*/ + /* Exec login shell with no additional parameters. Never returns. 
*/ exec_login_shell(shell); } diff --git a/shell/ash.c b/shell/ash.c index 4c5dd1298..12b2db3a9 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -10784,18 +10784,24 @@ preadfd(void) line_input_state->path_lookup = pathval(); # endif reinit_unicode_for_ash(); + again: nr = read_line_input(line_input_state, cmdedit_prompt, buf, IBUFSIZ); if (nr == 0) { /* ^C pressed, "convert" to SIGINT */ write(STDOUT_FILENO, "^C", 2); raise(SIGINT); + /* raise(SIGINT) did not work! (e.g. if SIGINT + * is SIG_INGed on startup, it stays SIG_IGNed) + */ if (trap[SIGINT]) { buf[0] = '\n'; buf[1] = '\0'; return 1; } exitstatus = 128 + SIGINT; - return -1; + /* bash behavior on ^C + ignored SIGINT: */ + write(STDOUT_FILENO, "\n", 1); + goto again; } if (nr < 0) { if (errno == 0) { From vda.linux at googlemail.com Thu Jan 13 00:38:47 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 13 Jan 2022 01:38:47 +0100 Subject: [git commit] sulogin: start _login_ shell only with -p Message-ID: <20220113003320.D549B82C08@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=96771ec5f5fb81b479973fbd25af48c5cb83b2bb branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Also, shorten/eliminate messages to be closer to util-linux. 
function old new delta packed_usage 34233 34238 +5 sulogin_main 340 341 +1 .rodata 104380 104327 -53 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 6/-53) Total: -47 bytes Signed-off-by: Denys Vlasenko --- loginutils/sulogin.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/loginutils/sulogin.c b/loginutils/sulogin.c index 2f87c77c0..56b88c086 100644 --- a/loginutils/sulogin.c +++ b/loginutils/sulogin.c @@ -20,7 +20,8 @@ //usage: "[-t N] [TTY]" //usage:#define sulogin_full_usage "\n\n" //usage: "Single user login\n" -//usage: "\n -t N Timeout" +//usage: "\n -p Start a login shell" +//usage: "\n -t SEC Timeout" #include "libbb.h" #include @@ -30,6 +31,7 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) { int tsid; int timeout = 0; + unsigned opts; struct passwd *pwd; const char *shell; @@ -44,7 +46,7 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) logmode = LOGMODE_BOTH; openlog(applet_name, 0, LOG_AUTH); - getopt32(argv, "t:+", &timeout); + opts = getopt32(argv, "pt:+", &timeout); argv += optind; if (argv[0]) { @@ -64,8 +66,8 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) int r; r = ask_and_check_password_extended(pwd, timeout, - "Give root password for system maintenance\n" - "(or type Control-D for normal startup):" + "Give root password for maintenance\n" + "(or type Ctrl-D to continue): " ); if (r < 0) { /* ^D, ^C, timeout, or read error */ @@ -79,7 +81,8 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) bb_simple_info_msg("Login incorrect"); } - bb_simple_info_msg("starting shell for system maintenance"); + /* util-linux 2.36.1 compat: no message */ + /*bb_simple_info_msg("starting shell for system maintenance");*/ IF_SELINUX(renew_current_security_context()); @@ -116,6 +119,6 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) */ /*signal(SIGINT, SIG_DFL);*/ - /* Exec login shell with no additional parameters. 
Never returns. */ - exec_login_shell(shell); + /* Exec shell with no additional parameters. Never returns. */ + exec_shell(shell, /* -p? then shell is login:*/(opts & 1), NULL); } From vda.linux at googlemail.com Thu Jan 13 00:43:11 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 13 Jan 2022 01:43:11 +0100 Subject: [git commit] sulogin: util-linux does not say "normal startup" on Ctrl-D Message-ID: <20220113003720.8B33182DBE@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=b3eec1651fb02d70716caa355f49320719f74c75 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta sulogin_main 341 331 -10 .rodata 104327 104312 -15 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-25) Total: -25 bytes Signed-off-by: Denys Vlasenko --- loginutils/sulogin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loginutils/sulogin.c b/loginutils/sulogin.c index 56b88c086..c9817960c 100644 --- a/loginutils/sulogin.c +++ b/loginutils/sulogin.c @@ -71,7 +71,8 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) ); if (r < 0) { /* ^D, ^C, timeout, or read error */ - bb_simple_info_msg("normal startup"); + /* util-linux 2.36.1 compat: no message */ + /*bb_simple_info_msg("normal startup");*/ return 0; } if (r > 0) { From vda.linux at googlemail.com Thu Jan 13 11:56:10 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 13 Jan 2022 12:56:10 +0100 Subject: [git commit] libbb: introduce and use chdir_or_warn() Message-ID: <20220113152911.2A63C817D5@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=c2788f88f430da8ae5fb5f293b13fc2b167ea2fe branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta chdir_or_warn - 37 +37 send_cgi_and_exit 720 711 -9 xchdir 27 15 -12 setup_environment 233 217 -16 fork_job 449 433 -16 
------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/4 up/down: 37/-53) Total: -16 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/setup_environment.c | 3 +-- libbb/xfuncs_printf.c | 11 +++++++++-- miscutils/crond.c | 3 +-- networking/httpd.c | 3 +-- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/libbb.h b/include/libbb.h index 780e9ae7d..91b456915 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -645,6 +645,7 @@ void xsetgid(gid_t gid) FAST_FUNC; void xsetuid(uid_t uid) FAST_FUNC; void xsetegid(gid_t egid) FAST_FUNC; void xseteuid(uid_t euid) FAST_FUNC; +int chdir_or_warn(const char *path) FAST_FUNC; void xchdir(const char *path) FAST_FUNC; void xfchdir(int fd) FAST_FUNC; void xchroot(const char *path) FAST_FUNC; diff --git a/libbb/setup_environment.c b/libbb/setup_environment.c index 37777204e..3549e2099 100644 --- a/libbb/setup_environment.c +++ b/libbb/setup_environment.c @@ -37,8 +37,7 @@ void FAST_FUNC setup_environment(const char *shell, int flags, const struct pass /* Change the current working directory to be the home directory * of the user */ if (flags & SETUP_ENV_CHDIR) { - if (chdir(pw->pw_dir) != 0) { - bb_error_msg("can't change directory to '%s'", pw->pw_dir); + if (chdir_or_warn(pw->pw_dir) != 0) { xchdir((flags & SETUP_ENV_TO_TMP) ? "/tmp" : "/"); } } diff --git a/libbb/xfuncs_printf.c b/libbb/xfuncs_printf.c index fc630d176..842d10cd2 100644 --- a/libbb/xfuncs_printf.c +++ b/libbb/xfuncs_printf.c @@ -415,11 +415,18 @@ void FAST_FUNC xseteuid(uid_t euid) if (seteuid(euid)) bb_simple_perror_msg_and_die("seteuid"); } +int FAST_FUNC chdir_or_warn(const char *path) +{ + int r = chdir(path); + if (r != 0) + bb_perror_msg("can't change directory to '%s'", path); + return r; +} // Die if we can't chdir to a new path. 
void FAST_FUNC xchdir(const char *path) { - if (chdir(path)) - bb_perror_msg_and_die("can't change directory to '%s'", path); + if (chdir_or_warn(path) != 0) + xfunc_die(); } void FAST_FUNC xfchdir(int fd) diff --git a/miscutils/crond.c b/miscutils/crond.c index b74427351..1965af656 100644 --- a/miscutils/crond.c +++ b/miscutils/crond.c @@ -675,8 +675,7 @@ static void change_user(struct passwd *pas) { /* careful: we're after vfork! */ change_identity(pas); /* - initgroups, setgid, setuid */ - if (chdir(pas->pw_dir) < 0) { - bb_error_msg("can't change directory to '%s'", pas->pw_dir); + if (chdir_or_warn(pas->pw_dir) != 0) { xchdir(CRON_DIR); } } diff --git a/networking/httpd.c b/networking/httpd.c index 33045163f..ffc58e10b 100644 --- a/networking/httpd.c +++ b/networking/httpd.c @@ -1667,8 +1667,7 @@ static void send_cgi_and_exit( script = last_slash; if (script != url) { /* paranoia */ *script = '\0'; - if (chdir(url + 1) != 0) { - bb_perror_msg("can't change directory to '%s'", url + 1); + if (chdir_or_warn(url + 1) != 0) { goto error_execing_cgi; } // not needed: *script = '/'; From vda.linux at googlemail.com Thu Jan 13 11:50:48 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Thu, 13 Jan 2022 12:50:48 +0100 Subject: [git commit] libbb: invert the meaning of SETUP_ENV_NO_CHDIR -> SETUP_ENV_CHDIR Message-ID: <20220113152911.1D4548265E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=931c55f9e2b41473132683488820c6fb7c47506b branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Double negatives are hard to grok. 
function old new delta login_main 986 988 +2 su_main 474 470 -4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 2/-4) Total: -2 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 6 +++--- libbb/setup_environment.c | 5 +++-- loginutils/login.c | 4 +++- loginutils/su.c | 7 +++---- loginutils/sulogin.c | 9 ++++++--- miscutils/crontab.c | 4 ++-- shell/ash.c | 2 +- shell/hush.c | 2 +- 8 files changed, 22 insertions(+), 17 deletions(-) diff --git a/include/libbb.h b/include/libbb.h index a0ffbef62..780e9ae7d 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1726,7 +1726,7 @@ extern void selinux_or_die(void) FAST_FUNC; /* setup_environment: - * if !SETUP_ENV_NO_CHDIR: + * if SETUP_ENV_CHDIR: * if cd(pw->pw_dir): ok: else if SETUP_ENV_TO_TMP: cd(/tmp) else: cd(/) or die * if SETUP_ENV_CLEARENV: cd(pw->pw_dir), clear environment, then set * TERM=(old value) @@ -1734,7 +1734,7 @@ extern void selinux_or_die(void) FAST_FUNC; * PATH=bb_default_[root_]path * HOME=pw->pw_dir * SHELL=shell - * else if SETUP_ENV_CHANGEENV: + * else if SETUP_ENV_CHANGEENV | SETUP_ENV_CHANGEENV_LOGNAME: * if not root (if pw->pw_uid != 0) or if SETUP_ENV_CHANGEENV_LOGNAME: * USER=pw->pw_name, LOGNAME=pw->pw_name * HOME=pw->pw_dir @@ -1748,7 +1748,7 @@ extern void selinux_or_die(void) FAST_FUNC; #define SETUP_ENV_CHANGEENV_LOGNAME (1 << 1) #define SETUP_ENV_CLEARENV (1 << 2) #define SETUP_ENV_TO_TMP (1 << 3) -#define SETUP_ENV_NO_CHDIR (1 << 4) +#define SETUP_ENV_CHDIR (1 << 4) void setup_environment(const char *shell, int flags, const struct passwd *pw) FAST_FUNC; void nuke_str(char *str) FAST_FUNC; #if ENABLE_FEATURE_SECURETTY && !ENABLE_PAM diff --git a/libbb/setup_environment.c b/libbb/setup_environment.c index df2983958..37777204e 100644 --- a/libbb/setup_environment.c +++ b/libbb/setup_environment.c @@ -36,7 +36,7 @@ void FAST_FUNC setup_environment(const char *shell, int flags, const struct pass /* Change the 
current working directory to be the home directory * of the user */ - if (!(flags & SETUP_ENV_NO_CHDIR)) { + if (flags & SETUP_ENV_CHDIR) { if (chdir(pw->pw_dir) != 0) { bb_error_msg("can't change directory to '%s'", pw->pw_dir); xchdir((flags & SETUP_ENV_TO_TMP) ? "/tmp" : "/"); @@ -59,7 +59,8 @@ void FAST_FUNC setup_environment(const char *shell, int flags, const struct pass //xsetenv("LOGNAME", pw->pw_name); //xsetenv("HOME", pw->pw_dir); //xsetenv("SHELL", shell); - } else if (flags & SETUP_ENV_CHANGEENV) { + } else + if (flags & (SETUP_ENV_CHANGEENV|SETUP_ENV_CHANGEENV_LOGNAME)) { /* Set HOME, SHELL, and if not becoming a super-user * or if SETUP_ENV_CHANGEENV_LOGNAME, USER and LOGNAME. */ if ((flags & SETUP_ENV_CHANGEENV_LOGNAME) || pw->pw_uid != 0) { diff --git a/loginutils/login.c b/loginutils/login.c index cac4349b2..332238181 100644 --- a/loginutils/login.c +++ b/loginutils/login.c @@ -564,7 +564,9 @@ int login_main(int argc UNUSED_PARAM, char **argv) change_identity(pw); setup_environment(pw->pw_shell, - (!(opt & LOGIN_OPT_p) * SETUP_ENV_CLEARENV) + SETUP_ENV_CHANGEENV, + (!(opt & LOGIN_OPT_p) * SETUP_ENV_CLEARENV) + + SETUP_ENV_CHANGEENV + + SETUP_ENV_CHDIR, pw); #if ENABLE_PAM diff --git a/loginutils/su.c b/loginutils/su.c index e1db7590f..6efe1981a 100644 --- a/loginutils/su.c +++ b/loginutils/su.c @@ -176,10 +176,9 @@ int su_main(int argc UNUSED_PARAM, char **argv) change_identity(pw); setup_environment(opt_shell, - ((flags & SU_OPT_l) / SU_OPT_l * SETUP_ENV_CLEARENV) - + (!(flags & SU_OPT_mp) * SETUP_ENV_CHANGEENV) - + (!(flags & SU_OPT_l) * SETUP_ENV_NO_CHDIR), - pw); + ((flags & SU_OPT_l) ? 
(SETUP_ENV_CLEARENV + SETUP_ENV_CHDIR) : 0) + + (!(flags & SU_OPT_mp) * SETUP_ENV_CHANGEENV), + pw); IF_SELINUX(set_current_security_context(NULL);) if (opt_command) { diff --git a/loginutils/sulogin.c b/loginutils/sulogin.c index c9817960c..681022acb 100644 --- a/loginutils/sulogin.c +++ b/loginutils/sulogin.c @@ -94,10 +94,13 @@ int sulogin_main(int argc UNUSED_PARAM, char **argv) shell = pwd->pw_shell; /* util-linux 2.36.1 compat: cd to root's HOME, set a few envvars */ - setup_environment(shell, SETUP_ENV_CHANGEENV | SETUP_ENV_CHANGEENV_LOGNAME, pwd); + setup_environment(shell, 0 + + SETUP_ENV_CHANGEENV_LOGNAME + + SETUP_ENV_CHDIR + , pwd); // no SETUP_ENV_CLEARENV - // SETUP_ENV_CHANGEENV[+LOGNAME] - set HOME, SHELL, USER,and LOGNAME - // no SETUP_ENV_NO_CHDIR - IOW: cd to $HOME + // SETUP_ENV_CHANGEENV_LOGNAME - set HOME, SHELL, USER,and LOGNAME + // SETUP_ENV_CHDIR - cd to $HOME /* util-linux 2.36.1 compat: steal ctty if we don't have it yet * (yes, util-linux uses force=1) */ diff --git a/miscutils/crontab.c b/miscutils/crontab.c index 411a18a50..1111f4d54 100644 --- a/miscutils/crontab.c +++ b/miscutils/crontab.c @@ -55,8 +55,8 @@ static void edit_file(const struct passwd *pas, const char *file) /* initgroups, setgid, setuid */ change_identity(pas); setup_environment(pas->pw_shell, - SETUP_ENV_CHANGEENV | SETUP_ENV_TO_TMP, - pas); + SETUP_ENV_CHANGEENV | SETUP_ENV_TO_TMP | SETUP_ENV_CHDIR, + pas); ptr = getenv("VISUAL"); if (!ptr) { ptr = getenv("EDITOR"); diff --git a/shell/ash.c b/shell/ash.c index 12b2db3a9..ca5c755b6 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -10791,7 +10791,7 @@ preadfd(void) write(STDOUT_FILENO, "^C", 2); raise(SIGINT); /* raise(SIGINT) did not work! (e.g. 
if SIGINT - * is SIG_INGed on startup, it stays SIG_IGNed) + * is SIG_IGNed on startup, it stays SIG_IGNed) */ if (trap[SIGINT]) { buf[0] = '\n'; diff --git a/shell/hush.c b/shell/hush.c index 982fc356a..7d0dc67e4 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -10361,7 +10361,7 @@ int hush_main(int argc, char **argv) //it ignores TERM: // bash -i -c 'kill $$; echo ALIVE' // ALIVE -//it resets SIG_INGed HUP to SIG_DFL: +//it resets SIG_IGNed HUP to SIG_DFL: // trap '' hup; bash -i -c 'kill -hup $$; echo ALIVE' // Hangup [the message is not printed by bash, it's the shell which started it] //is talkative about jobs and exiting: From bugzilla at busybox.net Mon Jan 17 03:28:46 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 17 Jan 2022 03:28:46 +0000 Subject: [Bug 14526] New: possible wrong behaviour with patterns double [ with no closing ] Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14526 Bug ID: 14526 Summary: possible wrong behaviour with patterns double [ with no closing ] Product: Busybox Version: 1.30.x Hardware: All OS: All Status: NEW Severity: normal Priority: P5 Component: Standard Compliance Assignee: unassigned at busybox.net Reporter: calestyo at scientia.org CC: busybox-cvs at busybox.net Target Milestone: --- Hey. Assuming a pattern of: [.*^\] my understanding was that this would actually mean: - the literal string [. followed by - the pattern notation special character * (i.e. any string) followed by - the literal string ^] Because ] is escaped, it's to be taken literally and in a pattern a [ without corresponding ] is to be taken literally as well. 
(see POSIX, https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 ) That seems to work in busybox sh: $ case '[.^]' in ([.*^\]) echo match;; (*) echo no match;; esac match $ case '[.x^]' in ([.*^\]) echo match;; (*) echo no match;; esac match $ case '[.xx^]' in ([.*^\]) echo match;; (*) echo no match;; esac match However, adding another [ to the pattern: [.*^[\] which should be: - the literal string [. followed by - the pattern notation special character * (i.e. any string) followed by - the literal string ^[] No longer matches: $ case '[.^[]' in ([.*^[\]) echo match;; (*) echo no match;; esac no match $ case '[.x^[]' in ([.*^[\]) echo match;; (*) echo no match;; esac no match $ case '[.xx^[]' in ([.*^[\]) echo match;; (*) echo no match;; esac no match Whereas, AFAIU POSIX, it should. This works, btw. in bash, but neither in dash, nor klibc sh. Cheers, Chris. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Mon Jan 17 10:46:23 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 17 Jan 2022 11:46:23 +0100 Subject: [git commit] ash, hush: fix handling of SIGINT while waiting for interactive input Message-ID: <20220117182353.79B528348E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=12566e7f9b5e5c5d445bc4d36991d134b431dc6c branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta lineedit_read_key 160 237 +77 __pgetc 522 589 +67 fgetc_interactive 244 309 +65 safe_read_key - 39 +39 read_key 588 607 +19 record_pending_signo 23 32 +9 signal_handler 75 81 +6 .rodata 104312 104309 -3 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 6/1 up/down: 282/-3) Total: 279 bytes Signed-off-by: Denys Vlasenko --- editors/vi.c | 4 ++-- include/libbb.h | 5 +++- libbb/lineedit.c | 24 ++++++++++++++++--- libbb/read_key.c | 16 +++++++++++-- miscutils/hexedit.c | 2 +- 
miscutils/less.c | 4 ++-- procps/top.c | 2 +- shell/ash.c | 39 ++++++++++++++++++++++++------- shell/hush.c | 67 +++++++++++++++++++++++++++++++++++++---------------- 9 files changed, 122 insertions(+), 41 deletions(-) diff --git a/editors/vi.c b/editors/vi.c index 3dbe5b471..d37cd48a3 100644 --- a/editors/vi.c +++ b/editors/vi.c @@ -1122,7 +1122,7 @@ static int readit(void) // read (maybe cursor) key from stdin // on nonblocking stdin. // Note: read_key sets errno to 0 on success. again: - c = read_key(STDIN_FILENO, readbuffer, /*timeout:*/ -1); + c = safe_read_key(STDIN_FILENO, readbuffer, /*timeout:*/ -1); if (c == -1) { // EOF/error if (errno == EAGAIN) // paranoia goto again; @@ -4770,7 +4770,7 @@ static void edit_file(char *fn) uint64_t k; write1(ESC"[999;999H" ESC"[6n"); fflush_all(); - k = read_key(STDIN_FILENO, readbuffer, /*timeout_ms:*/ 100); + k = safe_read_key(STDIN_FILENO, readbuffer, /*timeout_ms:*/ 100); if ((int32_t)k == KEYCODE_CURSOR_POS) { uint32_t rc = (k >> 32); columns = (rc & 0x7fff); diff --git a/include/libbb.h b/include/libbb.h index 91b456915..b45ce91c5 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1908,6 +1908,8 @@ enum { * >=0: poll() for TIMEOUT milliseconds, return -1/EAGAIN on timeout */ int64_t read_key(int fd, char *buffer, int timeout) FAST_FUNC; +/* This version loops on EINTR: */ +int64_t safe_read_key(int fd, char *buffer, int timeout) FAST_FUNC; void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC; @@ -1961,7 +1963,8 @@ enum { USERNAME_COMPLETION = 4 * ENABLE_FEATURE_USERNAME_COMPLETION, VI_MODE = 8 * ENABLE_FEATURE_EDITING_VI, WITH_PATH_LOOKUP = 0x10, - FOR_SHELL = DO_HISTORY | TAB_COMPLETION | USERNAME_COMPLETION, + LI_INTERRUPTIBLE = 0x20, + FOR_SHELL = DO_HISTORY | TAB_COMPLETION | USERNAME_COMPLETION | LI_INTERRUPTIBLE, }; line_input_t *new_line_input_t(int flags) FAST_FUNC; #if ENABLE_FEATURE_EDITING_SAVEHISTORY diff --git a/libbb/lineedit.c b/libbb/lineedit.c index e14c78707..f76afd37d 
100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c @@ -2161,12 +2161,30 @@ static int lineedit_read_key(char *read_key_buffer, int timeout) * insist on full MB_CUR_MAX buffer to declare input like * "\xff\n",pause,"ls\n" invalid and thus won't lose "ls". * + * If LI_INTERRUPTIBLE, return -1 if got EINTR in poll() + * inside read_key, or if bb_got_signal != 0 (IOW: if signal + * arrived before poll() is reached). + * * Note: read_key sets errno to 0 on success. */ - IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) - ic = read_key(STDIN_FILENO, read_key_buffer, timeout); - IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 0;) + do { + if ((state->flags & LI_INTERRUPTIBLE) && bb_got_signal) { + errno = EINTR; + return -1; + } +//FIXME: still races here with signals, but small window to poll() inside read_key + IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) + ic = read_key(STDIN_FILENO, read_key_buffer, timeout); + IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 0;) + } while (!(state->flags & LI_INTERRUPTIBLE) && errno == EINTR); + if (errno) { + /* LI_INTERRUPTIBLE can bail out with EINTR here, + * but nothing really guarantees that bb_got_signal + * is nonzero. Follow the least surprise principle: + */ + if (errno == EINTR && bb_got_signal == 0) + bb_got_signal = 255; /* something nonzero */ #if ENABLE_UNICODE_SUPPORT if (errno == EAGAIN && unicode_idx != 0) goto pushback; diff --git a/libbb/read_key.c b/libbb/read_key.c index 03b7da656..829ae215c 100644 --- a/libbb/read_key.c +++ b/libbb/read_key.c @@ -126,7 +126,10 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) * if fd can be in non-blocking mode. 
*/ if (timeout >= -1) { - if (safe_poll(&pfd, 1, timeout) == 0) { + n = poll(&pfd, 1, timeout); + if (n < 0 && errno == EINTR) + return n; + if (n == 0) { /* Timed out */ errno = EAGAIN; return -1; @@ -138,7 +141,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) * When we were reading 3 bytes here, we were eating * "li" too, and cat was getting wrong input. */ - n = safe_read(fd, buffer, 1); + n = read(fd, buffer, 1); if (n <= 0) return -1; } @@ -284,6 +287,15 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) goto start_over; } +int64_t FAST_FUNC safe_read_key(int fd, char *buffer, int timeout) +{ + int64_t r; + do { + r = read_key(fd, buffer, timeout); + } while (errno == EINTR); + return r; +} + void FAST_FUNC read_key_ungets(char *buffer, const char *str, unsigned len) { unsigned cur_len = (unsigned char)buffer[0]; diff --git a/miscutils/hexedit.c b/miscutils/hexedit.c index f8ff9b62b..15ad78377 100644 --- a/miscutils/hexedit.c +++ b/miscutils/hexedit.c @@ -292,7 +292,7 @@ int hexedit_main(int argc UNUSED_PARAM, char **argv) fflush_all(); G.in_read_key = 1; if (!bb_got_signal) - key = read_key(STDIN_FILENO, G.read_key_buffer, -1); + key = safe_read_key(STDIN_FILENO, G.read_key_buffer, -1); G.in_read_key = 0; if (bb_got_signal) key = CTRL('X'); diff --git a/miscutils/less.c b/miscutils/less.c index 82c4b21f0..8a0525cb7 100644 --- a/miscutils/less.c +++ b/miscutils/less.c @@ -1137,9 +1137,9 @@ static int64_t getch_nowait(void) #endif } - /* We have kbd_fd in O_NONBLOCK mode, read inside read_key() + /* We have kbd_fd in O_NONBLOCK mode, read inside safe_read_key() * would not block even if there is no input available */ - key64 = read_key(kbd_fd, kbd_input, /*timeout off:*/ -2); + key64 = safe_read_key(kbd_fd, kbd_input, /*timeout off:*/ -2); if ((int)key64 == -1) { if (errno == EAGAIN) { /* No keyboard input available. 
Since poll() did return, diff --git a/procps/top.c b/procps/top.c index 4cd545c69..804d6f258 100644 --- a/procps/top.c +++ b/procps/top.c @@ -913,7 +913,7 @@ static unsigned handle_input(unsigned scan_mask, duration_t interval) while (1) { int32_t c; - c = read_key(STDIN_FILENO, G.kbd_input, interval * 1000); + c = safe_read_key(STDIN_FILENO, G.kbd_input, interval * 1000); if (c == -1 && errno != EAGAIN) { /* error/EOF */ option_mask32 |= OPT_EOF; diff --git a/shell/ash.c b/shell/ash.c index 086773dd7..55df54bd0 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -3679,7 +3679,9 @@ signal_handler(int signo) if (!trap[SIGCHLD]) return; } - +#if ENABLE_FEATURE_EDITING + bb_got_signal = signo; /* for read_line_input: "we got a signal" */ +#endif gotsig[signo - 1] = 1; pending_sig = signo; @@ -10784,33 +10786,52 @@ preadfd(void) # endif reinit_unicode_for_ash(); again: -//BUG: not in INT_OFF/INT_ON section - SIGINT et al would longjmp out of read_line_input()! -//This would cause a memory leak in interactive shell -//(repeated internal allocations in read_line_input): -// (while kill -INT $$; do :; done) & + /* For shell, LI_INTERRUPTIBLE is set: + * read_line_input will abort on either + * getting EINTR in poll(), or if it sees bb_got_signal != 0 + * (IOW: if signal arrives before poll() is reached). 
+ * Interactive testcases: + * (while kill -INT $$; do sleep 1; done) & + * #^^^ prints ^C, prints prompt, repeats + * trap 'echo I' int; (while kill -INT $$; do sleep 1; done) & + * #^^^ prints ^C, prints "I", prints prompt, repeats + * trap 'echo T' term; (while kill $$; do sleep 1; done) & + * #^^^ prints "T", prints prompt, repeats + * #(bash 5.0.17 exits after first "T", looks like a bug) + */ + bb_got_signal = 0; + INT_OFF; /* no longjmp'ing out of read_line_input please */ nr = read_line_input(line_input_state, cmdedit_prompt, buf, IBUFSIZ); + if (bb_got_signal == SIGINT) + write(STDOUT_FILENO, "^C\n", 3); + INT_ON; /* here non-blocked SIGINT will longjmp */ if (nr == 0) { /* ^C pressed, "convert" to SIGINT */ - write(STDOUT_FILENO, "^C", 2); - raise(SIGINT); + write(STDOUT_FILENO, "^C\n", 3); + raise(SIGINT); /* here non-blocked SIGINT will longjmp */ /* raise(SIGINT) did not work! (e.g. if SIGINT * is SIG_IGNed on startup, it stays SIG_IGNed) */ if (trap[SIGINT]) { + empty_line_input: buf[0] = '\n'; buf[1] = '\0'; return 1; } exitstatus = 128 + SIGINT; /* bash behavior on ^C + ignored SIGINT: */ - write(STDOUT_FILENO, "\n", 1); goto again; } if (nr < 0) { if (errno == 0) { - /* Ctrl+D pressed */ + /* ^D pressed */ nr = 0; } + else if (errno == EINTR) { /* got signal? 
*/ + if (bb_got_signal != SIGINT) + write(STDOUT_FILENO, "\n", 1); + goto empty_line_input; + } # if ENABLE_ASH_IDLE_TIMEOUT else if (errno == EAGAIN && timeout > 0) { puts("\007timed out waiting for input: auto-logout"); diff --git a/shell/hush.c b/shell/hush.c index 7d0dc67e4..6dc2ecaac 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -918,6 +918,7 @@ struct globals { #if ENABLE_HUSH_INTERACTIVE smallint promptmode; /* 0: PS1, 1: PS2 */ #endif + /* set by signal handler if SIGINT is received _and_ its trap is not set */ smallint flag_SIGINT; #if ENABLE_HUSH_LOOPS smallint flag_break_continue; @@ -1944,6 +1945,9 @@ enum { static void record_pending_signo(int sig) { sigaddset(&G.pending_set, sig); +#if ENABLE_FEATURE_EDITING + bb_got_signal = sig; /* for read_line_input: "we got a signal" */ +#endif #if ENABLE_HUSH_FAST if (sig == SIGCHLD) { G.count_SIGCHLD++; @@ -2652,30 +2656,53 @@ static int get_user_input(struct in_str *i) for (;;) { reinit_unicode_for_hush(); G.flag_SIGINT = 0; - /* buglet: SIGINT will not make new prompt to appear _at once_, - * only after . (^C works immediately) */ - r = read_line_input(G.line_input_state, prompt_str, + + bb_got_signal = 0; + if (!sigisemptyset(&G.pending_set)) { + /* Whoops, already got a signal, do not call read_line_input */ + bb_got_signal = r = -1; + } else { + /* For shell, LI_INTERRUPTIBLE is set: + * read_line_input will abort on either + * getting EINTR in poll(), or if it sees bb_got_signal != 0 + * (IOW: if signal arrives before poll() is reached). 
+ * Interactive testcases: + * (while kill -INT $$; do sleep 1; done) & + * #^^^ prints ^C, prints prompt, repeats + * trap 'echo I' int; (while kill -INT $$; do sleep 1; done) & + * #^^^ prints ^C, prints "I", prints prompt, repeats + * trap 'echo T' term; (while kill $$; do sleep 1; done) & + * #^^^ prints "T", prints prompt, repeats + * #(bash 5.0.17 exits after first "T", looks like a bug) + */ + r = read_line_input(G.line_input_state, prompt_str, G.user_input_buf, CONFIG_FEATURE_EDITING_MAX_LEN-1 - ); - /* read_line_input intercepts ^C, "convert" it to SIGINT */ - if (r == 0) { - raise(SIGINT); + ); + /* read_line_input intercepts ^C, "convert" it to SIGINT */ + if (r == 0) + raise(SIGINT); + } + /* bash prints ^C (before running a trap, if any) + * both on keyboard ^C and on real SIGINT (non-kbd generated). + */ + if (sigismember(&G.pending_set, SIGINT)) { + write(STDOUT_FILENO, "^C\n", 3); + G.last_exitcode = 128 | SIGINT; } check_and_run_traps(); - if (r != 0 && !G.flag_SIGINT) + if (r == 0) /* keyboard ^C? */ + continue; /* go back, read another input line */ + if (r > 0) /* normal input? 
(no ^C, no ^D, no signals) */ break; - /* ^C or SIGINT: repeat */ - /* bash prints ^C even on real SIGINT (non-kbd generated) */ - write(STDOUT_FILENO, "^C\n", 3); - G.last_exitcode = 128 | SIGINT; - } - if (r < 0) { - /* EOF/error detected */ - /* ^D on interactive input goes to next line before exiting: */ - write(STDOUT_FILENO, "\n", 1); - i->p = NULL; - i->peek_buf[0] = r = EOF; - return r; + if (!bb_got_signal) { + /* r < 0: ^D/EOF/error detected (but not signal) */ + /* ^D on interactive input goes to next line before exiting: */ + write(STDOUT_FILENO, "\n", 1); + i->p = NULL; + i->peek_buf[0] = r = EOF; + return r; + } + /* it was a signal: go back, read another input line */ } i->p = G.user_input_buf; return (unsigned char)*i->p++; From vda.linux at googlemail.com Sun Jan 16 22:54:46 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 16 Jan 2022 23:54:46 +0100 Subject: [git commit] shell: add comments about SIGINT-related problems Message-ID: <20220117182353.5B3088348E@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=a277506a64404e6c4472ff89c944c4f353db1c33 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- shell/ash.c | 13 ++++++++----- shell/shell_common.c | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/shell/ash.c b/shell/ash.c index ca5c755b6..086773dd7 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -664,7 +664,7 @@ raise_exception(int e) /* * Called when a SIGINT is received. (If the user specifies * that SIGINT is to be trapped or ignored using the trap builtin, then - * this routine is not called.) Suppressint is nonzero when interrupts + * this routine is not called.) suppress_int is nonzero when interrupts * are held using the INT_OFF macro. (The test for iflag is just * defensive programming.) 
*/ @@ -695,13 +695,12 @@ raise_interrupt(void) } while (0) #endif -static IF_ASH_OPTIMIZE_FOR_SIZE(inline) void +static IF_NOT_ASH_OPTIMIZE_FOR_SIZE(inline) void int_on(void) { barrier(); - if (--suppress_int == 0 && pending_int) { + if (--suppress_int == 0 && pending_int) raise_interrupt(); - } } #if DEBUG_INTONOFF # define INT_ON do { \ @@ -711,7 +710,7 @@ int_on(void) #else # define INT_ON int_on() #endif -static IF_ASH_OPTIMIZE_FOR_SIZE(inline) void +static IF_NOT_ASH_OPTIMIZE_FOR_SIZE(inline) void force_int_on(void) { barrier(); @@ -10785,6 +10784,10 @@ preadfd(void) # endif reinit_unicode_for_ash(); again: +//BUG: not in INT_OFF/INT_ON section - SIGINT et al would longjmp out of read_line_input()! +//This would cause a memory leak in interactive shell +//(repeated internal allocations in read_line_input): +// (while kill -INT $$; do :; done) & nr = read_line_input(line_input_state, cmdedit_prompt, buf, IBUFSIZ); if (nr == 0) { /* ^C pressed, "convert" to SIGINT */ diff --git a/shell/shell_common.c b/shell/shell_common.c index 2e36d9208..13163acdf 100644 --- a/shell/shell_common.c +++ b/shell/shell_common.c @@ -196,6 +196,7 @@ shell_builtin_read(struct builtin_read_params *params) */ errno = 0; pfd[0].events = POLLIN; +//TODO race with a signal arriving just before the poll! 
if (poll(pfd, 1, timeout) <= 0) { /* timed out, or EINTR */ err = errno; From vda.linux at googlemail.com Mon Jan 17 22:59:46 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 17 Jan 2022 23:59:46 +0100 Subject: [git commit] fix "defined but not used" warnings Message-ID: <20220117225342.7327D82B42@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=8ad2acf352d790d0bdd792b8e126d58a088451f3 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- archival/libarchive/get_header_tar.c | 2 ++ miscutils/i2c_tools.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c index d26868bf8..cc6f3f0ad 100644 --- a/archival/libarchive/get_header_tar.c +++ b/archival/libarchive/get_header_tar.c @@ -147,11 +147,13 @@ static void process_pax_hdr(archive_handle_t *archive_handle, unsigned sz, int g #endif } +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS static void die_if_bad_fnamesize(off_t sz) { if ((uoff_t)sz > 0xfff) /* more than 4k?! 
no funny business please */ bb_simple_error_msg_and_die("bad archive"); } +#endif char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) { diff --git a/miscutils/i2c_tools.c b/miscutils/i2c_tools.c index e3741eeba..da26f5e19 100644 --- a/miscutils/i2c_tools.c +++ b/miscutils/i2c_tools.c @@ -120,6 +120,7 @@ static int32_t i2c_smbus_access(int fd, char read_write, uint8_t cmd, return ioctl(fd, I2C_SMBUS, &args); } +#if ENABLE_I2CGET || ENABLE_I2CSET || ENABLE_I2CDUMP || ENABLE_I2CDETECT static int32_t i2c_smbus_read_byte(int fd) { union i2c_smbus_data data; @@ -131,6 +132,7 @@ static int32_t i2c_smbus_read_byte(int fd) return data.byte; } +#endif #if ENABLE_I2CGET || ENABLE_I2CSET || ENABLE_I2CDUMP static int32_t i2c_smbus_write_byte(int fd, uint8_t val) From vda.linux at googlemail.com Mon Jan 17 23:36:42 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 18 Jan 2022 00:36:42 +0100 Subject: [git commit] libbb: shrink lineedit_read_key() Message-ID: <20220117233125.973F183545@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=1e825acf8d715fe49af040cb02f9e96c26955832 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta lineedit_read_key 237 231 -6 Signed-off-by: Denys Vlasenko --- archival/libarchive/decompress_bunzip2.c | 2 +- coreutils/head.c | 6 +++--- editors/patch.c | 2 +- editors/patch_toybox.c | 2 +- include/libbb.h | 2 ++ libbb/lineedit.c | 26 ++++++++++++++++---------- libbb/read_key.c | 1 + 7 files changed, 25 insertions(+), 16 deletions(-) diff --git a/archival/libarchive/decompress_bunzip2.c b/archival/libarchive/decompress_bunzip2.c index 42e2b4f88..4a2b668aa 100644 --- a/archival/libarchive/decompress_bunzip2.c +++ b/archival/libarchive/decompress_bunzip2.c @@ -654,7 +654,7 @@ static int read_bunzip(bunzip_data *bd, char *outbuf, int len) /* Subtract the 1 copy we'd output anyway to get extras */ --bd->writeCopies; } - } /* for(;;) */ + } /* for (;;) */ /* Decompression 
of this input block completed successfully */ bd->writeCRC = CRC = ~CRC; diff --git a/coreutils/head.c b/coreutils/head.c index 9586f869f..c7537a20e 100644 --- a/coreutils/head.c +++ b/coreutils/head.c @@ -76,7 +76,7 @@ print_except_N_last_bytes(FILE *fp, unsigned count) { unsigned char *circle = xmalloc(++count); unsigned head = 0; - for(;;) { + for (;;) { int c; c = getc(fp); if (c == EOF) @@ -105,7 +105,7 @@ print_except_N_last_lines(FILE *fp, unsigned count) { char **circle = xzalloc((++count) * sizeof(circle[0])); unsigned head = 0; - for(;;) { + for (;;) { char *c; c = xmalloc_fgets(fp); if (!c) @@ -127,7 +127,7 @@ print_except_N_last_lines(FILE *fp, unsigned count) } ret: head = 0; - for(;;) { + for (;;) { free(circle[head++]); if (head == count) break; diff --git a/editors/patch.c b/editors/patch.c index 110176630..aebb5073e 100644 --- a/editors/patch.c +++ b/editors/patch.c @@ -418,7 +418,7 @@ int patch_main(int argc UNUSED_PARAM, char **argv) } // Loop through the lines in the patch - for(;;) { + for (;;) { char *patchline; patchline = xmalloc_fgetline(stdin); diff --git a/editors/patch_toybox.c b/editors/patch_toybox.c index aebab8132..69a508b2e 100644 --- a/editors/patch_toybox.c +++ b/editors/patch_toybox.c @@ -441,7 +441,7 @@ int patch_main(int argc UNUSED_PARAM, char **argv) TT.filein = TT.fileout = -1; // Loop through the lines in the patch - for(;;) { + for (;;) { char *patchline; patchline = get_line(TT.filepatch); diff --git a/include/libbb.h b/include/libbb.h index b45ce91c5..8e3b7ae8e 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1900,6 +1900,8 @@ enum { * (unless fd is in non-blocking mode), * subsequent reads will time out after a few milliseconds. * Return of -1 means EOF or error (errno == 0 on EOF). + * Nonzero errno is not preserved across the call: + * if there was no error, errno will be cleared to 0. * buffer[0] is used as a counter of buffered chars and must be 0 * on first call. 
* timeout: diff --git a/libbb/lineedit.c b/libbb/lineedit.c index f76afd37d..82624757e 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c @@ -2155,7 +2155,7 @@ static int lineedit_read_key(char *read_key_buffer, int timeout) #endif fflush_all(); - while (1) { + for (;;) { /* Wait for input. TIMEOUT = -1 makes read_key wait even * on nonblocking stdin, TIMEOUT = 50 makes sure we won't * insist on full MB_CUR_MAX buffer to declare input like @@ -2167,24 +2167,30 @@ static int lineedit_read_key(char *read_key_buffer, int timeout) * * Note: read_key sets errno to 0 on success. */ - do { + for (;;) { if ((state->flags & LI_INTERRUPTIBLE) && bb_got_signal) { errno = EINTR; return -1; } //FIXME: still races here with signals, but small window to poll() inside read_key IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) + /* errno = 0; - read_key does this itself */ ic = read_key(STDIN_FILENO, read_key_buffer, timeout); IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 0;) - } while (!(state->flags & LI_INTERRUPTIBLE) && errno == EINTR); + if (errno != EINTR) + break; + if (state->flags & LI_INTERRUPTIBLE) { + /* LI_INTERRUPTIBLE bails out on EINTR, + * but nothing really guarantees that bb_got_signal + * is nonzero. Follow the least surprise principle: + */ + if (bb_got_signal == 0) + bb_got_signal = 255; + goto ret; + } + } if (errno) { - /* LI_INTERRUPTIBLE can bail out with EINTR here, - * but nothing really guarantees that bb_got_signal - * is nonzero. 
Follow the least surprise principle: - */ - if (errno == EINTR && bb_got_signal == 0) - bb_got_signal = 255; /* something nonzero */ #if ENABLE_UNICODE_SUPPORT if (errno == EAGAIN && unicode_idx != 0) goto pushback; @@ -2251,7 +2257,7 @@ static int lineedit_read_key(char *read_key_buffer, int timeout) #endif break; } - + ret: return ic; } diff --git a/libbb/read_key.c b/libbb/read_key.c index 829ae215c..cf8ed411e 100644 --- a/libbb/read_key.c +++ b/libbb/read_key.c @@ -291,6 +291,7 @@ int64_t FAST_FUNC safe_read_key(int fd, char *buffer, int timeout) { int64_t r; do { + /* errno = 0; - read_key does this itself */ r = read_key(fd, buffer, timeout); } while (errno == EINTR); return r; From bugzilla at busybox.net Tue Jan 18 14:25:42 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 18 Jan 2022 14:25:42 +0000 Subject: [Bug 14526] possible wrong behaviour with patterns with double [ with no closing ] In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14526 Christoph Anton Mitterer changed: What |Removed |Added ---------------------------------------------------------------------------- Summary|possible wrong behaviour |possible wrong behaviour |with patterns double [ with |with patterns with double [ |no closing ] |with no closing ] -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Tue Jan 18 14:27:56 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 18 Jan 2022 14:27:56 +0000 Subject: [Bug 14526] possible wrong behaviour with patterns with double [ with no closing ] In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14526 --- Comment #1 from Christoph Anton Mitterer --- If busybox sh would use glibc's fnmatch() for this, then this may also be a bug there. 
See: https://lore.kernel.org/dash/YeZbt7nhvODBSL0I at gondor.apana.org.au/T/#m090520913643547feccac71ed6e6f48219fb5988 respectively: https://sourceware.org/bugzilla/show_bug.cgi?id=28792 -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Fri Jan 21 11:43:15 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Fri, 21 Jan 2022 11:43:15 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gTmV3OiBBd2sgZnJvbSBidXN5Ym94LXYxLjM1?= =?UTF-8?B?LjAgZG9lc27igJl0IHdvcmsu?= Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 Bug ID: 14536 Summary: Awk from busybox-v1.35.0 doesn't work. Product: Busybox Version: unspecified Hardware: Other OS: Other Status: NEW Severity: critical Priority: P5 Component: Standard Compliance Assignee: unassigned at busybox.net Reporter: aswerklon at gmail.com CC: busybox-cvs at busybox.net Target Milestone: --- Created attachment 9201 --> https://bugs.busybox.net/attachment.cgi?id=9201&action=edit the patch-revert awk to busybox-1.33.1. Form data handler
is not executed. awk problem from busybox-v1.35.0. Reverting awk itself to busybox-v1.33.1 and everything works. Description here: https://forum.openwrt.org/t/awk-from-busybox-v1-35-0-doesnt-work/117917 -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Fri Jan 21 11:55:30 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Fri, 21 Jan 2022 11:55:30 +0000 Subject: [Bug 14526] possible wrong behaviour with patterns with double [ with no closing ] In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14526 melsem changed: What |Removed |Added ---------------------------------------------------------------------------- Version|1.30.x |unspecified Severity|normal |critical Hardware|All |Other OS|All |Other -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Fri Jan 21 16:06:09 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Fri, 21 Jan 2022 16:06:09 +0000 Subject: [Bug 14541] New: sed: s-command with "semi-special" delimiters get wrong behaviour Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14541 Bug ID: 14541 Summary: sed: s-command with "semi-special" delimiters get wrong behaviour Product: Busybox Version: 1.30.x Hardware: All OS: All Status: NEW Severity: major Priority: P5 Component: Standard Compliance Assignee: unassigned at busybox.net Reporter: calestyo at scientia.org CC: busybox-cvs at busybox.net Target Milestone: --- Hey. I recently looked into behaviour of sed implementations when unusual delimiters were used in: - context addresses - s-command The outcome was, that in my opinion POSIX itself is pretty ambiguous with respect to how it defines these and their parsing and semantics. 
See more about that here: - https://www.austingroupbugs.net/view.php?id=1550 (not so interesting, just minor clarification proposals) - https://www.austingroupbugs.net/view.php?id=1551 - https://www.austingroupbugs.net/view.php?id=1556 Especially issue 1551 gives several example where BusyBox' sed behaves considerably different from GNU's sed (which by itself is however not necessarily a bug, especially when even POSIX seems ambiguous). But there is one case at least, where I think BusyBox' sed is definitely wrong It's described in detail in: - https://austingroupbugs.net/view.php?id=1551 (the "main" report and there point (3) ) https://austingroupbugs.net/view.php?id=1551#c5611 (some additions to point "(3)") and a tabular overview given in: - https://austingroupbugs.net/view.php?id=1551#c5612 In short (using BREs as example): Consider a delimiter is used, that is by itself not a special character (again BREs as example) like (the non-standard) '+' which is however special when preceded by a '\' (that is itself not quoted). Same example would work with the (standard) '(', though with different effects. In BusyBox sed, the following happens: $ printf '%s\n' '9+' | busybox sed 's+9\++X+' X+ $ printf '%s\n' '99+' | busybox sed 's+9\++X+' X+ $ printf '%s\n' '999+' | busybox sed 's+9\++X+' X+ In GNU sed: $ printf '%s\n' '9+' | sed 's+9\++X+' X $ printf '%s\n' '99+' | sed 's+9\++X+' 9X $ printf '%s\n' '999+' | sed 's+9\++X+' 99X In BREs, '+' alone is never special, and the '\+' here is clearly the escape of the delimiter. Regardless of what POSIX actually means with "literal" (see the discussion in the tickets above), ... there is IMO no way to interpret it like what BusyBox seems to do, which is: - "un-delimiter" the '+' (because of it's preceding '\' ) - still keep the '\' with respect to the RE and give special meaning to the '+' That's like "doubling" the effect of the '\'. 
The https://austingroupbugs.net/view.php?id=1551#c5611 mentioned above, describes how dangerous this actually is. Cause even if one uses delimiters, which don't seem "special" in any way, like with: $ printf '%s\n' '9' | busybox sed 'sw9\wwXw' 9 $ printf '%s\n' '99' | busybox sed 'sw9\wwXw' X $ printf '%s\n' '999' | busybox sed 'sw9\wwXw' X9 BusyBox behaviour get's odd, when that normal character has some special meaning, when preceded by a '\' and when that sequence has than some (non-standard) special meaning (like '\w'). GNU sed handles these like: $ printf '%s\n' '99' | sed 'sw9\wwXw' 99 $ printf '%s\n' '9w' | sed 'sw9\wwXw' X so it effectively takes the \w ... makes it a non-delimiter (and removes the escaping), being just left with the character 'w' which is by itself literal. I'd guess that all this might also apply to context addresses in BusyBox sed. Not sure whether EREs are also affect in some weird way. Thanks, Chris. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sun Jan 23 11:57:27 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 12:57:27 +0100 Subject: [git commit] libbb/sha1: use SSE2 in unrolled x86-64 code. 
~10% faster Message-ID: <20220123120001.AEAE682A14@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=39369ff460f3e2dbfec7f6be181b2fb98f3c1867 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta .rodata 108241 108305 +64 sha1_process_block64 3502 3495 -7 ------------------------------------------------------------------------------ (add/remove: 5/0 grow/shrink: 1/1 up/down: 64/-7) Total: 57 bytes Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S | 992 +++++++++++++++++++++++------------------ libbb/hash_md5_sha_x86-64.S.sh | 440 ++++++++++++------ 2 files changed, 854 insertions(+), 578 deletions(-) Patch is too large, so refusing to show it From vda.linux at googlemail.com Sun Jan 23 14:46:05 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 15:46:05 +0100 Subject: [git commit] add busybox_ldscript.README.txt Message-ID: <20220123143927.8314E81E38@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=33a9f34df5c53d3dd074a2168ff40d612a36667a branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- busybox_ldscript.README.txt | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/busybox_ldscript.README.txt b/busybox_ldscript.README.txt new file mode 100644 index 000000000..1625a970a --- /dev/null +++ b/busybox_ldscript.README.txt @@ -0,0 +1,47 @@ +/* Add SORT_BY_ALIGNMENT to linker script (found in busybox_unstripped.out): +## .rodata : { *(.rodata SORT_BY_ALIGNMENT(.rodata.*) .gnu.linkonce.r.*) } +## .data : { *(.data SORT_BY_ALIGNMENT(.data.*) .gnu.linkonce.d.*) } +## .bss : { *(.bss SORT_BY_ALIGNMENT(.bss.*) .gnu.linkonce.b.*) } +## This will eliminate most of the padding (~3kb). +## Hmm, "ld --sort-section alignment" should do it too. +## +## There is a ld hack which is meant to decrease disk usage +## at the cost of more RAM usage (??!!) 
in standard ld script: +## . = ALIGN (0x1000) - ((0x1000 - .) & (0x1000 - 1)); . = DATA_SEGMENT_ALIGN (0x1000, 0x1000); +## Replace it with: +## . = ALIGN (0x1000); . = DATA_SEGMENT_ALIGN (0x1000, 0x1000); +## to unconditionally align .data to the next page boundary, +## instead of "next page, plus current offset in this page" +*/ + +/* To reduce the number of VMAs each bbox process has, +## move *(.bss SORT_BY_ALIGNMENT(.bss.*) ...) +## part from .bss : {...} block to .data : { ... } block. +## (This usually increases .data section by only one page). +## Result: +## +## text data bss dec hex filename +## 1050792 560 7580 1058932 102874 busybox.bss +## 1050792 8149 0 1058941 10287d busybox.nobss +## +## $ exec busybox.bss pmap $$ +## 0000000008048000 1028K r-xp /path/to/busybox.bss +## 0000000008149000 8K rw-p /path/to/busybox.bss +## 000000000814b000 4K rw-p [ anon ] <---- this VMA is eliminated +## 00000000085f5000 4K ---p [heap] +## 00000000085f6000 4K rw-p [heap] +## 00000000f7778000 8K rw-p [ anon ] +## 00000000f777a000 12K r--p [vvar] +## 00000000f777d000 8K r-xp [vdso] +## 00000000ff7e9000 132K rw-p [stack] +## +## $ exec busybox.nobss pmap $$ +## 0000000008048000 1028K r-xp /path/to/busybox.nobss +## 0000000008149000 12K rw-p /path/to/busybox.nobss +## 00000000086f0000 4K ---p [heap] +## 00000000086f1000 4K rw-p [heap] +## 00000000f7783000 8K rw-p [ anon ] +## 00000000f7785000 12K r--p [vvar] +## 00000000f7788000 8K r-xp [vdso] +## 00000000ffac0000 132K rw-p [stack] +*/ From bugzilla at busybox.net Sun Jan 23 15:07:07 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sun, 23 Jan 2022 15:07:07 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gQXdrIGZyb20gYnVzeWJveC12MS4zNS4wIGRv?= =?UTF-8?B?ZXNu4oCZdCB3b3JrLg==?= In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 --- Comment #1 from Denys Vlasenko --- Please give the testcase which does not work. 
-- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sun Jan 23 17:48:49 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 18:48:49 +0100 Subject: [git commit] sed: fix handling of escaped delimiters in s/// search pattern, closes 14541 Message-ID: <20220123174249.1E04682980@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=e998c7c032458a05a7afcc13ce0dc980b99ecc6c branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta copy_parsing_escapes 67 96 +29 parse_regex_delim 109 111 +2 get_address 213 215 +2 add_cmd 1176 1178 +2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0) Total: 35 bytes Signed-off-by: Denys Vlasenko --- editors/sed.c | 19 +++++++++++-------- testsuite/sed.tests | 10 ++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index 48b0dbf67..02a527b4a 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -246,7 +246,6 @@ static void cleanup_outname(void) } /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ - static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to) { char *d = dest; @@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from return d - dest; } -static char *copy_parsing_escapes(const char *string, int len) +static char *copy_parsing_escapes(const char *string, int len, char delim) { const char *s; char *dest = xmalloc(len + 1); @@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len) len = parse_escapes(dest, string, len, s[1], s[0]); string = dest; } + if (delim) { + /* we additionally unescape any instances of escaped delimiter. + * For example, in 's+9\++X+' the pattern is "9+", not "9\+". 
+ */ + len = parse_escapes(dest, string, len, delim, delim); + } return dest; } - /* * index_of_next_unescaped_regexp_delim - walks left to right through a string * beginning at a specified index and returns the index of the next regular @@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* save the match string */ idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); - *match = copy_parsing_escapes(cmdstr_ptr, idx); - + *match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter); /* save the replacement string */ cmdstr_ptr += idx + 1; idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); - *replace = copy_parsing_escapes(cmdstr_ptr, idx); + *replace = copy_parsing_escapes(cmdstr_ptr, idx, 0); return ((cmdstr_ptr - cmdstr) + idx); } @@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) delimiter = *++pos; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); if (next != 0) { - temp = copy_parsing_escapes(pos, next); + temp = copy_parsing_escapes(pos, next, 0); G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t)); xregcomp(*regex, temp, G.regex_type); free(temp); @@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) cmdstr++; } len = strlen(cmdstr); - sed_cmd->string = copy_parsing_escapes(cmdstr, len); + sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0); cmdstr += len; /* "\anychar" -> "anychar" */ parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0'); diff --git a/testsuite/sed.tests b/testsuite/sed.tests index e62b839f7..440996a21 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \ "sed 's/ *$/_/g'" \ "qwerty_\n" "" "qwerty\n" +# the pattern here is interpreted as "9+", not as "9\+" +testing "sed special char as s/// delimiter, in pattern" \ + "sed 's+9\++X+'" \ + 
"X8=17\n" "" "9+8=17\n" + +# but in replacement string, "\&" remains "\&", not interpreted as "&" +testing "sed special char as s/// delimiter, in replacement" \ + "sed 's&9&X\&&'" \ + "X&+8=17\n" "" "9+8=17\n" + testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \ "sed ': testcont; /\\\\$/{ =; N; b testcont }'" \ "\ From bugzilla at busybox.net Sun Jan 23 17:52:15 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Sun, 23 Jan 2022 17:52:15 +0000 Subject: [Bug 14541] sed: s-command with "semi-special" delimiters get wrong behaviour In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14541 Denys Vlasenko changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Denys Vlasenko --- Fixed in git. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Sun Jan 23 18:04:27 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 19:04:27 +0100 Subject: [git commit] sed: fix handling of escaped delimiters in s/// replacement Message-ID: <20220123175801.A542280800@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=f12fb1e4092900f26f7f8c71cde44b1cd7d26439 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master function old new delta parse_regex_delim 111 140 +29 Signed-off-by: Denys Vlasenko --- editors/sed.c | 5 ++++- testsuite/sed.tests | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index 02a527b4a..32a4b61f6 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -355,7 +355,10 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* save the replacement string */ cmdstr_ptr += idx + 1; idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); - *replace = copy_parsing_escapes(cmdstr_ptr, idx, 0); 
+//GNU sed 4.8: +// echo 789 | sed 's&8&\&&' - 7&9 ("\&" remained "\&") +// echo 789 | sed 's1\(8\)1\1\11' - 7119 ("\1\1" become "11") + *replace = copy_parsing_escapes(cmdstr_ptr, idx, delimiter != '&' ? delimiter : 0); return ((cmdstr_ptr - cmdstr) + idx); } diff --git a/testsuite/sed.tests b/testsuite/sed.tests index 440996a21..626542e33 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -329,10 +329,15 @@ testing "sed special char as s/// delimiter, in pattern" \ "sed 's+9\++X+'" \ "X8=17\n" "" "9+8=17\n" -# but in replacement string, "\&" remains "\&", not interpreted as "&" -testing "sed special char as s/// delimiter, in replacement" \ +# Matching GNU sed 4.8: +# in replacement string, "\&" remains "\&", not interpreted as "&" +testing "sed special char as s/// delimiter, in replacement 1" \ "sed 's&9&X\&&'" \ "X&+8=17\n" "" "9+8=17\n" +# in replacement string, "\1" is interpreted as "1" +testing "sed special char as s/// delimiter, in replacement 2" \ + "sed 's1\(9\)1X\11'" \ + "X1+8=17\n" "" "9+8=17\n" testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \ "sed ': testcont; /\\\\$/{ =; N; b testcont }'" \ From vda.linux at googlemail.com Sun Jan 23 19:24:32 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 20:24:32 +0100 Subject: [git commit] Add support for long options to cmp Message-ID: <20220123192020.68BD882B29@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=6dd6a6c42d1465d8cca2539476f6bffd5e1353dd branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master In order to improve compatibility with GNU cmp add support for long options to busybox cmp. 
function old new delta static.cmp_longopts - 36 +36 cmp_main 589 594 +5 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/0 up/down: 41/0) Total: 41 bytes Signed-off-by: Walter Lozano Signed-off-by: Denys Vlasenko --- editors/cmp.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/editors/cmp.c b/editors/cmp.c index 6d2b0c6c3..b89e519ad 100644 --- a/editors/cmp.c +++ b/editors/cmp.c @@ -54,6 +54,7 @@ int cmp_main(int argc UNUSED_PARAM, char **argv) int retval = 0; int max_count = -1; +#if !ENABLE_LONG_OPTS opt = getopt32(argv, "^" OPT_STR "\0" "-1" @@ -62,6 +63,23 @@ int cmp_main(int argc UNUSED_PARAM, char **argv) ":l--s:s--l", &max_count ); +#else + static const char cmp_longopts[] ALIGN1 = + "bytes\0" Required_argument "n" + "quiet\0" No_argument "s" + "silent\0" No_argument "s" + "verbose\0" No_argument "l" + ; + opt = getopt32long(argv, "^" + OPT_STR + "\0" "-1" + IF_DESKTOP(":?4") + IF_NOT_DESKTOP(":?2") + ":l--s:s--l", + cmp_longopts, + &max_count + ); +#endif argv += optind; filename1 = *argv; From vda.linux at googlemail.com Sun Jan 23 22:07:07 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 23:07:07 +0100 Subject: [git commit] mkfs.vfat: fix volume label to be padded with space Message-ID: <20220123220032.D28F982A9C@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=78fdf4d22d578d5d51cc08c768b35d050a92902a branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master The specification requires volume label to be space padded. Latest fsck.vfat will remove the zero padded volume label as invalid. See also: https://github.com/dosfstools/dosfstools/issues/172 Make the default label also "NO NAME" which has the special meaning that label is not set. 
function old new delta mkfs_vfat_main 1470 1502 +32 static.NO_NAME_11 - 12 +12 .rodata 104309 104318 +9 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 2/0 up/down: 53/0) Total: 53 bytes Signed-off-by: Timo Teräs Signed-off-by: Denys Vlasenko --- util-linux/mkfs_vfat.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/util-linux/mkfs_vfat.c b/util-linux/mkfs_vfat.c index 844d965f8..821371953 100644 --- a/util-linux/mkfs_vfat.c +++ b/util-linux/mkfs_vfat.c @@ -218,8 +218,11 @@ static const char boot_code[] ALIGN1 = int mkfs_vfat_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int mkfs_vfat_main(int argc UNUSED_PARAM, char **argv) { + static const char NO_NAME_11[] = "NO NAME "; + struct stat st; - const char *volume_label = ""; + const char *arg_volume_label = NO_NAME_11; //default + char volume_label11[12]; char *buf; char *device_name; uoff_t volume_size_bytes; @@ -257,14 +260,17 @@ int mkfs_vfat_main(int argc UNUSED_PARAM, char **argv) opts = getopt32(argv, "^" "Ab:cCf:F:h:Ii:l:m:n:r:R:s:S:v" "\0" "-1", //:b+:f+:F+:h+:r+:R+:s+:S+:vv:c--l:l--c - NULL, NULL, NULL, NULL, NULL, - NULL, NULL, &volume_label, NULL, NULL, NULL, NULL); + /*b*/NULL, /*f*/NULL, /*F*/NULL, /*h*/NULL, /*i*/NULL, + /*l*/NULL, /*m*/NULL, /*n*/&arg_volume_label, + /*r*/NULL, /*R*/NULL, /*s*/NULL, /*S*/NULL); argv += optind; // cache device name device_name = argv[0]; // default volume ID = creation time volume_id = time(NULL); + // truncate to exactly 11 chars, pad with spaces + sprintf(volume_label11, "%-11.11s", arg_volume_label); dev = xopen(device_name, O_RDWR); xfstat(dev, &st, device_name); @@ -459,7 +465,7 @@ int mkfs_vfat_main(int argc UNUSED_PARAM, char **argv) (int)media_byte, volume_size_sect, (int)total_clust, (int)sect_per_clust, sect_per_fat, - (int)volume_id, volume_label + (int)volume_id, volume_label11 ); } @@ -508,7 +514,7 @@ int mkfs_vfat_main(int argc UNUSED_PARAM, 
char **argv) STORE_LE(boot_blk->vi.ext_boot_sign, 0x29); STORE_LE(boot_blk->vi.volume_id32, volume_id); memcpy(boot_blk->vi.fs_type, "FAT32 ", sizeof(boot_blk->vi.fs_type)); - strncpy(boot_blk->vi.volume_label, volume_label, sizeof(boot_blk->vi.volume_label)); + memcpy(boot_blk->vi.volume_label, volume_label11, 11); memcpy(boot_blk->boot_code, boot_code, sizeof(boot_code)); STORE_LE(boot_blk->boot_sign, BOOT_SIGN); @@ -545,15 +551,18 @@ int mkfs_vfat_main(int argc UNUSED_PARAM, char **argv) // root directory // empty directory is just a set of zero bytes memset(buf, 0, sect_per_clust * bytes_per_sect); - if (volume_label[0]) { - // create dir entry for volume_label + // not "NO NAME", "NO NAME " etc? + // (mkfs.fat 4.1 won't create dir entry even with explicit -n 'NO NAME', + // but will create one with e.g. -n '', -n ' zZz') + if (strcmp(volume_label11, NO_NAME_11) != 0) { + // create dir entry for volume label struct msdos_dir_entry *de; #if 0 struct tm tm_time; uint16_t t, d; #endif de = (void*)buf; - strncpy(de->name, volume_label, sizeof(de->name)); + memcpy(de->name, volume_label11, 11); STORE_LE(de->attr, ATTR_VOLUME); #if 0 localtime_r(&create_time, &tm_time); From vda.linux at googlemail.com Sun Jan 23 22:13:44 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Sun, 23 Jan 2022 23:13:44 +0100 Subject: [git commit] apply const trick to ptr_to_globals Message-ID: <20220123220820.26F8E81918@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=117a8c9b7a50053964159c342af1f3810cbbd5b8 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master This was missing in the previous attempt to fix it via [1] This helps fix segfaults when compiling with clang ( seen on riscv64 ) [ 452.428349] less[270]: unhandled signal 11 code 0x1 at 0x000000000000000c in busybox.nosuid[2ab7491000+ba000] [ 452.430246] CPU: 3 PID: 270 Comm: less Not tainted 5.15.13-yocto-standard #1 [ 452.431323] Hardware name: riscv-virtio,qemu (DT) [ 452.431925] 
epc : 0000002ab74a19ee ra : 0000002ab74a19dc sp : 0000003fec6ec980 [ 452.432725] gp : 0000002ab754dcb0 tp : 0000003f88783800 t0 : 0000003f8878d4a0 [ 452.433744] t1 : 0000002ab749b00c t2 : 0000000000000000 s0 : 0000003fec6ecc38 [ 452.434732] s1 : 000000000000004c a0 : 00000000ffffffff a1 : 0000002ab754dde0 [ 452.435861] a2 : 0000000000000000 a3 : 0000000000000100 a4 : 0000002ab754f3a0 [ 452.436787] a5 : 0000002ab754f3a0 a6 : 0000000000000000 a7 : 0000002ab754f2a0 [ 452.437974] s2 : 0000000000000002 s3 : 0000002ab754b6c8 s4 : 0000002ab749b60e [ 452.438781] s5 : 0000000000000000 s6 : 0000002ab754b6c8 s7 : 0000003f88943060 [ 452.439723] s8 : 0000003f88944050 s9 : 0000002ad8502e88 s10: 0000002ad8502de8 [ 452.440538] s11: 0000000000000014 t3 : 0000003f887fceb6 t4 : 0000003f8893af0c [ 452.441438] t5 : 0000000000000000 t6 : 0000003f88923000 [1] https://git.busybox.net/busybox/commit/?id=1f925038a Signed-off-by: Khem Raj Signed-off-by: Denys Vlasenko --- include/libbb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libbb.h b/include/libbb.h index 8e3b7ae8e..6aeec249d 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2292,7 +2292,7 @@ struct globals; /* '*const' ptr makes gcc optimize code much better. * Magic prevents ptr_to_globals from going into rodata. 
* If you want to assign a value, use SET_PTR_TO_GLOBALS(x) */ -extern struct globals *const ptr_to_globals; +extern struct globals *BB_GLOBAL_CONST ptr_to_globals; #define barrier() asm volatile ("":::"memory") From vda.linux at googlemail.com Mon Jan 24 06:07:17 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Mon, 24 Jan 2022 07:07:17 +0100 Subject: [git commit] cut: build fix for FEATURE_CUT_REGEX Message-ID: <20220124060058.890BE822DD@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=99e22d230ded676ab53dfa8ab276c1301c2955a0 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master Signed-off-by: Denys Vlasenko --- libbb/Kbuild.src | 1 + 1 file changed, 1 insertion(+) diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index e8bb24f6d..b9d34de8e 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src @@ -200,6 +200,7 @@ lib-$(CONFIG_PGREP) += xregcomp.o lib-$(CONFIG_PKILL) += xregcomp.o lib-$(CONFIG_DEVFSD) += xregcomp.o lib-$(CONFIG_FEATURE_FIND_REGEX) += xregcomp.o +lib-$(CONFIG_FEATURE_CUT_REGEX) += xregcomp.o # Add the experimental logging functionality, only used by zcip lib-$(CONFIG_ZCIP) += logenv.o From bugzilla at busybox.net Mon Jan 24 14:49:14 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 24 Jan 2022 14:49:14 +0000 Subject: [Bug 14541] sed: s-command with "semi-special" delimiters get wrong behaviour In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14541 --- Comment #2 from Christoph Anton Mitterer --- Just for confirmation: - Since the tests are only about s-commands, does your commit also fix the context addresses? - The test you added for \& handling was just about adding the missing test, right? The behaviour itself already worked before?! - I assume these functions were just for BRE, right?! 
- Does that commit also "fix" (well POSIX is ambiguous, IMO, so it's actually more an "align with GNU sed") the following two: a) \1 in replacement: Consider: s1\(x\)1\11 which, depending on what "literal" means, could be either effectively: s/\(x\)/\1/ (BusyBox sed seems to do this: $ printf '%s\n' 'oxo' | busybox sed 's1\(x\)1\11' oxo $ printf '%s\n' 'owo' | busybox sed 's1\(x\)1\11' owo ) or: s/\(x\)/1/ (GNU sed seems to do this: $ printf '%s\n' 'oxo' | sed 's1\(x\)1\11' o1o $ printf '%s\n' 'owo' | sed 's1\(x\)1\11' owo ) b) \1 in BRE (but not ERE, where \1 isn't defined for the RE part) Consider: s1\(xo\)\11X1 which, depending on what "literal" means, could be either effectively: s/\(xo\)\1/X/ (BusyBox sed seems to do this: $ printf '%s\n' 'xoxo' | busybox sed 's1\(xo\)\11X1' X $ printf '%s\n' 'xo1' | busybox sed 's1\(xo\)\11X1' xo1 ) or: s/\(xo\)1/X/ (GNU sed seems to do this: $ printf '%s\n' 'xoxo' | sed 's1\(xo\)\11X1' xoxo $ printf '%s\n' 'xo1' | sed 's1\(xo\)\11X1' X ) -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Mon Jan 24 15:37:28 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Mon, 24 Jan 2022 15:37:28 +0000 Subject: [Bug 14541] sed: s-command with "semi-special" delimiters get wrong behaviour In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14541 Christoph Anton Mitterer changed: What |Removed |Added ---------------------------------------------------------------------------- Resolution|FIXED |--- Status|RESOLVED |REOPENED --- Comment #3 from Christoph Anton Mitterer --- I just went through my list of test cases in https://www.austingroupbugs.net/view.php?id=1551#c5612 : It seems busybox now (with the patch) behaves as GNU, except for one case: GNU: $ printf '%s\n' 'oxo' | sed 's1\(x\)1\11' o1o $ printf '%s\n' 'owo' | sed 's1\(x\)1\11' owo BusyBox with patch: $ printf '%s\n' 'oxo' | ./busybox sed 's1\(x\)1\11' oxo $ printf '%s\n' 'owo' | ./busybox sed 's1\(x\)1\11' owo So in the replacement, the bug doesn't seem to be fixed, yet. Same case,.. the \1 is first "un-delimitered" but then still counted as \1, though the \ should have already been removed because of the "un-delimitering". Thus reopening. -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Tue Jan 25 04:39:15 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 25 Jan 2022 04:39:15 +0000 Subject: [Bug 14546] New: ash: test whether LC_* variables can be changed Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14546 Bug ID: 14546 Summary: ash: test whether LC_* variables can be changed Product: Busybox Version: unspecified Hardware: All OS: All Status: NEW Severity: enhancement Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: calestyo at scientia.org CC: busybox-cvs at busybox.net Target Milestone: --- Created attachment 9206 --> https://bugs.busybox.net/attachment.cgi?id=9206&action=edit patch In some shells (dash, klibc-utils sh) this does currently not work. So it seems like a good idea to test, whether it does. Thanks, Chris. -- You are receiving this mail because: You are on the CC list for the bug. From vda.linux at googlemail.com Tue Jan 25 16:21:45 2022 From: vda.linux at googlemail.com (Denys Vlasenko) Date: Tue, 25 Jan 2022 17:21:45 +0100 Subject: [git commit] libbb/sha1: in unrolled x86-64 code, pass initial W[] in registers, not on stack Message-ID: <20220125161553.F032E8215B@busybox.osuosl.org> commit: https://git.busybox.net/busybox/commit/?id=205042c07a3bf6c8e685c434713f2a9e46630cd0 branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master This can be faster on some CPUs. On Skylake, evidently load latency from L1 (or store-to-load forwarding in LSU) is fast enough to completely hide memory reference latencies here. 
function old new delta sha1_process_block64 3495 3514 +19 Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha_x86-64.S | 310 +++++++++++++++++++++-------------------- libbb/hash_md5_sha_x86-64.S.sh | 109 ++++++++------- 2 files changed, 214 insertions(+), 205 deletions(-) diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index 069a18719..743269d98 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S @@ -1,7 +1,7 @@ ### Generated by hash_md5_sha_x86-64.S.sh ### #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) - .section .text.sha1_process_block64,"ax", at progbits + .section .text.sha1_process_block64, "ax", @progbits .globl sha1_process_block64 .hidden sha1_process_block64 .type sha1_process_block64, @function @@ -10,7 +10,7 @@ sha1_process_block64: pushq %rbp # 1 byte insn pushq %rbx # 1 byte insn - pushq %r15 # 2 byte insn +# pushq %r15 # 2 byte insn pushq %r14 # 2 byte insn pushq %r13 # 2 byte insn pushq %r12 # 2 byte insn @@ -19,7 +19,8 @@ sha1_process_block64: #Register and stack use: # eax..edx: a..d # ebp: e -# esi,edi: temps +# esi,edi,r8..r14: temps +# r15: unused # xmm0..xmm3: W[] # xmm4,xmm5: temps # xmm6: current round constant @@ -33,147 +34,148 @@ sha1_process_block64: movaps rconst0x5A827999(%rip), %xmm6 - # For round 1, steps 0 and 8..15, we pass W[0,8..15] in esi,r8..r15 - # instead of spilling them to stack. - # (We lose parallelized addition of RCONST, but LEA - # can do two additions at once, so...) + # Load W[] to xmm registers, byteswapping on the fly. + # + # For iterations 0..15, we pass W[] in rsi,r8..r14 + # for use in RD1A's instead of spilling them to stack. + # We lose parallelized addition of RCONST, but LEA + # can do two additions at once, so it's probably a wash. + # (We use rsi instead of rN because this makes two + # LEAs in two first RD1A's shorter by one byte). 
movq 4*0(%rdi), %rsi - movq 4*2(%rdi), %r10 + movq 4*2(%rdi), %r8 bswapq %rsi - bswapq %r10 + bswapq %r8 rolq $32, %rsi # rsi = W[1]:W[0] - rolq $32, %r10 + rolq $32, %r8 # r8 = W[3]:W[2] movq %rsi, %xmm0 - movq %r10, %xmm4 - punpcklqdq %xmm4, %xmm0 # xmm0 = r10:rsi = (W[0],W[1],W[2],W[3]) - movaps %xmm0, %xmm4 - paddd %xmm6, %xmm4 - movups %xmm4, -64+4*0(%rsp) + movq %r8, %xmm4 + punpcklqdq %xmm4, %xmm0 # xmm0 = r8:rsi = (W[0],W[1],W[2],W[3]) +# movaps %xmm0, %xmm4 # add RCONST, spill to stack +# paddd %xmm6, %xmm4 +# movups %xmm4, -64+16*0(%rsp) - movq 4*4(%rdi), %r8 + movq 4*4(%rdi), %r9 movq 4*6(%rdi), %r10 - bswapq %r8 + bswapq %r9 bswapq %r10 - rolq $32, %r8 - rolq $32, %r10 - movq %r8, %xmm1 + rolq $32, %r9 # r9 = W[5]:W[4] + rolq $32, %r10 # r10 = W[7]:W[6] + movq %r9, %xmm1 movq %r10, %xmm4 - punpcklqdq %xmm4, %xmm1 # xmm1 = r10:r8 = (W[4],W[5],W[6],W[7]) - movaps %xmm1, %xmm4 - paddd %xmm6, %xmm4 - movups %xmm4, -64+4*4(%rsp) + punpcklqdq %xmm4, %xmm1 # xmm1 = r10:r9 = (W[4],W[5],W[6],W[7]) - movq 4*8(%rdi), %r8 - movq 4*10(%rdi), %r10 - bswapq %r8 - bswapq %r10 - movl %r8d, %r9d # r9d = W[9] - rolq $32, %r8 # r8 = W[9]:W[8] - movl %r10d, %r11d # r11d = W[11] - rolq $32, %r10 # r10 = W[11]:W[10] - movq %r8, %xmm2 - movq %r10, %xmm4 - punpcklqdq %xmm4, %xmm2 # xmm2 = r10:r8 = (W[8],W[9],W[10],W[11]) + movq 4*8(%rdi), %r11 + movq 4*10(%rdi), %r12 + bswapq %r11 + bswapq %r12 + rolq $32, %r11 # r11 = W[9]:W[8] + rolq $32, %r12 # r12 = W[11]:W[10] + movq %r11, %xmm2 + movq %r12, %xmm4 + punpcklqdq %xmm4, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11]) - movq 4*12(%rdi), %r12 + movq 4*12(%rdi), %r13 movq 4*14(%rdi), %r14 - bswapq %r12 + bswapq %r13 bswapq %r14 - movl %r12d, %r13d # r13d = W[13] - rolq $32, %r12 # r12 = W[13]:W[12] - movl %r14d, %r15d # r15d = W[15] + rolq $32, %r13 # r13 = W[13]:W[12] rolq $32, %r14 # r14 = W[15]:W[14] - movq %r12, %xmm3 + movq %r13, %xmm3 movq %r14, %xmm4 - punpcklqdq %xmm4, %xmm3 # xmm3 = r14:r12 = (W[12],W[13],W[14],W[15]) 
+ punpcklqdq %xmm4, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15]) # 0 leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n] + shrq $32, %rsi movl %ecx, %edi # c xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebp # e += (((c ^ d) & b) ^ d) - movl %eax, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebp # e += rotl32(a,5) + movl %eax, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 1 - addl -64+4*1(%rsp), %edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n] movl %ebx, %edi # c xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) addl %edi, %edx # e += (((c ^ d) & b) ^ d) - movl %ebp, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %edx # e += rotl32(a,5) + movl %ebp, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 2 - addl -64+4*2(%rsp), %ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%r8), %ecx # e += RCONST + W[n] + shrq $32, %r8 movl %eax, %edi # c xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ecx # e += (((c ^ d) & b) ^ d) - movl %edx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ecx # e += rotl32(a,5) + movl %edx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 3 - addl -64+4*3(%rsp), %ebx # e += RCONST + W[n] + leal 0x5A827999(%rbx,%r8), %ebx # e += RCONST + W[n] movl %ebp, %edi # c xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebx # e += (((c ^ d) & b) ^ d) - movl %ecx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebx # e += rotl32(a,5) + movl %ecx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 4 - addl -64+4*4(%rsp), %eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%r9), %eax # e += RCONST + W[n] + shrq 
$32, %r9 movl %edx, %edi # c xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) addl %edi, %eax # e += (((c ^ d) & b) ^ d) - movl %ebx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %eax # e += rotl32(a,5) + movl %ebx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 5 - addl -64+4*5(%rsp), %ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%r9), %ebp # e += RCONST + W[n] movl %ecx, %edi # c xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebp # e += (((c ^ d) & b) ^ d) - movl %eax, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebp # e += rotl32(a,5) + movl %eax, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 6 - addl -64+4*6(%rsp), %edx # e += RCONST + W[n] + leal 0x5A827999(%rdx,%r10), %edx # e += RCONST + W[n] + shrq $32, %r10 movl %ebx, %edi # c xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) addl %edi, %edx # e += (((c ^ d) & b) ^ d) - movl %ebp, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %edx # e += rotl32(a,5) + movl %ebp, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) # 7 - addl -64+4*7(%rsp), %ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%r10), %ecx # e += RCONST + W[n] movl %eax, %edi # c xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ecx # e += (((c ^ d) & b) ^ d) - movl %edx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ecx # e += rotl32(a,5) + movl %edx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # PREP %xmm0 %xmm1 %xmm2 %xmm3 -64+16*0(%rsp) movaps %xmm3, %xmm4 @@ -186,9 +188,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm0, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm0, %xmm4 # ffffffff 
for elements <0 (ones with msb bit 1) - paddd %xmm0, %xmm0 # shift left by 1 - psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 + pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm0, %xmm0 # shift left by 1 + psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -201,48 +203,50 @@ sha1_process_block64: paddd %xmm6, %xmm5 movups %xmm5, -64+16*0(%rsp) # 8 - leal 0x5A827999(%rbx,%r8), %ebx # e += RCONST + W[n] + leal 0x5A827999(%rbx,%r11), %ebx # e += RCONST + W[n] + shrq $32, %r11 movl %ebp, %edi # c xorl %eax, %edi # ^d andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebx # e += (((c ^ d) & b) ^ d) - movl %ecx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebx # e += rotl32(a,5) + movl %ecx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 9 - leal 0x5A827999(%rax,%r9), %eax # e += RCONST + W[n] + leal 0x5A827999(%rax,%r11), %eax # e += RCONST + W[n] movl %edx, %edi # c xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) addl %edi, %eax # e += (((c ^ d) & b) ^ d) - movl %ebx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %eax # e += rotl32(a,5) + movl %ebx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 10 - leal 0x5A827999(%rbp,%r10), %ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%r12), %ebp # e += RCONST + W[n] + shrq $32, %r12 movl %ecx, %edi # c xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebp # e += (((c ^ d) & b) ^ d) - movl %eax, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebp # e += rotl32(a,5) + movl %eax, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # 11 - leal 0x5A827999(%rdx,%r11), %edx # 
e += RCONST + W[n] + leal 0x5A827999(%rdx,%r12), %edx # e += RCONST + W[n] movl %ebx, %edi # c xorl %ecx, %edi # ^d andl %eax, %edi # &b xorl %ecx, %edi # (((c ^ d) & b) ^ d) addl %edi, %edx # e += (((c ^ d) & b) ^ d) - movl %ebp, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %edx # e += rotl32(a,5) + movl %ebp, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %edx # e += rotl32(a,5) rorl $2, %eax # b = rotl32(b,30) movaps rconst0x6ED9EBA1(%rip), %xmm6 # PREP %xmm1 %xmm2 %xmm3 %xmm0 -64+16*1(%rsp) @@ -256,9 +260,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm1, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm1, %xmm1 # shift left by 1 - psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 + pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm1, %xmm1 # shift left by 1 + psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -271,15 +275,16 @@ sha1_process_block64: paddd %xmm6, %xmm5 movups %xmm5, -64+16*1(%rsp) # 12 - leal 0x5A827999(%rcx,%r12), %ecx # e += RCONST + W[n] + leal 0x5A827999(%rcx,%r13), %ecx # e += RCONST + W[n] + shrq $32, %r13 movl %eax, %edi # c xorl %ebx, %edi # ^d andl %ebp, %edi # &b xorl %ebx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ecx # e += (((c ^ d) & b) ^ d) - movl %edx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ecx # e += rotl32(a,5) + movl %edx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ecx # e += rotl32(a,5) rorl $2, %ebp # b = rotl32(b,30) # 13 leal 0x5A827999(%rbx,%r13), %ebx # e += RCONST + W[n] @@ -288,31 +293,32 @@ sha1_process_block64: andl %edx, %edi # &b xorl %eax, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebx # e += (((c ^ d) & b) ^ d) - movl %ecx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebx # e += rotl32(a,5) + movl 
%ecx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebx # e += rotl32(a,5) rorl $2, %edx # b = rotl32(b,30) # 14 leal 0x5A827999(%rax,%r14), %eax # e += RCONST + W[n] + shrq $32, %r14 movl %edx, %edi # c xorl %ebp, %edi # ^d andl %ecx, %edi # &b xorl %ebp, %edi # (((c ^ d) & b) ^ d) addl %edi, %eax # e += (((c ^ d) & b) ^ d) - movl %ebx, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %eax # e += rotl32(a,5) + movl %ebx, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %eax # e += rotl32(a,5) rorl $2, %ecx # b = rotl32(b,30) # 15 - leal 0x5A827999(%rbp,%r15), %ebp # e += RCONST + W[n] + leal 0x5A827999(%rbp,%r14), %ebp # e += RCONST + W[n] movl %ecx, %edi # c xorl %edx, %edi # ^d andl %ebx, %edi # &b xorl %edx, %edi # (((c ^ d) & b) ^ d) addl %edi, %ebp # e += (((c ^ d) & b) ^ d) - movl %eax, %esi # - roll $5, %esi # rotl32(a,5) - addl %esi, %ebp # e += rotl32(a,5) + movl %eax, %edi # + roll $5, %edi # rotl32(a,5) + addl %edi, %ebp # e += rotl32(a,5) rorl $2, %ebx # b = rotl32(b,30) # PREP %xmm2 %xmm3 %xmm0 %xmm1 -64+16*2(%rsp) movaps %xmm1, %xmm4 @@ -325,9 +331,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm2, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm2, %xmm2 # shift left by 1 - psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 + pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm2, %xmm2 # shift left by 1 + psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -394,9 +400,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm3, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm3, %xmm3 # shift left by 1 - psubd %xmm4, %xmm3 # add 1 to those who had msb 
bit 1 + pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm3, %xmm3 # shift left by 1 + psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -459,9 +465,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm0, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm0, %xmm0 # shift left by 1 - psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 + pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm0, %xmm0 # shift left by 1 + psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -524,9 +530,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm1, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm1, %xmm1 # shift left by 1 - psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 + pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm1, %xmm1 # shift left by 1 + psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -590,9 +596,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm2, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm2, %xmm2 # shift left by 1 - psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 + pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm2, %xmm2 # shift left by 1 + psubd %xmm4, %xmm2 # add 1 to 
those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -655,9 +661,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm3, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm3, %xmm3 # shift left by 1 - psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 + pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm3, %xmm3 # shift left by 1 + psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -720,9 +726,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm0, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm0, %xmm0 # shift left by 1 - psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 + pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm0, %xmm0 # shift left by 1 + psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -797,9 +803,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm1, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm1, %xmm1 # shift left by 1 - psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 + pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm1, %xmm1 # shift left by 1 + psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps 
%xmm5, %xmm4 @@ -874,9 +880,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm2, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm2, %xmm2 # shift left by 1 - psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 + pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm2, %xmm2 # shift left by 1 + psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -952,9 +958,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm3, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm3, %xmm3 # shift left by 1 - psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 + pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm3, %xmm3 # shift left by 1 + psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -1029,9 +1035,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm0, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm0, %xmm0 # shift left by 1 - psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 + pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm0, %xmm0 # shift left by 1 + psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -1106,9 +1112,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm1, %xmm5 xorps %xmm4, 
%xmm4 # rol(W0,1): - pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm1, %xmm1 # shift left by 1 - psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 + pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm1, %xmm1 # shift left by 1 + psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -1171,9 +1177,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm2, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm2, %xmm2 # shift left by 1 - psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 + pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm2, %xmm2 # shift left by 1 + psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -1236,9 +1242,9 @@ sha1_process_block64: # W0 = unrotated (W[0]..W[3]), still needs W[3] fixup movaps %xmm3, %xmm5 xorps %xmm4, %xmm4 # rol(W0,1): - pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) - paddd %xmm3, %xmm3 # shift left by 1 - psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 + pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1) + paddd %xmm3, %xmm3 # shift left by 1 + psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) movaps %xmm5, %xmm4 @@ -1378,7 +1384,7 @@ sha1_process_block64: addl %ebx, 84(%rdi) # ctx->hash[1] += b popq %r14 # addl %ecx, 88(%rdi) # ctx->hash[2] += c - popq %r15 # +# popq %r15 # addl %edx, 92(%rdi) # ctx->hash[3] += d popq %rbx # addl %ebp, 96(%rdi) # ctx->hash[4] += e diff --git 
a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh index 87c2d0800..47c40af0d 100755 --- a/libbb/hash_md5_sha_x86-64.S.sh +++ b/libbb/hash_md5_sha_x86-64.S.sh @@ -102,7 +102,7 @@ echo \ "### Generated by hash_md5_sha_x86-64.S.sh ### #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) - .section .text.sha1_process_block64,\"ax\",@progbits + .section .text.sha1_process_block64, \"ax\", @progbits .globl sha1_process_block64 .hidden sha1_process_block64 .type sha1_process_block64, @function @@ -111,7 +111,7 @@ echo \ sha1_process_block64: pushq %rbp # 1 byte insn pushq %rbx # 1 byte insn - pushq %r15 # 2 byte insn +# pushq %r15 # 2 byte insn pushq %r14 # 2 byte insn pushq %r13 # 2 byte insn pushq %r12 # 2 byte insn @@ -120,7 +120,8 @@ sha1_process_block64: #Register and stack use: # eax..edx: a..d # ebp: e -# esi,edi: temps +# esi,edi,r8..r14: temps +# r15: unused # xmm0..xmm3: W[] # xmm4,xmm5: temps # xmm6: current round constant @@ -134,59 +135,56 @@ sha1_process_block64: movaps rconst0x5A827999(%rip), $xmmRCONST - # For round 1, steps 0 and 8..15, we pass W[0,8..15] in esi,r8..r15 - # instead of spilling them to stack. - # (We lose parallelized addition of RCONST, but LEA - # can do two additions at once, so...) + # Load W[] to xmm registers, byteswapping on the fly. + # + # For iterations 0..15, we pass W[] in rsi,r8..r14 + # for use in RD1A's instead of spilling them to stack. + # We lose parallelized addition of RCONST, but LEA + # can do two additions at once, so it's probably a wash. + # (We use rsi instead of rN because this makes two + # LEAs in two first RD1A's shorter by one byte).
movq 4*0(%rdi), %rsi - movq 4*2(%rdi), %r10 + movq 4*2(%rdi), %r8 bswapq %rsi - bswapq %r10 + bswapq %r8 rolq \$32, %rsi # rsi = W[1]:W[0] - rolq \$32, %r10 + rolq \$32, %r8 # r8 = W[3]:W[2] movq %rsi, %xmm0 - movq %r10, $xmmT1 - punpcklqdq $xmmT1, %xmm0 # xmm0 = r10:rsi = (W[0],W[1],W[2],W[3]) - movaps %xmm0, $xmmT1 - paddd $xmmRCONST, $xmmT1 - movups $xmmT1, -64+4*0(%rsp) + movq %r8, $xmmT1 + punpcklqdq $xmmT1, %xmm0 # xmm0 = r8:rsi = (W[0],W[1],W[2],W[3]) +# movaps %xmm0, $xmmT1 # add RCONST, spill to stack +# paddd $xmmRCONST, $xmmT1 +# movups $xmmT1, -64+16*0(%rsp) - movq 4*4(%rdi), %r8 + movq 4*4(%rdi), %r9 movq 4*6(%rdi), %r10 - bswapq %r8 + bswapq %r9 bswapq %r10 - rolq \$32, %r8 - rolq \$32, %r10 - movq %r8, %xmm1 + rolq \$32, %r9 # r9 = W[5]:W[4] + rolq \$32, %r10 # r10 = W[7]:W[6] + movq %r9, %xmm1 movq %r10, $xmmT1 - punpcklqdq $xmmT1, %xmm1 # xmm1 = r10:r8 = (W[4],W[5],W[6],W[7]) - movaps %xmm1, $xmmT1 - paddd $xmmRCONST, $xmmT1 - movups $xmmT1, -64+4*4(%rsp) + punpcklqdq $xmmT1, %xmm1 # xmm1 = r10:r9 = (W[4],W[5],W[6],W[7]) - movq 4*8(%rdi), %r8 - movq 4*10(%rdi), %r10 - bswapq %r8 - bswapq %r10 - movl %r8d, %r9d # r9d = W[9] - rolq \$32, %r8 # r8 = W[9]:W[8] - movl %r10d, %r11d # r11d = W[11] - rolq \$32, %r10 # r10 = W[11]:W[10] - movq %r8, %xmm2 - movq %r10, $xmmT1 - punpcklqdq $xmmT1, %xmm2 # xmm2 = r10:r8 = (W[8],W[9],W[10],W[11]) + movq 4*8(%rdi), %r11 + movq 4*10(%rdi), %r12 + bswapq %r11 + bswapq %r12 + rolq \$32, %r11 # r11 = W[9]:W[8] + rolq \$32, %r12 # r12 = W[11]:W[10] + movq %r11, %xmm2 + movq %r12, $xmmT1 + punpcklqdq $xmmT1, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11]) - movq 4*12(%rdi), %r12 + movq 4*12(%rdi), %r13 movq 4*14(%rdi), %r14 - bswapq %r12 + bswapq %r13 bswapq %r14 - movl %r12d, %r13d # r13d = W[13] - rolq \$32, %r12 # r12 = W[13]:W[12] - movl %r14d, %r15d # r15d = W[15] + rolq \$32, %r13 # r13 = W[13]:W[12] rolq \$32, %r14 # r14 = W[15]:W[14] - movq %r12, %xmm3 + movq %r13, %xmm3 movq %r14, $xmmT1 - punpcklqdq $xmmT1, 
%xmm3 # xmm3 = r14:r12 = (W[12],W[13],W[14],W[15]) + punpcklqdq $xmmT1, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15]) " PREP() { @@ -215,9 +213,9 @@ echo "# PREP $@ movaps $xmmW0, $xmmT2 xorps $xmmT1, $xmmT1 # rol(W0,1): - pcmpgtd $xmmW0, $xmmT1 # ffffffff for elements <0 (ones with msb bit 1) - paddd $xmmW0, $xmmW0 # shift left by 1 - psubd $xmmT1, $xmmW0 # add 1 to those who had msb bit 1 + pcmpgtd $xmmW0, $xmmT1 # ffffffff for elements <0 (ones with msb bit 1) + paddd $xmmW0, $xmmW0 # shift left by 1 + psubd $xmmT1, $xmmW0 # add 1 to those who had msb bit 1 # W0 = rotated (W[0]..W[3]), still needs W[3] fixup pslldq \$12, $xmmT2 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0]) @@ -256,23 +254,28 @@ RD1A() { local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 local n=$(($6)) local n0=$(((n+0) & 15)) +local rN=$((7+n0/2)) echo " # $n ";test $n0 = 0 && echo " leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n] -";test $n0 != 0 && test $n0 -lt 8 && echo " - addl -64+4*$n0(%rsp), %e$e # e += RCONST + W[n] -";test $n0 -ge 8 && echo " - leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n] + shrq \$32, %rsi +";test $n0 = 1 && echo " + leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n] +";test $n0 -ge 2 && test $((n0 & 1)) = 0 && echo " + leal $RCONST(%r$e,%r$rN), %e$e # e += RCONST + W[n] + shrq \$32, %r$rN +";test $n0 -ge 2 && test $((n0 & 1)) = 1 && echo " + leal $RCONST(%r$e,%r$rN), %e$e # e += RCONST + W[n] ";echo " movl %e$c, %edi # c xorl %e$d, %edi # ^d andl %e$b, %edi # &b xorl %e$d, %edi # (((c ^ d) & b) ^ d) addl %edi, %e$e # e += (((c ^ d) & b) ^ d) - movl %e$a, %esi # - roll \$5, %esi # rotl32(a,5) - addl %esi, %e$e # e += rotl32(a,5) + movl %e$a, %edi # + roll \$5, %edi # rotl32(a,5) + addl %edi, %e$e # e += rotl32(a,5) rorl \$2, %e$b # b = rotl32(b,30) " } @@ -420,7 +423,7 @@ echo " addl %ebx, 84(%rdi) # ctx->hash[1] += b popq %r14 # addl %ecx, 88(%rdi) # ctx->hash[2] += c - popq %r15 # +# popq %r15 # addl %edx, 92(%rdi) # ctx->hash[3] += d popq %rbx # 
addl %ebp, 96(%rdi) # ctx->hash[4] += e From bugzilla at busybox.net Tue Jan 25 19:57:23 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Tue, 25 Jan 2022 19:57:23 +0000 Subject: [Bug 14551] New: hexedit-applet desperately needs a "readonly"-option Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14551 Bug ID: 14551 Summary: hexedit-applet desperately needs a "readonly"-option Product: Busybox Version: 1.33.x Hardware: All OS: Linux Status: NEW Severity: enhancement Priority: P5 Component: Other Assignee: unassigned at busybox.net Reporter: gb_about_gnu at gmx.net CC: busybox-cvs at busybox.net Target Milestone: --- The "hexedit" BB applet is extremely useful for quickly examining boot sectors, partition tables and similar on-disk structures. However, the current applet does not have a "readonly" option, and immediately writes any keyboard input to the disk. This makes the usage of this applet extremely dangerous in practice: Accidentally pressing any key which corresponds to a hexadecimal character immediately damages the boot sector, superblock of a filesystem etc. I therefore strongly recommend either adding a "-r" option to the applet for opening the target in read-only mode, or adding a second applet (like "hexbrowse") which shares the same code except for the mode argument to open(). Another option would be to cache the input in a buffer and only write it back after issuing a special local subcommand, such as pressing the F2 key in the original hexedit application. -- You are receiving this mail because: You are on the CC list for the bug. 
From bugzilla at busybox.net Wed Jan 26 17:14:41 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 26 Jan 2022 17:14:41 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gQXdrIGZyb20gYnVzeWJveC12MS4zNS4wIGRv?= =?UTF-8?B?ZXNu4oCZdCB3b3JrLg==?= In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 --- Comment #2 from melsem --- Created attachment 9211 --> https://bugs.busybox.net/attachment.cgi?id=9211&action=edit awk-scripts.cgi -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 26 17:19:18 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 26 Jan 2022 17:19:18 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gQXdrIGZyb20gYnVzeWJveC12MS4zNS4wIGRv?= =?UTF-8?B?ZXNu4oCZdCB3b3JrLg==?= In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 --- Comment #3 from melsem --- Created attachment 9216 --> https://bugs.busybox.net/attachment.cgi?id=9216&action=edit print-screen-awk-scripts-from-busybox-1.35.zip - screenshot where awk-script is not working. -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Wed Jan 26 17:21:40 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 26 Jan 2022 17:21:40 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gQXdrIGZyb20gYnVzeWJveC12MS4zNS4wIGRv?= =?UTF-8?B?ZXNu4oCZdCB3b3JrLg==?= In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 --- Comment #4 from melsem --- Created attachment 9221 --> https://bugs.busybox.net/attachment.cgi?id=9221&action=edit screenshot of the awk script running (awk patched from busybox-1.33.1) -- You are receiving this mail because: You are on the CC list for the bug.
From bugzilla at busybox.net Wed Jan 26 17:23:45 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Wed, 26 Jan 2022 17:23:45 +0000 Subject: =?UTF-8?B?W0J1ZyAxNDUzNl0gQXdrIGZyb20gYnVzeWJveC12MS4zNS4wIGRv?= =?UTF-8?B?ZXNu4oCZdCB3b3JrLg==?= In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=14536 --- Comment #5 from melsem --- Created attachment 9226 --> https://bugs.busybox.net/attachment.cgi?id=9226&action=edit example.html -- You are receiving this mail because: You are on the CC list for the bug. From bugzilla at busybox.net Thu Jan 27 09:20:27 2022 From: bugzilla at busybox.net (bugzilla at busybox.net) Date: Thu, 27 Jan 2022 09:20:27 +0000 Subject: [Bug 8876] add some useful wget options In-Reply-To: References: Message-ID: https://bugs.busybox.net/show_bug.cgi?id=8876 Axel Fontaine changed: What |Removed |Added ---------------------------------------------------------------------------- Severity|minor |major --- Comment #4 from Axel Fontaine --- --method=METHOD is also desperately needed as this would allow busybox to issue the necessary PUT request to connect to the AWS instance metadata service v2 endpoint -- You are receiving this mail because: You are on the CC list for the bug.