Source file src/crypto/sha1/_asm/sha1block_amd64_shani.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  
    10  	. "github.com/mmcloughlin/avo/build"
    11  	. "github.com/mmcloughlin/avo/operand"
    12  	. "github.com/mmcloughlin/avo/reg"
    13  )
    14  
    15  // Implement the SHA-1 block function using the Intel(R) SHA extensions
    16  // (SHA1RNDS4, SHA1NEXTE, SHA1MSG1, and SHA1MSG2). This implementation requires
    17  // the AVX, SHA, SSE2, SSE4.1, and SSSE3 extensions.
    18  //
    19  // Reference:
    20  // S. Gulley, et al, "New Instructions Supporting the Secure Hash
    21  // Algorithm on Intel® Architecture Processors", July 2013
    22  // https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
    23  
    24  func blockSHANI() {
        	// blockSHANI emits, through the avo builder, the assembly body of the
        	// function named "blockSHANI": it consumes the byte slice p in 64-byte
        	// steps and updates the hash state addressed by dig using the SHA1*
        	// instructions.
        	// NOTE(review): the loop exit test is an equality compare, so this
        	// assumes len(p) is a multiple of 64 — confirm against the caller.
    25  	Implement("blockSHANI")
    26  
        	// Arguments: pointer to the hash state, pointer to the input, input length.
    27  	digest := Load(Param("dig"), RDI)
    28  	data := Load(Param("p").Base(), RSI)
    29  	len := Load(Param("p").Len(), RDX)
    30  
        	// Virtual vector registers: the packed state words (abcd), four
        	// message-schedule registers, two alternating E registers, and the
        	// PSHUFB control mask.
    31  	abcd := XMM()
    32  	msg0, msg1, msg2, msg3 := XMM(), XMM(), XMM(), XMM()
    33  	e0, e1 := XMM(), XMM()
    34  	shufMask := XMM()
    35  
        	// Nothing to do for empty input. Otherwise turn len into the end
        	// pointer (len += data) so the loop can compare data against it.
    36  	CMPQ(len, Imm(0))
    37  	JEQ(LabelRef("done"))
    38  	ADDQ(data, len)
    39  
    40  	stackPtr := GP64()
    41  	{
    42  		Comment("Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes")
        		// 48 bytes are reserved: 32 for the two saved vectors plus 16 of
        		// slack, so that rounding (local+15) down to a multiple of 16 still
        		// leaves 32 usable bytes.
    43  		local := AllocLocal(32 + 16)
    44  		LEAQ(local.Offset(15), stackPtr)
    45  		tmp := GP64()
    46  		MOVQ(U64(15), tmp)
    47  		NOTQ(tmp)
    48  		ANDQ(tmp, stackPtr)
    49  	}
        	// Save slots inside the aligned area: E at offset 0, ABCD at offset 16.
    50  	e0_save := Mem{Base: stackPtr}
    51  	abcd_save := Mem{Base: stackPtr}.Offset(16)
    52  
    53  	Comment("Load initial hash state")
        	// The dword at digest+16 goes into the top lane (index 3) of e0; the
        	// PAND clears the other three lanes, which were never written. The
        	// first 16 bytes of the digest are loaded into abcd and PSHUFD 0x1b
        	// reverses the order of its four dwords.
    54  	PINSRD(Imm(3), Mem{Base: digest}.Offset(16), e0)
    55  	VMOVDQU(Mem{Base: digest}, abcd)
    56  	PAND(upperMask(), e0)
    57  	PSHUFD(Imm(0x1b), abcd, abcd)
    58  
        	// Control mask for the PSHUFB byte shuffles applied to each loaded
        	// 16-byte chunk of input below.
    59  	VMOVDQA(flipMask(), shufMask)
    60  
    61  	Label("loop")
    62  
    63  	Comment("Save ABCD and E working values")
    64  	VMOVDQA(e0, e0_save)
    65  	VMOVDQA(abcd, abcd_save)
    66  
        	// The first three groups load and shuffle 16 bytes of input each.
        	// In every group, abcd is copied into the "other" E register before
        	// SHA1RNDS4 overwrites it; that copy is consumed by SHA1NEXTE in the
        	// following group.
    67  	Comment("Rounds 0-3")
    68  	VMOVDQU(Mem{Base: data}, msg0)
    69  	PSHUFB(shufMask, msg0)
    70  	PADDD(msg0, e0)
    71  	VMOVDQA(abcd, e1)
    72  	SHA1RNDS4(Imm(0), e0, abcd)
    73  
    74  	Comment("Rounds 4-7")
    75  	VMOVDQU(Mem{Base: data}.Offset(16), msg1)
    76  	PSHUFB(shufMask, msg1)
    77  	SHA1NEXTE(msg1, e1)
    78  	VMOVDQA(abcd, e0)
    79  	SHA1RNDS4(Imm(0), e1, abcd)
    80  	SHA1MSG1(msg1, msg0)
    81  
    82  	Comment("Rounds 8-11")
    83  	VMOVDQU(Mem{Base: data}.Offset(16*2), msg2)
    84  	PSHUFB(shufMask, msg2)
    85  	SHA1NEXTE(msg2, e0)
    86  	VMOVDQA(abcd, e1)
    87  	SHA1RNDS4(Imm(0), e0, abcd)
    88  	SHA1MSG1(msg2, msg1)
    89  	PXOR(msg2, msg0)
    90  
    91  	// Rounds 12 through 67 use the same repeated pattern, with e0 and e1 ping-ponging
    92  	// back and forth, and each of the msg temporaries moving up one every four rounds.
        	// msgs is ordered so that msgs[i%4] is the register holding the message
        	// words consumed by group i (msg3 for i == 0).
    93  	msgs := []VecVirtual{msg3, msg0, msg1, msg2}
    94  	for i := range 14 {
    95  		Comment(fmt.Sprintf("Rounds %d-%d", 12+(i*4), 12+(i*4)+3))
        		// a is the E register consumed by this group; b receives the copy
        		// of abcd for the next group. They swap on every iteration.
    96  		a, b := e1, e0
    97  		if i == 0 {
        			// The last 16 bytes of the 64-byte block are loaded here, the
        			// first time msg3 is needed.
    98  			VMOVDQU(Mem{Base: data}.Offset(16*3), msg3)
    99  			PSHUFB(shufMask, msg3)
   100  		}
   101  		if i%2 == 1 {
   102  			a, b = e0, e1
   103  		}
        		// SHA1RNDS4 immediate: 0 for rounds 0-19, 1 for 20-39, 2 for 40-59,
        		// 3 for 60-79 (integer division of the group's first round by 20).
   104  		imm := uint64((12 + i*4) / 20)
   105  
   106  		SHA1NEXTE(msgs[i%4], a)
   107  		VMOVDQA(abcd, b)
   108  		SHA1MSG2(msgs[i%4], msgs[(1+i)%4])
   109  		SHA1RNDS4(Imm(imm), a, abcd)
   110  		SHA1MSG1(msgs[i%4], msgs[(3+i)%4])
   111  		PXOR(msgs[i%4], msgs[(2+i)%4])
   112  	}
   113  
        	// The final three groups are written out by hand: each one drops the
        	// message-schedule steps whose results would no longer be consumed.
   114  	Comment("Rounds 68-71")
   115  	SHA1NEXTE(msg1, e1)
   116  	VMOVDQA(abcd, e0)
   117  	SHA1MSG2(msg1, msg2)
   118  	SHA1RNDS4(Imm(3), e1, abcd)
   119  	PXOR(msg1, msg3)
   120  
   121  	Comment("Rounds 72-75")
   122  	SHA1NEXTE(msg2, e0)
   123  	VMOVDQA(abcd, e1)
   124  	SHA1MSG2(msg2, msg3)
   125  	SHA1RNDS4(Imm(3), e0, abcd)
   126  
   127  	Comment("Rounds 76-79")
   128  	SHA1NEXTE(msg3, e1)
   129  	VMOVDQA(abcd, e0)
   130  	SHA1RNDS4(Imm(3), e1, abcd)
   131  
   132  	Comment("Add saved E and ABCD")
        	// Fold the values saved at the top of the loop back in; the saved E is
        	// combined via SHA1NEXTE with a memory operand, ABCD via a plain PADDD.
   133  	SHA1NEXTE(e0_save, e0)
   134  	PADDD(abcd_save, abcd)
   135  
   136  	Comment("Check if we are done, if not return to the loop")
        	// Advance by one 64-byte block; len holds the end pointer computed
        	// before the loop.
   137  	ADDQ(Imm(64), data)
   138  	CMPQ(data, len)
   139  	JNE(LabelRef("loop"))
   140  
   141  	Comment("Write the hash state back to digest")
        	// Mirror of the load sequence: reverse the dword order of abcd again,
        	// store it to the first 16 bytes of the digest, and write the top lane
        	// of e0 to digest+16.
   142  	PSHUFD(Imm(0x1b), abcd, abcd)
   143  	VMOVDQU(abcd, Mem{Base: digest})
   144  	PEXTRD(Imm(3), e0, Mem{Base: digest}.Offset(16))
   145  
   146  	Label("done")
   147  	RET()
   148  }
   149  
   150  func flipMask() Mem {
   151  	mask := GLOBL("shuffle_mask", RODATA)
   152  	// 0x000102030405060708090a0b0c0d0e0f
   153  	DATA(0x00, U64(0x08090a0b0c0d0e0f))
   154  	DATA(0x08, U64(0x0001020304050607))
   155  	return mask
   156  }
   157  
   158  func upperMask() Mem {
   159  	mask := GLOBL("upper_mask", RODATA)
   160  	// 0xFFFFFFFF000000000000000000000000
   161  	DATA(0x00, U64(0x0000000000000000))
   162  	DATA(0x08, U64(0xFFFFFFFF00000000))
   163  	return mask
   164  }
   165  

View as plain text