Source file
src/crypto/sha1/_asm/sha1block_amd64_shani.go
1
2
3
4
5 package main
6
7 import (
8 "fmt"
9
10 . "github.com/mmcloughlin/avo/build"
11 . "github.com/mmcloughlin/avo/operand"
12 . "github.com/mmcloughlin/avo/reg"
13 )
14
15
16
17
18
19
20
21
22
23
24 func blockSHANI() {
25 Implement("blockSHANI")
26
27 digest := Load(Param("dig"), RDI)
28 data := Load(Param("p").Base(), RSI)
29 len := Load(Param("p").Len(), RDX)
30
31 abcd := XMM()
32 msg0, msg1, msg2, msg3 := XMM(), XMM(), XMM(), XMM()
33 e0, e1 := XMM(), XMM()
34 shufMask := XMM()
35
36 CMPQ(len, Imm(0))
37 JEQ(LabelRef("done"))
38 ADDQ(data, len)
39
40 stackPtr := GP64()
41 {
42 Comment("Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes")
43 local := AllocLocal(32 + 16)
44 LEAQ(local.Offset(15), stackPtr)
45 tmp := GP64()
46 MOVQ(U64(15), tmp)
47 NOTQ(tmp)
48 ANDQ(tmp, stackPtr)
49 }
50 e0_save := Mem{Base: stackPtr}
51 abcd_save := Mem{Base: stackPtr}.Offset(16)
52
53 Comment("Load initial hash state")
54 PINSRD(Imm(3), Mem{Base: digest}.Offset(16), e0)
55 VMOVDQU(Mem{Base: digest}, abcd)
56 PAND(upperMask(), e0)
57 PSHUFD(Imm(0x1b), abcd, abcd)
58
59 VMOVDQA(flipMask(), shufMask)
60
61 Label("loop")
62
63 Comment("Save ABCD and E working values")
64 VMOVDQA(e0, e0_save)
65 VMOVDQA(abcd, abcd_save)
66
67 Comment("Rounds 0-3")
68 VMOVDQU(Mem{Base: data}, msg0)
69 PSHUFB(shufMask, msg0)
70 PADDD(msg0, e0)
71 VMOVDQA(abcd, e1)
72 SHA1RNDS4(Imm(0), e0, abcd)
73
74 Comment("Rounds 4-7")
75 VMOVDQU(Mem{Base: data}.Offset(16), msg1)
76 PSHUFB(shufMask, msg1)
77 SHA1NEXTE(msg1, e1)
78 VMOVDQA(abcd, e0)
79 SHA1RNDS4(Imm(0), e1, abcd)
80 SHA1MSG1(msg1, msg0)
81
82 Comment("Rounds 8-11")
83 VMOVDQU(Mem{Base: data}.Offset(16*2), msg2)
84 PSHUFB(shufMask, msg2)
85 SHA1NEXTE(msg2, e0)
86 VMOVDQA(abcd, e1)
87 SHA1RNDS4(Imm(0), e0, abcd)
88 SHA1MSG1(msg2, msg1)
89 PXOR(msg2, msg0)
90
91
92
93 msgs := []VecVirtual{msg3, msg0, msg1, msg2}
94 for i := range 14 {
95 Comment(fmt.Sprintf("Rounds %d-%d", 12+(i*4), 12+(i*4)+3))
96 a, b := e1, e0
97 if i == 0 {
98 VMOVDQU(Mem{Base: data}.Offset(16*3), msg3)
99 PSHUFB(shufMask, msg3)
100 }
101 if i%2 == 1 {
102 a, b = e0, e1
103 }
104 imm := uint64((12 + i*4) / 20)
105
106 SHA1NEXTE(msgs[i%4], a)
107 VMOVDQA(abcd, b)
108 SHA1MSG2(msgs[i%4], msgs[(1+i)%4])
109 SHA1RNDS4(Imm(imm), a, abcd)
110 SHA1MSG1(msgs[i%4], msgs[(3+i)%4])
111 PXOR(msgs[i%4], msgs[(2+i)%4])
112 }
113
114 Comment("Rounds 68-71")
115 SHA1NEXTE(msg1, e1)
116 VMOVDQA(abcd, e0)
117 SHA1MSG2(msg1, msg2)
118 SHA1RNDS4(Imm(3), e1, abcd)
119 PXOR(msg1, msg3)
120
121 Comment("Rounds 72-75")
122 SHA1NEXTE(msg2, e0)
123 VMOVDQA(abcd, e1)
124 SHA1MSG2(msg2, msg3)
125 SHA1RNDS4(Imm(3), e0, abcd)
126
127 Comment("Rounds 76-79")
128 SHA1NEXTE(msg3, e1)
129 VMOVDQA(abcd, e0)
130 SHA1RNDS4(Imm(3), e1, abcd)
131
132 Comment("Add saved E and ABCD")
133 SHA1NEXTE(e0_save, e0)
134 PADDD(abcd_save, abcd)
135
136 Comment("Check if we are done, if not return to the loop")
137 ADDQ(Imm(64), data)
138 CMPQ(data, len)
139 JNE(LabelRef("loop"))
140
141 Comment("Write the hash state back to digest")
142 PSHUFD(Imm(0x1b), abcd, abcd)
143 VMOVDQU(abcd, Mem{Base: digest})
144 PEXTRD(Imm(3), e0, Mem{Base: digest}.Offset(16))
145
146 Label("done")
147 RET()
148 }
149
150 func flipMask() Mem {
151 mask := GLOBL("shuffle_mask", RODATA)
152
153 DATA(0x00, U64(0x08090a0b0c0d0e0f))
154 DATA(0x08, U64(0x0001020304050607))
155 return mask
156 }
157
158 func upperMask() Mem {
159 mask := GLOBL("upper_mask", RODATA)
160
161 DATA(0x00, U64(0x0000000000000000))
162 DATA(0x08, U64(0xFFFFFFFF00000000))
163 return mask
164 }
165
View as plain text