Text file src/crypto/internal/fips140/sha512/sha512block_riscv64.s

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA512 block routine. See sha512block.go for Go equivalent.
    10  //
    11  // The algorithm is detailed in FIPS 180-4:
    12  //
    13  //  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14  //
    15  // Wt = Mt; for 0 <= t <= 15
    16  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    17  //
    18  // a = H0
    19  // b = H1
    20  // c = H2
    21  // d = H3
    22  // e = H4
    23  // f = H5
    24  // g = H6
    25  // h = H7
    26  //
    27  // for t = 0 to 79 {
    28  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    29  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30  //    h = g
    31  //    g = f
    32  //    f = e
    33  //    e = d + T1
    34  //    d = c
    35  //    c = b
    36  //    b = a
    37  //    a = T1 + T2
    38  // }
    39  //
    40  // H0 = a + H0
    41  // H1 = b + H1
    42  // H2 = c + H2
    43  // H3 = d + H3
    44  // H4 = e + H4
    45  // H5 = f + H5
    46  // H6 = g + H6
    47  // H7 = h + H7
    48  
    49  // Wt = Mt; for 0 <= t <= 15
    50  #define MSGSCHEDULE0(index) \ // X5 = Wt: big-endian 64-bit load from (index*8)(X29); scratch X6-X9
    51  	MOVBU	((index*8)+0)(X29), X5; \ // bytes 0-3 of the word, one per register
    52  	MOVBU	((index*8)+1)(X29), X6; \
    53  	MOVBU	((index*8)+2)(X29), X7; \
    54  	MOVBU	((index*8)+3)(X29), X8; \
    55  	SLL	$56, X5; \ // byte 0 -> bits 63:56 (most significant)
    56  	SLL	$48, X6; \ // byte 1 -> bits 55:48
    57  	OR	X5, X6, X5; \
    58  	SLL	$40, X7; \ // byte 2 -> bits 47:40
    59  	OR	X5, X7, X5; \
    60  	SLL	$32, X8; \ // byte 3 -> bits 39:32
    61  	OR	X5, X8, X5; \
    62  	MOVBU	((index*8)+4)(X29), X9; \ // bytes 4-7; X6-X8 are reused now that bytes 1-3 are merged
    63  	MOVBU	((index*8)+5)(X29), X6; \
    64  	MOVBU	((index*8)+6)(X29), X7; \
    65  	MOVBU	((index*8)+7)(X29), X8; \
    66  	SLL	$24, X9; \ // byte 4 -> bits 31:24
    67  	OR	X5, X9, X5; \
    68  	SLL	$16, X6; \ // byte 5 -> bits 23:16
    69  	OR	X5, X6, X5; \
    70  	SLL	$8, X7; \ // byte 6 -> bits 15:8
    71  	OR	X5, X7, X5; \
    72  	OR	X5, X8, X5; \ // byte 7 fills bits 7:0 unshifted
    73  	MOV	X5, (index*8)(X19) // save Wt in schedule slot index; X5 still holds Wt afterwards
    74  
    75  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    76  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    77  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
    78  #define MSGSCHEDULE1(index) \ // X5 = Wt built from the 16-slot schedule ring at X19; scratch X6-X9, X21
    79  	MOV	(((index-2)&0xf)*8)(X19), X5; \ // X5 = Wt-2
    80  	MOV	(((index-15)&0xf)*8)(X19), X6; \ // X6 = Wt-15
    81  	MOV	(((index-7)&0xf)*8)(X19), X9; \ // X9 = Wt-7
    82  	MOV	(((index-16)&0xf)*8)(X19), X21; \ // X21 = Wt-16; same ring slot that Wt overwrites below
    83  	ROR	$19, X5, X7; \ // X7 = ROTR(19, Wt-2)
    84  	ROR	$61, X5, X8; \ // X8 = ROTR(61, Wt-2)
    85  	SRL	$6, X5; \ // X5 = SHR(6, Wt-2)
    86  	XOR	X7, X5; \
    87  	XOR	X8, X5; \ // X5 = SIGMA1(Wt-2)
    88  	ADD	X9, X5; \ // X5 += Wt-7
    89  	ROR	$1, X6, X7; \ // X7 = ROTR(1, Wt-15)
    90  	ROR	$8, X6, X8; \ // X8 = ROTR(8, Wt-15)
    91  	SRL	$7, X6; \ // X6 = SHR(7, Wt-15)
    92  	XOR	X7, X6; \
    93  	XOR	X8, X6; \ // X6 = SIGMA0(Wt-15)
    94  	ADD	X6, X5; \ // X5 += SIGMA0(Wt-15)
    95  	ADD	X21, X5; \ // X5 += Wt-16, giving Wt
    96  	MOV	X5, ((index&0xf)*8)(X19) // save Wt in ring slot index mod 16; X5 still holds Wt afterwards
    97  
    98  // Calculate T1 in X5.
    99  // h is also used as an accumulator. Wt is passed in X5.
   100  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
   101  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
   102  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
   103  //                 = ((y XOR z) AND x) XOR z
   104  #define SHA512T1(index, e, f, g, h) \ // in: X5 = Wt; out: X5 = T1; h is overwritten; scratch X6-X8
   105  	MOV	(index*8)(X18), X8; \ // X8 = Kt, entry index of the table at X18
   106  	ADD	X5, h; \ // h += Wt
   107  	ROR	$14, e, X6; \ // X6 = ROTR(14, e)
   108  	ADD	X8, h; \ // h += Kt
   109  	ROR	$18, e, X7; \ // X7 = ROTR(18, e)
   110  	ROR	$41, e, X8; \ // X8 = ROTR(41, e); Kt has already been added, so X8 is free
   111  	XOR	X7, X6; \
   112  	XOR	f, g, X5; \ // X5 = f XOR g; Wt has already been added, so X5 is free
   113  	XOR	X8, X6; \ // X6 = BIGSIGMA1(e)
   114  	AND	e, X5; \ // X5 = (f XOR g) AND e
   115  	ADD	X6, h; \ // h += BIGSIGMA1(e)
   116  	XOR	g, X5; \ // X5 = Ch(e, f, g)
   117  	ADD	h, X5 // X5 = T1
   118  
   119  // Calculate T2 in X6.
   120  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   121  //     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
   122  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   123  //                  = ((y XOR z) AND x) XOR (y AND z)
   124  #define SHA512T2(a, b, c) \ // out: X6 = T2; scratch X7-X9; X5 is left untouched
   125  	ROR	$28, a, X6; \ // X6 = ROTR(28, a)
   126  	ROR	$34, a, X7; \ // X7 = ROTR(34, a)
   127  	ROR	$39, a, X8; \ // X8 = ROTR(39, a)
   128  	XOR	X7, X6; \
   129  	XOR	b, c, X9; \ // X9 = b XOR c
   130  	AND	b, c, X7; \ // X7 = b AND c; the ROTR(34) value is already folded into X6
   131  	AND	a, X9; \ // X9 = (b XOR c) AND a
   132  	XOR	X8, X6; \ // X6 = BIGSIGMA0(a)
   133  	XOR	X7, X9; \ // X9 = Maj(a, b, c)
   134  	ADD	X9, X6 // X6 = T2
   135  
   136  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   137  // The values for e and a are stored in d and h, ready for rotation.
   138  #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \ // one round; expects Wt in X5; a, b, c, e, f, g are only read
   139  	SHA512T1(index, e, f, g, h); \ // X5 = T1; h no longer holds the old h
   140  	SHA512T2(a, b, c); \ // X6 = T2
   141  	ADD	X5, d; \ // d += T1: this register now holds the new e
   142  	ADD	X6, X5, h // h = T1 + T2: this register now holds the new a
   143  
   144  #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \ // round whose Wt is read directly from the input block
   145  	MSGSCHEDULE0(index); \ // X5 = Wt = Mt
   146  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   147  
   148  #define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \ // round whose Wt is derived from earlier schedule words
   149  	MSGSCHEDULE1(index); \ // X5 = Wt
   150  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   151  
   152  // func block(dig *Digest, p []byte)
   153  TEXT ·block(SB),0,$128-32	// $128 = 16 schedule slots of 8 bytes; 32 = argument bytes (dig, p)
   154  	MOV	p_base+8(FP), X29	// X29 = current input position
   155  	MOV	p_len+16(FP), X30	// X30 = len(p)
   156  	SRL	$7, X30
   157  	SLL	$7, X30			// low 7 bits cleared: length rounded down to whole 128-byte blocks
   158  
   159  	ADD	X29, X30, X28		// X28 = end of the last whole block
   160  	BEQ	X28, X29, end		// no whole block: return without reading or writing dig
   161  
   162  	MOV	$·_K(SB), X18		// const table, read as (index*8)(X18)
   163  	ADD	$8, X2, X19		// message schedule: 16 slots starting at X2+8
   164  
   165  	MOV	dig+0(FP), X20		// X20 = dig; H0..H7 are read from (0*8)..(7*8)
   166  	MOV	(0*8)(X20), X10		// a = H0
   167  	MOV	(1*8)(X20), X11		// b = H1
   168  	MOV	(2*8)(X20), X12		// c = H2
   169  	MOV	(3*8)(X20), X13		// d = H3
   170  	MOV	(4*8)(X20), X14		// e = H4
   171  	MOV	(5*8)(X20), X15		// f = H5
   172  	MOV	(6*8)(X20), X16		// g = H6
   173  	MOV	(7*8)(X20), X17		// h = H7
   174  
   175  loop:	// one 128-byte block per pass; X10..X17 = a..h on entry
   176  	SHA512ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)	// rounds 0-15: Wt comes from the input block
   177  	SHA512ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)	// roles shift one register per round; no values are moved
   178  	SHA512ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
   179  	SHA512ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
   180  	SHA512ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
   181  	SHA512ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
   182  	SHA512ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
   183  	SHA512ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
   184  	SHA512ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)	// the register pattern repeats every 8 rounds
   185  	SHA512ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
   186  	SHA512ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
   187  	SHA512ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
   188  	SHA512ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
   189  	SHA512ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
   190  	SHA512ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
   191  	SHA512ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)
   192  
   193  	SHA512ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)	// rounds 16-79: Wt comes from the schedule ring
   194  	SHA512ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
   195  	SHA512ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
   196  	SHA512ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
   197  	SHA512ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
   198  	SHA512ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
   199  	SHA512ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
   200  	SHA512ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
   201  	SHA512ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
   202  	SHA512ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
   203  	SHA512ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
   204  	SHA512ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
   205  	SHA512ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
   206  	SHA512ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
   207  	SHA512ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
   208  	SHA512ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
   209  	SHA512ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
   210  	SHA512ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
   211  	SHA512ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
   212  	SHA512ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
   213  	SHA512ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
   214  	SHA512ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
   215  	SHA512ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
   216  	SHA512ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
   217  	SHA512ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
   218  	SHA512ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
   219  	SHA512ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
   220  	SHA512ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
   221  	SHA512ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
   222  	SHA512ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
   223  	SHA512ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
   224  	SHA512ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
   225  	SHA512ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
   226  	SHA512ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
   227  	SHA512ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
   228  	SHA512ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
   229  	SHA512ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
   230  	SHA512ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
   231  	SHA512ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
   232  	SHA512ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
   233  	SHA512ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
   234  	SHA512ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
   235  	SHA512ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
   236  	SHA512ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
   237  	SHA512ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
   238  	SHA512ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
   239  	SHA512ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
   240  	SHA512ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)
   241  	SHA512ROUND1(64, X10, X11, X12, X13, X14, X15, X16, X17)
   242  	SHA512ROUND1(65, X17, X10, X11, X12, X13, X14, X15, X16)
   243  	SHA512ROUND1(66, X16, X17, X10, X11, X12, X13, X14, X15)
   244  	SHA512ROUND1(67, X15, X16, X17, X10, X11, X12, X13, X14)
   245  	SHA512ROUND1(68, X14, X15, X16, X17, X10, X11, X12, X13)
   246  	SHA512ROUND1(69, X13, X14, X15, X16, X17, X10, X11, X12)
   247  	SHA512ROUND1(70, X12, X13, X14, X15, X16, X17, X10, X11)
   248  	SHA512ROUND1(71, X11, X12, X13, X14, X15, X16, X17, X10)
   249  	SHA512ROUND1(72, X10, X11, X12, X13, X14, X15, X16, X17)
   250  	SHA512ROUND1(73, X17, X10, X11, X12, X13, X14, X15, X16)
   251  	SHA512ROUND1(74, X16, X17, X10, X11, X12, X13, X14, X15)
   252  	SHA512ROUND1(75, X15, X16, X17, X10, X11, X12, X13, X14)
   253  	SHA512ROUND1(76, X14, X15, X16, X17, X10, X11, X12, X13)
   254  	SHA512ROUND1(77, X13, X14, X15, X16, X17, X10, X11, X12)
   255  	SHA512ROUND1(78, X12, X13, X14, X15, X16, X17, X10, X11)
   256  	SHA512ROUND1(79, X11, X12, X13, X14, X15, X16, X17, X10)	// 80 rounds = 10 full cycles: a..h are back in X10..X17
   257  
   258  	MOV	(0*8)(X20), X5		// reload the previous H0..H3 from dig
   259  	MOV	(1*8)(X20), X6
   260  	MOV	(2*8)(X20), X7
   261  	MOV	(3*8)(X20), X8
   262  	ADD	X5, X10		// H0 = a + H0
   263  	ADD	X6, X11		// H1 = b + H1
   264  	ADD	X7, X12		// H2 = c + H2
   265  	ADD	X8, X13		// H3 = d + H3
   266  	MOV	X10, (0*8)(X20)		// write the new H0..H3 back; X10..X13 keep them as a..d for the next block
   267  	MOV	X11, (1*8)(X20)
   268  	MOV	X12, (2*8)(X20)
   269  	MOV	X13, (3*8)(X20)
   270  	MOV	(4*8)(X20), X5		// reload the previous H4..H7 from dig
   271  	MOV	(5*8)(X20), X6
   272  	MOV	(6*8)(X20), X7
   273  	MOV	(7*8)(X20), X8
   274  	ADD	X5, X14		// H4 = e + H4
   275  	ADD	X6, X15		// H5 = f + H5
   276  	ADD	X7, X16		// H6 = g + H6
   277  	ADD	X8, X17		// H7 = h + H7
   278  	MOV	X14, (4*8)(X20)		// write the new H4..H7 back; X14..X17 keep them as e..h for the next block
   279  	MOV	X15, (5*8)(X20)
   280  	MOV	X16, (6*8)(X20)
   281  	MOV	X17, (7*8)(X20)
   282  
   283  	ADD	$128, X29		// advance to the next 128-byte block
   284  	BNE	X28, X29, loop		// repeat until the end pointer is reached
   285  
   286  end:
   287  	RET
   288  

View as plain text