Text file src/crypto/internal/fips140/sha256/sha256block_riscv64.s

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 63 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// Wt = Mt; for 0 <= t <= 15
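// The message bytes are loaded individually and shifted into place so that
// each 32-bit schedule word is assembled in big-endian order, as FIPS 180-4
// requires, on a little-endian machine. The word is stored in the schedule
// buffer at (X19).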
#define MSGSCHEDULE0(index) \
	MOVBU	((index*4)+0)(X29), X5; \
	MOVBU	((index*4)+1)(X29), X6; \
	MOVBU	((index*4)+2)(X29), X7; \
	MOVBU	((index*4)+3)(X29), X8; \
	SLL	$24, X5; \
	SLL	$16, X6; \
	OR	X5, X6, X5; \
	SLL	$8, X7; \
	OR	X5, X7, X5; \
	OR	X5, X8, X5; \
	MOVW	X5, (index*4)(X19)

// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
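// Only the most recent 16 schedule words are needed at any point, so they are
// kept in the 64-byte circular buffer at (X19) and every index is reduced
// modulo 16 (&0xf).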
#define MSGSCHEDULE1(index) \
	MOVWU	(((index-2)&0xf)*4)(X19), X5; \
	MOVWU	(((index-15)&0xf)*4)(X19), X6; \
	MOVWU	(((index-7)&0xf)*4)(X19), X9; \
	MOVWU	(((index-16)&0xf)*4)(X19), X21; \
	RORW	$17, X5, X7; \
	RORW	$19, X5, X8; \
	SRL	$10, X5; \
	XOR	X7, X5; \
	XOR	X8, X5; \
	ADD	X9, X5; \
	RORW	$7, X6, X7; \
	RORW	$18, X6, X8; \
	SRL	$3, X6; \
	XOR	X7, X6; \
	XOR	X8, X6; \
	ADD	X6, X5; \
	ADD	X21, X5; \
	MOVW	X5, ((index&0xf)*4)(X19)

// Calculate T1 in X5.
// h is also used as an accumulator. Wt is passed in X5.
//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
//     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
//                 = ((y XOR z) AND x) XOR z
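// Kt is loaded from the round constant table addressed by X18;
// X6-X8 are used as scratch registers.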
#define SHA256T1(index, e, f, g, h) \
	MOVWU	(index*4)(X18), X8; \
	ADD	X5, h; \
	RORW	$6, e, X6; \
	ADD	X8, h; \
	RORW	$11, e, X7; \
	RORW	$25, e, X8; \
	XOR	X7, X6; \
	XOR	f, g, X5; \
	XOR	X8, X6; \
	AND	e, X5; \
	ADD	X6, h; \
	XOR	g, X5; \
	ADD	h, X5

// Calculate T2 in X6.
//   T2 = BIGSIGMA0(a) + Maj(a, b, c)
//     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
//     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
//                  = ((y XOR z) AND x) XOR (y AND z)
#define SHA256T2(a, b, c) \
	RORW	$2, a, X6; \
	RORW	$13, a, X7; \
	RORW	$22, a, X8; \
	XOR	X7, X6; \
	XOR	b, c, X9; \
	AND	b, c, X7; \
	AND	a, X9; \
	XOR	X8, X6; \
	XOR	X7, X9; \
	ADD	X9, X6

// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
// The values for e and a are stored in d and h, ready for rotation.
#define SHA256ROUND(index, a, b, c, d, e, f, g, h) \
	SHA256T1(index, e, f, g, h); \
	SHA256T2(a, b, c); \
	ADD	X5, d; \
	ADD	X6, X5, h

#define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(index); \
	SHA256ROUND(index, a, b, c, d, e, f, g, h)

#define SHA256ROUND1(index, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SHA256ROUND(index, a, b, c, d, e, f, g, h)

// Note that 64 bytes of stack space is used as a circular buffer
// for the message schedule (4 bytes * 16 entries).
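//
// Register usage, as established below:
//   X5-X9, X21  scratch
//   X10-X17     working variables a through h
//   X18         round constant table ·_K
//   X19         message schedule buffer on the stack
//   X20         pointer to the digest state
//   X28         end of the last full block
//   X29         current position in the message
//   X30         message length, rounded down to a multiple of 64 bytes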
//
// func block(dig *Digest, p []byte)
TEXT ·block(SB),0,$64-32
	MOV	p_base+8(FP), X29
	MOV	p_len+16(FP), X30
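	// Round the length down to a multiple of 64 bytes (the block size)
	// and compute the end-of-input pointer; return immediately if there
	// is no full block to process.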
	SRL	$6, X30
	SLL	$6, X30

	ADD	X29, X30, X28
	BEQ	X28, X29, end

	MOV	$·_K(SB), X18		// const table
	ADD	$8, X2, X19		// message schedule

	MOV	dig+0(FP), X20
	MOVWU	(0*4)(X20), X10		// a = H0
	MOVWU	(1*4)(X20), X11		// b = H1
	MOVWU	(2*4)(X20), X12		// c = H2
	MOVWU	(3*4)(X20), X13		// d = H3
	MOVWU	(4*4)(X20), X14		// e = H4
	MOVWU	(5*4)(X20), X15		// f = H5
	MOVWU	(6*4)(X20), X16		// g = H6
	MOVWU	(7*4)(X20), X17		// h = H7

loop:
	SHA256ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)

	SHA256ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)

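	// Add the working variables a-h back into the digest state H0-H7,
	// reloading the previous values from (X20).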
	MOVWU	(0*4)(X20), X5
	MOVWU	(1*4)(X20), X6
	MOVWU	(2*4)(X20), X7
	MOVWU	(3*4)(X20), X8
	ADD	X5, X10		// H0 = a + H0
	ADD	X6, X11		// H1 = b + H1
	ADD	X7, X12		// H2 = c + H2
	ADD	X8, X13		// H3 = d + H3
	MOVW	X10, (0*4)(X20)
	MOVW	X11, (1*4)(X20)
	MOVW	X12, (2*4)(X20)
	MOVW	X13, (3*4)(X20)
	MOVWU	(4*4)(X20), X5
	MOVWU	(5*4)(X20), X6
	MOVWU	(6*4)(X20), X7
	MOVWU	(7*4)(X20), X8
	ADD	X5, X14		// H4 = e + H4
	ADD	X6, X15		// H5 = f + H5
	ADD	X7, X16		// H6 = g + H6
	ADD	X8, X17		// H7 = h + H7
	MOVW	X14, (4*4)(X20)
	MOVW	X15, (5*4)(X20)
	MOVW	X16, (6*4)(X20)
	MOVW	X17, (7*4)(X20)

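	// Advance to the next 64-byte block and loop until the end of the
	// rounded-down input is reached.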
	ADD	$64, X29
	BNE	X28, X29, loop

end:
	RET
