Text file src/crypto/internal/fips140/sha512/sha512block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA512 block routine. See sha512block.go for Go equivalent.
    10  
    11  #define REGTMP	R30	// scratch in LOAD1, SHA512T1, SHA512T2 and the digest update in block
    12  #define REGTMP1	R16	// scratch; holds T2 on exit from SHA512T2
    13  #define REGTMP2	R17	// scratch
    14  #define REGTMP3	R18	// scratch
    15  #define REGTMP4	R7	// carries W[i] from LOAD0/LOAD1 into SHA512T1, then holds T1
    16  #define REGTMP5	R6	// scratch; R6 also holds the rounded p_len in block until R25 is computed
    17  
    18  // LOAD0(t): W[t] = M[t] for 0 <= t <= 15; word t of the block at R5 goes through REVBV, is stored to slot t at R3, and stays in REGTMP4.
    19  #define LOAD0(t) \
    20  	MOVV	(t*8)(R5), REGTMP4; \
    21  	REVBV	REGTMP4, REGTMP4; \
    22  	MOVV	REGTMP4, (t*8)(R3)
    23  
    24  // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    25  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    26  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
        // W is kept as a 16-entry circular buffer at R3: every slot index is taken
        // modulo 16 (& 0xf), so the new W[i] overwrites the slot that held W[i-16].
        // The new W[i] is also left in REGTMP4. Clobbers REGTMP, REGTMP1, REGTMP2,
        // REGTMP3 and REGTMP5.
    27  #define LOAD1(index) \
    28  	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \
    29  	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \
    30  	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \
    31  	MOVV	REGTMP4, REGTMP2; \
    32  	MOVV	REGTMP4, REGTMP3; \
    33  	ROTRV	$19, REGTMP4; \
    34  	ROTRV	$61, REGTMP2; \
    35  	SRLV	$6, REGTMP3; \
    36  	XOR	REGTMP2, REGTMP4; \
    37  	XOR	REGTMP3, REGTMP4; \
    38  	ROTRV	$1, REGTMP1, REGTMP5; \
    39  	SRLV	$7, REGTMP1, REGTMP2; \
    40  	ROTRV	$8, REGTMP1; \
    41  	ADDV	REGTMP, REGTMP4; \
    42  	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \
    43  	XOR	REGTMP1, REGTMP5; \
    44  	XOR	REGTMP2, REGTMP5; \
    45  	ADDV	REGTMP, REGTMP5; \
    46  	ADDV	REGTMP5, REGTMP4; \
    47  	MOVV	REGTMP4, ((index&0xf)*8)(R3)
    48  
    49  // SHA512T1 takes W[i] in REGTMP4 and K[i] as the immediate const; h is also used as an accumulator.
    50  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    51  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    52  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    53  //                 = ((y XOR z) AND x) XOR z
    54  // T1 is returned in REGTMP4. h is left holding h + K[i] + W[i] + BIGSIGMA1(e);
        // e, f and g are only read. Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
    55  #define SHA512T1(const, e, f, g, h) \
    56  	ADDV	$const, h; \
    57  	ADDV	REGTMP4, h; \
    58  	ROTRV	$14, e, REGTMP5; \
    59  	ROTRV	$18, e, REGTMP; \
    60  	ROTRV	$41, e, REGTMP3; \
    61  	XOR	f, g, REGTMP2; \
    62  	XOR	REGTMP, REGTMP5; \
    63  	AND	e, REGTMP2; \
    64  	XOR	REGTMP5, REGTMP3; \
    65  	XOR	g, REGTMP2; \
    66  	ADDV	REGTMP3, h; \
    67  	ADDV	h, REGTMP2, REGTMP4
    68  
    69  // SHA512T2 computes T2 = BIGSIGMA0(a) + Maj(a, b, c)
    70  //   BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    71  //   Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    72  //                = ((y XOR z) AND x) XOR (y AND z)
    73  // T2 is returned in REGTMP1. a, b and c are only read, and REGTMP4 is not touched,
        // so a T1 left there by SHA512T1 survives. Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
    74  #define SHA512T2(a, b, c) \
    75  	ROTRV	$28, a, REGTMP5; \
    76  	ROTRV	$34, a, REGTMP3; \
    77  	ROTRV	$39, a, REGTMP2; \
    78  	XOR	b, c, REGTMP; \
    79  	AND	b, c, REGTMP1; \
    80  	XOR	REGTMP3, REGTMP5; \
    81  	AND	REGTMP, a, REGTMP; \
    82  	XOR	REGTMP2, REGTMP5; \
    83  	XOR	REGTMP, REGTMP1; \
    84  	ADDV	REGTMP5, REGTMP1
    85  
    86  // SHA512ROUND runs one compression round: SHA512T1 leaves T1 in REGTMP4, SHA512T2 leaves T2
    87  // in REGTMP1, then d += T1 (the next e) and h = T1 + T2 (the next a), ready for rotation.
        // SHA512ROUND0 and SHA512ROUND1 first produce W[t] in REGTMP4 with LOAD0 or LOAD1.
    88  #define SHA512ROUND(k, a, b, c, d, e, f, g, h) \
    89  	SHA512T1(k, e, f, g, h); \
    90  	SHA512T2(a, b, c); \
    91  	ADDV	REGTMP4, d; \
    92  	ADDV	REGTMP1, REGTMP4, h
    93  
    94  #define SHA512ROUND0(t, k, a, b, c, d, e, f, g, h) \
    95  	LOAD0(t); \
    96  	SHA512ROUND(k, a, b, c, d, e, f, g, h)
    97  
    98  #define SHA512ROUND1(t, k, a, b, c, d, e, f, g, h) \
    99  	LOAD1(t); \
   100  	SHA512ROUND(k, a, b, c, d, e, f, g, h)
   101  
   102  // block folds every complete 128-byte chunk of p into the eight 64-bit words at the start
   103  // of *dig; a trailing partial chunk is ignored. The 128-byte stack frame holds the 16-entry
   104  // message schedule (8 bytes * 16 entries) that the LOAD0 and LOAD1 macros address through R3.
   105  //
   106  // func block(dig *Digest, p []byte)
   107  TEXT ·block(SB),NOSPLIT,$128-32
   108  	MOVV	p_len+16(FP), R6
   109  	MOVV	p_base+8(FP), R5
   110  	AND	$~127, R6	// round the length down to a multiple of 128
   111  	BEQ	R6, end	// no complete chunk: nothing to do
   112  
   113  	// p_len >= 128
   114  	MOVV	dig+0(FP), R4
   115  	ADDV	R5, R6, R25	// R25 = address just past the last complete chunk (R6 is reused as REGTMP5 below)
   116  	MOVV	(0*8)(R4), R8	// a = H0
   117  	MOVV	(1*8)(R4), R9	// b = H1
   118  	MOVV	(2*8)(R4), R10	// c = H2
   119  	MOVV	(3*8)(R4), R11	// d = H3
   120  	MOVV	(4*8)(R4), R12	// e = H4
   121  	MOVV	(5*8)(R4), R13	// f = H5
   122  	MOVV	(6*8)(R4), R14	// g = H6
   123  	MOVV	(7*8)(R4), R15	// h = H7
   124  
   125  loop:	// one iteration per 128-byte chunk; R5 points at the current chunk, R8-R15 hold a-h
        	// Rounds 0-15 take W[t] straight from the chunk. Each round leaves the new a in
        	// its h register and the new e in its d register, so the register list rotates
        	// by one per round and is back in its starting order after every 8 rounds.
   126  	SHA512ROUND0( 0, 0x428a2f98d728ae22, R8,  R9,  R10, R11, R12, R13, R14, R15)
   127  	SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8,  R9,  R10, R11, R12, R13, R14)
   128  	SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   129  	SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   130  	SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8,  R9,  R10, R11)
   131  	SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8,  R9,  R10)
   132  	SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8,  R9)
   133  	SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9,  R10, R11, R12, R13, R14, R15, R8)
   134  	SHA512ROUND0( 8, 0xd807aa98a3030242, R8,  R9,  R10, R11, R12, R13, R14, R15)
   135  	SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8,  R9,  R10, R11, R12, R13, R14)
   136  	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   137  	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8,  R9,  R10, R11, R12)
   138  	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   139  	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   140  	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8,  R9)
   141  	SHA512ROUND0(15, 0xc19bf174cf692694, R9,  R10, R11, R12, R13, R14, R15, R8)
   142  
        	// Rounds 16-79 derive W[t] from earlier schedule words via LOAD1.
   143  	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8,  R9,  R10, R11, R12, R13, R14, R15)
   144  	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8,  R9,  R10, R11, R12, R13, R14)
   145  	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8,  R9,  R10, R11, R12, R13)
   146  	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8,  R9,  R10, R11, R12)
   147  	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8,  R9,  R10, R11)
   148  	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8,  R9,  R10)
   149  	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8,  R9)
   150  	SHA512ROUND1(23, 0x76f988da831153b5, R9,  R10, R11, R12, R13, R14, R15, R8)
   151  	SHA512ROUND1(24, 0x983e5152ee66dfab, R8,  R9,  R10, R11, R12, R13, R14, R15)
   152  	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8,  R9,  R10, R11, R12, R13, R14)
   153  	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   154  	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8,  R9,  R10, R11, R12)
   155  	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8,  R9,  R10, R11)
   156  	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8,  R9,  R10)
   157  	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8,  R9)
   158  	SHA512ROUND1(31, 0x142929670a0e6e70, R9,  R10, R11, R12, R13, R14, R15, R8)
   159  	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   160  	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8,  R9,  R10, R11, R12, R13, R14)
   161  	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8,  R9,  R10, R11, R12, R13)
   162  	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8,  R9,  R10, R11, R12)
   163  	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8,  R9,  R10, R11)
   164  	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8,  R9,  R10)
   165  	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8,  R9)
   166  	SHA512ROUND1(39, 0x92722c851482353b, R9,  R10, R11, R12, R13, R14, R15, R8)
   167  	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8,  R9,  R10, R11, R12, R13, R14, R15)
   168  	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8,  R9,  R10, R11, R12, R13, R14)
   169  	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8,  R9,  R10, R11, R12, R13)
   170  	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8,  R9,  R10, R11, R12)
   171  	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8,  R9,  R10, R11)
   172  	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8,  R9,  R10)
   173  	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8,  R9)
   174  	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9,  R10, R11, R12, R13, R14, R15, R8)
   175  	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   176  	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8,  R9,  R10, R11, R12, R13, R14)
   177  	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8,  R9,  R10, R11, R12, R13)
   178  	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8,  R9,  R10, R11, R12)
   179  	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8,  R9,  R10, R11)
   180  	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   181  	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8,  R9)
   182  	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9,  R10, R11, R12, R13, R14, R15, R8)
   183  	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   184  	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8,  R9,  R10, R11, R12, R13, R14)
   185  	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8,  R9,  R10, R11, R12, R13)
   186  	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8,  R9,  R10, R11, R12)
   187  	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   188  	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8,  R9,  R10)
   189  	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8,  R9)
   190  	SHA512ROUND1(63, 0xc67178f2e372532b, R9,  R10, R11, R12, R13, R14, R15, R8)
   191  	SHA512ROUND1(64, 0xca273eceea26619c, R8,  R9,  R10, R11, R12, R13, R14, R15)
   192  	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8,  R9,  R10, R11, R12, R13, R14)
   193  	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8,  R9,  R10, R11, R12, R13)
   194  	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8,  R9,  R10, R11, R12)
   195  	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8,  R9,  R10, R11)
   196  	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8,  R9,  R10)
   197  	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8,  R9)
   198  	SHA512ROUND1(71, 0x1b710b35131c471b, R9,  R10, R11, R12, R13, R14, R15, R8)
   199  	SHA512ROUND1(72, 0x28db77f523047d84, R8,  R9,  R10, R11, R12, R13, R14, R15)
   200  	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8,  R9,  R10, R11, R12, R13, R14)
   201  	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   202  	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8,  R9,  R10, R11, R12)
   203  	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8,  R9,  R10, R11)
   204  	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   205  	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8,  R9)
   206  	SHA512ROUND1(79, 0x6c44198c4a475817, R9,  R10, R11, R12, R13, R14, R15, R8)
   207  
        	// After 80 rounds a-h are back in R8-R15. Add the previous digest words to them
        	// and store the sums; R8-R15 then already hold the starting a-h for the next chunk.
   208  	MOVV	(0*8)(R4), REGTMP
   209  	MOVV	(1*8)(R4), REGTMP1
   210  	MOVV	(2*8)(R4), REGTMP2
   211  	MOVV	(3*8)(R4), REGTMP3
   212  	ADDV	REGTMP, R8	// H0 = a + H0
   213  	ADDV	REGTMP1, R9	// H1 = b + H1
   214  	ADDV	REGTMP2, R10	// H2 = c + H2
   215  	ADDV	REGTMP3, R11	// H3 = d + H3
   216  	MOVV	R8, (0*8)(R4)
   217  	MOVV	R9, (1*8)(R4)
   218  	MOVV	R10, (2*8)(R4)
   219  	MOVV	R11, (3*8)(R4)
   220  	MOVV	(4*8)(R4), REGTMP
   221  	MOVV	(5*8)(R4), REGTMP1
   222  	MOVV	(6*8)(R4), REGTMP2
   223  	MOVV	(7*8)(R4), REGTMP3
   224  	ADDV	REGTMP, R12	// H4 = e + H4
   225  	ADDV	REGTMP1, R13	// H5 = f + H5
   226  	ADDV	REGTMP2, R14	// H6 = g + H6
   227  	ADDV	REGTMP3, R15	// H7 = h + H7
   228  	MOVV	R12, (4*8)(R4)
   229  	MOVV	R13, (5*8)(R4)
   230  	MOVV	R14, (6*8)(R4)
   231  	MOVV	R15, (7*8)(R4)
   232  
   233  	ADDV	$128, R5	// advance to the next 128-byte chunk
   234  	BNE	R5, R25, loop	// repeat until R5 reaches the end address in R25
   235  
   236  end:
   237  	RET
   238  

View as plain text