File: | crypto/aes.c |
Warning: | line 864, column 6 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: aes.c,v 1.2 2020/07/22 13:54:30 tobhe Exp $ */ | |||
2 | /* | |||
3 | * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> | |||
4 | * | |||
5 | * Modified for OpenBSD by Thomas Pornin and Mike Belopuhov. | |||
6 | * | |||
7 | * Permission is hereby granted, free of charge, to any person obtaining | |||
8 | * a copy of this software and associated documentation files (the | |||
9 | * "Software"), to deal in the Software without restriction, including | |||
10 | * without limitation the rights to use, copy, modify, merge, publish, | |||
11 | * distribute, sublicense, and/or sell copies of the Software, and to | |||
12 | * permit persons to whom the Software is furnished to do so, subject to | |||
13 | * the following conditions: | |||
14 | * | |||
15 | * The above copyright notice and this permission notice shall be | |||
16 | * included in all copies or substantial portions of the Software. | |||
17 | * | |||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |||
21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |||
22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |||
23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |||
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
25 | * SOFTWARE. | |||
26 | */ | |||
27 | ||||
28 | #include <sys/types.h> | |||
29 | #include <sys/systm.h> | |||
30 | #include <sys/stdint.h> | |||
31 | ||||
32 | #include "aes.h" | |||
33 | ||||
/*
 * Store the 32-bit value x at dst as four bytes, least significant
 * byte first. dst is accessed bytewise only.
 */
static inline void
enc32le(void *dst, uint32_t x)
{
	unsigned char *out = dst;
	int n;

	for (n = 0; n < 4; n ++) {
		out[n] = (unsigned char)(x >> (8 * n));
	}
}
44 | ||||
/*
 * Read four bytes at src and assemble them into a 32-bit value, the
 * first byte being the least significant one.
 */
static inline uint32_t
dec32le(const void *src)
{
	const unsigned char *in = src;
	uint32_t v = 0;
	int n;

	/* Fold from the most significant byte down to the least. */
	for (n = 3; n >= 0; n --) {
		v = (v << 8) | (uint32_t)in[n];
	}
	return v;
}
55 | ||||
56 | /* | |||
57 | * This constant-time implementation is "bitsliced": the 128-bit state is | |||
58 | * split over eight 32-bit words q* in the following way: | |||
59 | * | |||
60 | * -- Input block consists in 16 bytes: | |||
61 | * a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a33 | |||
62 | * In the terminology of FIPS 197, this is a 4x4 matrix which is read | |||
63 | * column by column. | |||
64 | * | |||
65 | * -- Each byte is split into eight bits which are distributed over the | |||
66 | * eight words, at the same rank. Thus, for a byte x at rank k, bit 0 | |||
67 | * (least significant) of x will be at rank k in q0 (if that bit is b, | |||
68 | * then it contributes "b << k" to the value of q0), bit 1 of x will be | |||
69 | * at rank k in q1, and so on. | |||
70 | * | |||
71 | * -- Ranks given to bits are in "row order" and are either all even, or | |||
72 | * all odd. Two independent AES states are thus interleaved, one using | |||
73 | * the even ranks, the other the odd ranks. Row order means: | |||
74 | * a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a33 | |||
75 | * | |||
76 | * Converting input bytes from two AES blocks to bitslice representation | |||
77 | * is done in the following way: | |||
78 | * -- Decode first block into the four words q0 q2 q4 q6, in that order, | |||
79 | * using little-endian convention. | |||
80 | * -- Decode second block into the four words q1 q3 q5 q7, in that order, | |||
81 | * using little-endian convention. | |||
82 | * -- Call aes_ct_ortho(). | |||
83 | * | |||
84 | * Converting back to bytes is done by using the reverse operations. Note | |||
85 | * that aes_ct_ortho() is its own inverse. | |||
86 | */ | |||
87 | ||||
/*
 * Bitsliced, constant-time AES S-box. q[] holds eight 32-bit words;
 * bit k of each S-box input (bit 0 is least significant) lives in q[k],
 * so the 32 bit positions ("ranks") of the words carry 32 independent
 * S-box instances that are all evaluated at once. The result replaces
 * the input in q[], using the same layout.
 */
static void
aes_ct_bitslice_Sbox(uint32_t *q)
{
	/*
	 * Straight transcription of the Boyar-Peralta circuit ("A new
	 * combinational logic minimization technique with applications
	 * to cryptology", https://eprint.iacr.org/2009/191.pdf). Every
	 * gate is a single-assignment value, in the original gate order.
	 *
	 * The circuit numbers its inputs x* and outputs s* in "reverse"
	 * order: x0/s0 are the high bits, x7/s7 the low bits.
	 */
	const uint32_t x0 = q[7];
	const uint32_t x1 = q[6];
	const uint32_t x2 = q[5];
	const uint32_t x3 = q[4];
	const uint32_t x4 = q[3];
	const uint32_t x5 = q[2];
	const uint32_t x6 = q[1];
	const uint32_t x7 = q[0];

	/*
	 * Top linear transformation.
	 */
	const uint32_t y14 = x3 ^ x5;
	const uint32_t y13 = x0 ^ x6;
	const uint32_t y9 = x0 ^ x3;
	const uint32_t y8 = x0 ^ x5;
	const uint32_t t0 = x1 ^ x2;
	const uint32_t y1 = t0 ^ x7;
	const uint32_t y4 = y1 ^ x3;
	const uint32_t y12 = y13 ^ y14;
	const uint32_t y2 = y1 ^ x0;
	const uint32_t y5 = y1 ^ x6;
	const uint32_t y3 = y5 ^ y8;
	const uint32_t t1 = x4 ^ y12;
	const uint32_t y15 = t1 ^ x5;
	const uint32_t y20 = t1 ^ x1;
	const uint32_t y6 = y15 ^ x7;
	const uint32_t y10 = y15 ^ t0;
	const uint32_t y11 = y20 ^ y9;
	const uint32_t y7 = x7 ^ y11;
	const uint32_t y17 = y10 ^ y11;
	const uint32_t y19 = y10 ^ y8;
	const uint32_t y16 = t0 ^ y11;
	const uint32_t y21 = y13 ^ y16;
	const uint32_t y18 = x0 ^ y16;

	/*
	 * Non-linear section.
	 */
	const uint32_t t2 = y12 & y15;
	const uint32_t t3 = y3 & y6;
	const uint32_t t4 = t3 ^ t2;
	const uint32_t t5 = y4 & x7;
	const uint32_t t6 = t5 ^ t2;
	const uint32_t t7 = y13 & y16;
	const uint32_t t8 = y5 & y1;
	const uint32_t t9 = t8 ^ t7;
	const uint32_t t10 = y2 & y7;
	const uint32_t t11 = t10 ^ t7;
	const uint32_t t12 = y9 & y11;
	const uint32_t t13 = y14 & y17;
	const uint32_t t14 = t13 ^ t12;
	const uint32_t t15 = y8 & y10;
	const uint32_t t16 = t15 ^ t12;
	const uint32_t t17 = t4 ^ t14;
	const uint32_t t18 = t6 ^ t16;
	const uint32_t t19 = t9 ^ t14;
	const uint32_t t20 = t11 ^ t16;
	const uint32_t t21 = t17 ^ y20;
	const uint32_t t22 = t18 ^ y19;
	const uint32_t t23 = t19 ^ y21;
	const uint32_t t24 = t20 ^ y18;

	const uint32_t t25 = t21 ^ t22;
	const uint32_t t26 = t21 & t23;
	const uint32_t t27 = t24 ^ t26;
	const uint32_t t28 = t25 & t27;
	const uint32_t t29 = t28 ^ t22;
	const uint32_t t30 = t23 ^ t24;
	const uint32_t t31 = t22 ^ t26;
	const uint32_t t32 = t31 & t30;
	const uint32_t t33 = t32 ^ t24;
	const uint32_t t34 = t23 ^ t33;
	const uint32_t t35 = t27 ^ t33;
	const uint32_t t36 = t24 & t35;
	const uint32_t t37 = t36 ^ t34;
	const uint32_t t38 = t27 ^ t36;
	const uint32_t t39 = t29 & t38;
	const uint32_t t40 = t25 ^ t39;

	const uint32_t t41 = t40 ^ t37;
	const uint32_t t42 = t29 ^ t33;
	const uint32_t t43 = t29 ^ t40;
	const uint32_t t44 = t33 ^ t37;
	const uint32_t t45 = t42 ^ t41;
	const uint32_t z0 = t44 & y15;
	const uint32_t z1 = t37 & y6;
	const uint32_t z2 = t33 & x7;
	const uint32_t z3 = t43 & y16;
	const uint32_t z4 = t40 & y1;
	const uint32_t z5 = t29 & y7;
	const uint32_t z6 = t42 & y11;
	const uint32_t z7 = t45 & y17;
	const uint32_t z8 = t41 & y10;
	const uint32_t z9 = t44 & y12;
	const uint32_t z10 = t37 & y3;
	const uint32_t z11 = t33 & y4;
	const uint32_t z12 = t43 & y13;
	const uint32_t z13 = t40 & y5;
	const uint32_t z14 = t29 & y2;
	const uint32_t z15 = t42 & y9;
	const uint32_t z16 = t45 & y14;
	const uint32_t z17 = t41 & y8;

	/*
	 * Bottom linear transformation.
	 */
	const uint32_t t46 = z15 ^ z16;
	const uint32_t t47 = z10 ^ z11;
	const uint32_t t48 = z5 ^ z13;
	const uint32_t t49 = z9 ^ z10;
	const uint32_t t50 = z2 ^ z12;
	const uint32_t t51 = z2 ^ z5;
	const uint32_t t52 = z7 ^ z8;
	const uint32_t t53 = z0 ^ z3;
	const uint32_t t54 = z6 ^ z7;
	const uint32_t t55 = z16 ^ z17;
	const uint32_t t56 = z12 ^ t48;
	const uint32_t t57 = t50 ^ t53;
	const uint32_t t58 = z4 ^ t46;
	const uint32_t t59 = z3 ^ t54;
	const uint32_t t60 = t46 ^ t57;
	const uint32_t t61 = z14 ^ t57;
	const uint32_t t62 = t52 ^ t58;
	const uint32_t t63 = t49 ^ t58;
	const uint32_t t64 = z4 ^ t59;
	const uint32_t t65 = t61 ^ t62;
	const uint32_t t66 = z1 ^ t63;
	const uint32_t s0 = t59 ^ t63;
	const uint32_t s6 = t56 ^ ~t62;
	const uint32_t s7 = t48 ^ ~t60;
	const uint32_t t67 = t64 ^ t65;
	const uint32_t s3 = t53 ^ t66;
	const uint32_t s4 = t51 ^ t66;
	const uint32_t s5 = t47 ^ t65;
	const uint32_t s1 = t64 ^ ~s3;
	const uint32_t s2 = t55 ^ ~t67;

	/* Write back, undoing the reversed bit numbering. */
	q[7] = s0;
	q[6] = s1;
	q[5] = s2;
	q[4] = s3;
	q[3] = s4;
	q[2] = s5;
	q[1] = s6;
	q[0] = s7;
}
269 | ||||
/*
 * Perform bytewise orthogonalization of eight 32-bit words. Bytes
 * of q[0]..q[7] are spread over all words: for a byte x that occurs
 * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
 * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
 *
 * This operation is an involution: applying it twice restores q[].
 */
static void
aes_ct_ortho(uint32_t *q)
{
	/*
	 * SWAPN exchanges the bits of x selected by the high mask ch
	 * with the bits of y selected by the low mask cl, where ch is
	 * cl shifted left by s. x and y must be lvalues and must not be
	 * expressions involving identifiers named a or b, since the
	 * macro declares locals with those names.
	 */
#define SWAPN(cl, ch, s, x, y)   do { \
		uint32_t a, b; \
		a = (x); \
		b = (y); \
		(x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \
		(y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \
	} while (0)

#define SWAP2(x, y)   SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
#define SWAP4(x, y)   SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
#define SWAP8(x, y)   SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)

	/* Stage 1: single bits between adjacent words. */
	SWAP2(q[0], q[1]);
	SWAP2(q[2], q[3]);
	SWAP2(q[4], q[5]);
	SWAP2(q[6], q[7]);

	/* Stage 2: bit pairs between words two apart. */
	SWAP4(q[0], q[2]);
	SWAP4(q[1], q[3]);
	SWAP4(q[4], q[6]);
	SWAP4(q[5], q[7]);

	/* Stage 3: nibbles between words four apart. */
	SWAP8(q[0], q[4]);
	SWAP8(q[1], q[5]);
	SWAP8(q[2], q[6]);
	SWAP8(q[3], q[7]);
}
308 | ||||
/*
 * Run a 32-bit word through the bitsliced S-box: all eight slots are
 * loaded with x, converted to bitsliced form, substituted, converted
 * back, and slot 0 is returned.
 */
static inline uint32_t
sub_word(uint32_t x)
{
	uint32_t q[8] = { x, x, x, x, x, x, x, x };

	aes_ct_ortho(q);
	aes_ct_bitslice_Sbox(q);
	aes_ct_ortho(q);
	return q[0];
}
323 | ||||
/*
 * Round constants XORed into the key schedule, indexed by the number
 * of complete key-length groups already expanded.
 */
static const unsigned char Rcon[] = {
	0x01, 0x02, 0x04, 0x08, 0x10,
	0x20, 0x40, 0x80, 0x1B, 0x36
};
327 | ||||
328 | /* | |||
329 | * Base key schedule code. The function sub_word() must be defined | |||
330 | * below. Subkeys are produced in little-endian convention (but not | |||
331 | * bitsliced). Key length is expressed in bytes. | |||
332 | */ | |||
333 | static unsigned | |||
334 | aes_keysched_base(uint32_t *skey, const void *key, size_t key_len) | |||
335 | { | |||
336 | unsigned num_rounds; | |||
337 | int i, j, k, nk, nkf; | |||
338 | uint32_t tmp; | |||
339 | ||||
340 | switch (key_len) { | |||
341 | case 16: | |||
342 | num_rounds = 10; | |||
343 | break; | |||
344 | case 24: | |||
345 | num_rounds = 12; | |||
346 | break; | |||
347 | case 32: | |||
348 | num_rounds = 14; | |||
349 | break; | |||
350 | default: | |||
351 | return 0; | |||
352 | } | |||
353 | nk = (int)(key_len >> 2); | |||
354 | nkf = (int)((num_rounds + 1) << 2); | |||
355 | for (i = 0; i < nk; i ++) { | |||
356 | tmp = dec32le((const unsigned char *)key + (i << 2)); | |||
357 | skey[i] = tmp; | |||
358 | } | |||
359 | tmp = skey[(key_len >> 2) - 1]; | |||
360 | for (i = nk, j = 0, k = 0; i < nkf; i ++) { | |||
361 | if (j == 0) { | |||
362 | tmp = (tmp << 24) | (tmp >> 8); | |||
363 | tmp = sub_word(tmp) ^ Rcon[k]; | |||
364 | } else if (nk > 6 && j == 4) { | |||
365 | tmp = sub_word(tmp); | |||
366 | } | |||
367 | tmp ^= skey[i - nk]; | |||
368 | skey[i] = tmp; | |||
369 | if (++ j == nk) { | |||
370 | j = 0; | |||
371 | k ++; | |||
372 | } | |||
373 | } | |||
374 | return num_rounds; | |||
375 | } | |||
376 | ||||
/*
 * AES key schedule, constant-time version. comp_skey[] is filled with
 * n+1 128-bit subkeys in compressed bitsliced form (four words per
 * subkey), where n is the number of rounds (10 to 14, depending on key
 * size). The number of rounds is returned.
 *
 * If the key size is invalid (not 16, 24 or 32), then 0 is returned and
 * comp_skey[] is not written.
 */
unsigned
aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
{
	uint32_t skey[60];
	unsigned u, num_rounds;

	num_rounds = aes_keysched_base(skey, key, key_len);
	if (num_rounds == 0) {
		/*
		 * aes_keysched_base() rejected the key length and did
		 * not fill skey[]. The loop below would still run once
		 * (u <= 0) and copy uninitialised stack words into
		 * comp_skey[], so stop here.
		 */
		return 0;
	}
	for (u = 0; u <= num_rounds; u ++) {
		uint32_t q[8];

		/*
		 * Load the subkey into both interleaved slots, then
		 * keep the even-rank bits of one word and the odd-rank
		 * bits of its twin to pack each pair into one word.
		 */
		q[0] = q[1] = skey[(u << 2) + 0];
		q[2] = q[3] = skey[(u << 2) + 1];
		q[4] = q[5] = skey[(u << 2) + 2];
		q[6] = q[7] = skey[(u << 2) + 3];
		aes_ct_ortho(q);
		comp_skey[(u << 2) + 0] =
		    (q[0] & 0x55555555) | (q[1] & 0xAAAAAAAA);
		comp_skey[(u << 2) + 1] =
		    (q[2] & 0x55555555) | (q[3] & 0xAAAAAAAA);
		comp_skey[(u << 2) + 2] =
		    (q[4] & 0x55555555) | (q[5] & 0xAAAAAAAA);
		comp_skey[(u << 2) + 3] =
		    (q[6] & 0x55555555) | (q[7] & 0xAAAAAAAA);
	}
	return num_rounds;
}
409 | ||||
/*
 * Expand AES subkeys as produced by aes_ct_keysched() into the larger
 * array used by aes_ct_bitslice_encrypt() and
 * aes_ct_bitslice_decrypt(). Each of the 4 * (num_rounds + 1)
 * compressed words yields two output words: one built from its
 * even-rank bits, one from its odd-rank bits, each bit being duplicated
 * into the neighbouring rank.
 */
void
aes_ct_skey_expand(uint32_t *skey,
	unsigned num_rounds, const uint32_t *comp_skey)
{
	const unsigned count = (num_rounds + 1) << 2;
	unsigned i;

	for (i = 0; i < count; i ++) {
		const uint32_t packed = comp_skey[i];
		const uint32_t even = packed & 0x55555555;
		const uint32_t odd = packed & 0xAAAAAAAA;

		skey[2 * i + 0] = even | (even << 1);
		skey[2 * i + 1] = odd | (odd >> 1);
	}
}
432 | ||||
/*
 * XOR the eight subkey words sk[0..7] into the eight state words
 * q[0..7].
 */
static inline void
add_round_key(uint32_t *q, const uint32_t *sk)
{
	int n;

	for (n = 0; n < 8; n ++) {
		q[n] ^= sk[n];
	}
}
445 | ||||
/*
 * Bitsliced ShiftRows, applied to each of the eight state words. Every
 * byte of a word is handled separately: the lowest byte is kept as is,
 * and the next three are rotated within themselves by 2, 4 and 6 bit
 * positions towards the low end.
 */
static inline void
shift_rows(uint32_t *q)
{
	uint32_t *word;

	for (word = q; word != q + 8; word ++) {
		const uint32_t w = *word;
		const uint32_t byte0 = w & 0x000000FF;
		const uint32_t byte1 =
		    ((w & 0x0000FC00) >> 2) | ((w & 0x00000300) << 6);
		const uint32_t byte2 =
		    ((w & 0x00F00000) >> 4) | ((w & 0x000F0000) << 4);
		const uint32_t byte3 =
		    ((w & 0xC0000000) >> 6) | ((w & 0x3F000000) << 2);

		*word = byte0 | byte1 | byte2 | byte3;
	}
}
461 | ||||
/*
 * Rotate a 32-bit word by 16 bits, i.e. swap its two halves.
 */
static inline uint32_t
rotr16(uint32_t x)
{
	const uint32_t upper = x >> 16;
	const uint32_t lower = x << 16;

	return lower | upper;
}
467 | ||||
/*
 * Bitsliced MixColumns, applied in place to the eight state words.
 */
static inline void
mix_columns(uint32_t *q)
{
	uint32_t r[8];	/* each word rotated right by 8 bits */
	uint32_t s[8];	/* word XOR its 8-bit rotation */
	int n;

	for (n = 0; n < 8; n ++) {
		r[n] = (q[n] >> 8) | (q[n] << 24);
		s[n] = q[n] ^ r[n];
	}

	/*
	 * Each output word combines s[] of the word below it, r[] of
	 * the word itself and the half-swapped s[] of the word itself;
	 * words 0, 1, 3 and 4 additionally absorb s[7].
	 */
	q[0] = s[7] ^ r[0] ^ rotr16(s[0]);
	q[1] = s[0] ^ s[7] ^ r[1] ^ rotr16(s[1]);
	q[2] = s[1] ^ r[2] ^ rotr16(s[2]);
	q[3] = s[2] ^ s[7] ^ r[3] ^ rotr16(s[3]);
	q[4] = s[3] ^ s[7] ^ r[4] ^ rotr16(s[4]);
	q[5] = s[4] ^ r[5] ^ rotr16(s[5]);
	q[6] = s[5] ^ r[6] ^ rotr16(s[6]);
	q[7] = s[6] ^ r[7] ^ rotr16(s[7]);
}
500 | ||||
/*
 * Compute AES encryption on bitsliced data, in place in q[]. Since the
 * state is stored on eight 32-bit words, two block encryptions are
 * performed in parallel. skey[] holds eight words per round key, as
 * produced by aes_ct_skey_expand(); round keys 0 to num_rounds are
 * used.
 */
void
aes_ct_bitslice_encrypt(unsigned num_rounds,
	const uint32_t *skey, uint32_t *q)
{
	unsigned round = 1;

	add_round_key(q, skey);

	/* Full rounds: every round but the last. */
	while (round < num_rounds) {
		aes_ct_bitslice_Sbox(q);
		shift_rows(q);
		mix_columns(q);
		add_round_key(q, skey + (round << 3));
		round ++;
	}

	/* The final round has no MixColumns step. */
	aes_ct_bitslice_Sbox(q);
	shift_rows(q);
	add_round_key(q, skey + (num_rounds << 3));
}
523 | ||||
/*
 * Linear layer used on both sides of the forward S-box when computing
 * the inverse S-box: complement words 0, 1, 5 and 6 (XOR of every byte
 * with 0x63), then apply the B() transform described in
 * aes_ct_bitslice_invSbox().
 */
static void
aes_ct_invSbox_linear(uint32_t *q)
{
	const uint32_t b0 = ~q[0];
	const uint32_t b1 = ~q[1];
	const uint32_t b2 = q[2];
	const uint32_t b3 = q[3];
	const uint32_t b4 = q[4];
	const uint32_t b5 = ~q[5];
	const uint32_t b6 = ~q[6];
	const uint32_t b7 = q[7];

	q[7] = b1 ^ b4 ^ b6;
	q[6] = b0 ^ b3 ^ b5;
	q[5] = b7 ^ b2 ^ b4;
	q[4] = b6 ^ b1 ^ b3;
	q[3] = b5 ^ b0 ^ b2;
	q[2] = b4 ^ b7 ^ b1;
	q[1] = b3 ^ b6 ^ b0;
	q[0] = b2 ^ b5 ^ b7;
}

/*
 * Like aes_ct_bitslice_Sbox(), but for the inverse S-box.
 */
void
aes_ct_bitslice_invSbox(uint32_t *q)
{
	/*
	 * AES S-box is:
	 *   S(x) = A(I(x)) ^ 0x63
	 * where I() is inversion in GF(256), and A() is a linear
	 * transform (0 is formally defined to be its own inverse).
	 * Since inversion is an involution, the inverse S-box can be
	 * computed from the S-box as:
	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
	 *
	 * The forward S-box implementation is reused rather than
	 * duplicated, which keeps total code size lower. Merging the
	 * B() transforms into the S-box circuit would be faster, but
	 * CBC decryption already outpaces CBC encryption because two
	 * blocks can be processed in parallel.
	 */
	aes_ct_invSbox_linear(q);
	aes_ct_bitslice_Sbox(q);
	aes_ct_invSbox_linear(q);
}
586 | ||||
/*
 * Bitsliced inverse of shift_rows(), applied to each of the eight
 * state words. The lowest byte of a word is kept as is, and the next
 * three are rotated within themselves by 2, 4 and 6 bit positions
 * towards the high end.
 */
static inline void
inv_shift_rows(uint32_t *q)
{
	uint32_t *word;

	for (word = q; word != q + 8; word ++) {
		const uint32_t w = *word;
		const uint32_t byte0 = w & 0x000000FF;
		const uint32_t byte1 =
		    ((w & 0x00003F00) << 2) | ((w & 0x0000C000) >> 6);
		const uint32_t byte2 =
		    ((w & 0x000F0000) << 4) | ((w & 0x00F00000) >> 4);
		const uint32_t byte3 =
		    ((w & 0x03000000) << 6) | ((w & 0xFC000000) >> 2);

		*word = byte0 | byte1 | byte2 | byte3;
	}
}
602 | ||||
/*
 * Bitsliced InvMixColumns, applied in place to the eight state words.
 */
static void
inv_mix_columns(uint32_t *q)
{
	uint32_t s[8];	/* snapshot of the input words */
	uint32_t r[8];	/* each input word rotated right by 8 bits */
	int n;

	for (n = 0; n < 8; n ++) {
		s[n] = q[n];
		r[n] = (s[n] >> 8) | (s[n] << 24);
	}

	/*
	 * Each output word is a fixed XOR of input words and their
	 * rotations, plus a half-swapped second XOR of the same kind.
	 */
	q[0] = s[5] ^ s[6] ^ s[7] ^ r[0] ^ r[5] ^ r[7]
	    ^ rotr16(s[0] ^ s[5] ^ s[6] ^ r[0] ^ r[5]);
	q[1] = s[0] ^ s[5] ^ r[0] ^ r[1] ^ r[5] ^ r[6] ^ r[7]
	    ^ rotr16(s[1] ^ s[5] ^ s[7] ^ r[1] ^ r[5] ^ r[6]);
	q[2] = s[0] ^ s[1] ^ s[6] ^ r[1] ^ r[2] ^ r[6] ^ r[7]
	    ^ rotr16(s[0] ^ s[2] ^ s[6] ^ r[2] ^ r[6] ^ r[7]);
	q[3] = s[0] ^ s[1] ^ s[2] ^ s[5] ^ s[6] ^ r[0] ^ r[2] ^ r[3] ^ r[5]
	    ^ rotr16(s[0] ^ s[1] ^ s[3] ^ s[5] ^ s[6] ^ s[7]
	    ^ r[0] ^ r[3] ^ r[5] ^ r[7]);
	q[4] = s[1] ^ s[2] ^ s[3] ^ s[5]
	    ^ r[1] ^ r[3] ^ r[4] ^ r[5] ^ r[6] ^ r[7]
	    ^ rotr16(s[1] ^ s[2] ^ s[4] ^ s[5] ^ s[7]
	    ^ r[1] ^ r[4] ^ r[5] ^ r[6]);
	q[5] = s[2] ^ s[3] ^ s[4] ^ s[6] ^ r[2] ^ r[4] ^ r[5] ^ r[6] ^ r[7]
	    ^ rotr16(s[2] ^ s[3] ^ s[5] ^ s[6] ^ r[2] ^ r[5] ^ r[6] ^ r[7]);
	q[6] = s[3] ^ s[4] ^ s[5] ^ s[7] ^ r[3] ^ r[5] ^ r[6] ^ r[7]
	    ^ rotr16(s[3] ^ s[4] ^ s[6] ^ s[7] ^ r[3] ^ r[6] ^ r[7]);
	q[7] = s[4] ^ s[5] ^ s[6] ^ r[4] ^ r[6] ^ r[7]
	    ^ rotr16(s[4] ^ s[5] ^ s[7] ^ r[4] ^ r[7]);
}
635 | ||||
/*
 * Compute AES decryption on bitsliced data, in place in q[]. Since the
 * state is stored on eight 32-bit words, two block decryptions are
 * performed in parallel. skey[] holds eight words per round key, as
 * produced by aes_ct_skey_expand(); round keys are consumed from
 * num_rounds down to 0.
 */
void
aes_ct_bitslice_decrypt(unsigned num_rounds,
	const uint32_t *skey, uint32_t *q)
{
	unsigned round = num_rounds;

	add_round_key(q, skey + (num_rounds << 3));

	/* Undo the full rounds, last one first. */
	while (-- round > 0) {
		inv_shift_rows(q);
		aes_ct_bitslice_invSbox(q);
		add_round_key(q, skey + (round << 3));
		inv_mix_columns(q);
	}

	/* Undo the initial round, which has no MixColumns step. */
	inv_shift_rows(q);
	aes_ct_bitslice_invSbox(q);
	add_round_key(q, skey);
}
658 | ||||
659 | ||||
660 | int | |||
661 | AES_Setkey(AES_CTX *ctx, const uint8_t *key, int len) | |||
662 | { | |||
663 | ctx->num_rounds = aes_ct_keysched(ctx->sk, key, len); | |||
664 | if (ctx->num_rounds == 0) | |||
665 | return -1; | |||
666 | aes_ct_skey_expand(ctx->sk_exp, ctx->num_rounds, ctx->sk); | |||
667 | return 0; | |||
668 | } | |||
669 | ||||
670 | void | |||
671 | AES_Encrypt_ECB(AES_CTX *ctx, const uint8_t *src, | |||
672 | uint8_t *dst, size_t num_blocks) | |||
673 | { | |||
674 | while (num_blocks > 0) { | |||
675 | uint32_t q[8]; | |||
676 | ||||
677 | q[0] = dec32le(src); | |||
678 | q[2] = dec32le(src + 4); | |||
679 | q[4] = dec32le(src + 8); | |||
680 | q[6] = dec32le(src + 12); | |||
681 | if (num_blocks > 1) { | |||
682 | q[1] = dec32le(src + 16); | |||
683 | q[3] = dec32le(src + 20); | |||
684 | q[5] = dec32le(src + 24); | |||
685 | q[7] = dec32le(src + 28); | |||
686 | } else { | |||
687 | q[1] = 0; | |||
688 | q[3] = 0; | |||
689 | q[5] = 0; | |||
690 | q[7] = 0; | |||
691 | } | |||
692 | aes_ct_ortho(q); | |||
693 | aes_ct_bitslice_encrypt(ctx->num_rounds, ctx->sk_exp, q); | |||
694 | aes_ct_ortho(q); | |||
695 | enc32le(dst, q[0]); | |||
696 | enc32le(dst + 4, q[2]); | |||
697 | enc32le(dst + 8, q[4]); | |||
698 | enc32le(dst + 12, q[6]); | |||
699 | if (num_blocks > 1) { | |||
700 | enc32le(dst + 16, q[1]); | |||
701 | enc32le(dst + 20, q[3]); | |||
702 | enc32le(dst + 24, q[5]); | |||
703 | enc32le(dst + 28, q[7]); | |||
704 | src += 32; | |||
705 | dst += 32; | |||
706 | num_blocks -= 2; | |||
707 | } else { | |||
708 | break; | |||
709 | } | |||
710 | } | |||
711 | } | |||
712 | ||||
713 | void | |||
714 | AES_Decrypt_ECB(AES_CTX *ctx, const uint8_t *src, | |||
715 | uint8_t *dst, size_t num_blocks) | |||
716 | { | |||
717 | while (num_blocks > 0) { | |||
718 | uint32_t q[8]; | |||
719 | ||||
720 | q[0] = dec32le(src); | |||
721 | q[2] = dec32le(src + 4); | |||
722 | q[4] = dec32le(src + 8); | |||
723 | q[6] = dec32le(src + 12); | |||
724 | if (num_blocks > 1) { | |||
725 | q[1] = dec32le(src + 16); | |||
726 | q[3] = dec32le(src + 20); | |||
727 | q[5] = dec32le(src + 24); | |||
728 | q[7] = dec32le(src + 28); | |||
729 | } else { | |||
730 | q[1] = 0; | |||
731 | q[3] = 0; | |||
732 | q[5] = 0; | |||
733 | q[7] = 0; | |||
734 | } | |||
735 | aes_ct_ortho(q); | |||
736 | aes_ct_bitslice_decrypt(ctx->num_rounds, ctx->sk_exp, q); | |||
737 | aes_ct_ortho(q); | |||
738 | enc32le(dst, q[0]); | |||
739 | enc32le(dst + 4, q[2]); | |||
740 | enc32le(dst + 8, q[4]); | |||
741 | enc32le(dst + 12, q[6]); | |||
742 | if (num_blocks > 1) { | |||
743 | enc32le(dst + 16, q[1]); | |||
744 | enc32le(dst + 20, q[3]); | |||
745 | enc32le(dst + 24, q[5]); | |||
746 | enc32le(dst + 28, q[7]); | |||
747 | src += 32; | |||
748 | dst += 32; | |||
749 | num_blocks -= 2; | |||
750 | } else { | |||
751 | break; | |||
752 | } | |||
753 | } | |||
754 | } | |||
755 | ||||
756 | void | |||
757 | AES_Encrypt(AES_CTX *ctx, const uint8_t *src, uint8_t *dst) | |||
758 | { | |||
759 | AES_Encrypt_ECB(ctx, src, dst, 1); | |||
760 | } | |||
761 | ||||
762 | void | |||
763 | AES_Decrypt(AES_CTX *ctx, const uint8_t *src, uint8_t *dst) | |||
764 | { | |||
765 | AES_Decrypt_ECB(ctx, src, dst, 1); | |||
766 | } | |||
767 | ||||
/*
 * Compute the (non-bitsliced) encryption subkeys for key, of len
 * bytes, and store them in skey[] with the bytes of every word
 * reversed relative to what aes_keysched_base() produces.
 * 4 * (rounds + 1) words are written.
 *
 * Returns the number of rounds, or 0 (skey[] untouched) when
 * aes_keysched_base() rejects the key length.
 */
int
AES_KeySetup_Encrypt(uint32_t *skey, const uint8_t *key, int len)
{
	uint32_t le_words[60];
	unsigned rounds, count, n;

	rounds = aes_keysched_base(le_words, key, len);
	if (rounds != 0) {
		count = (rounds + 1) << 2;
		for (n = 0; n < count; n ++) {
			const uint32_t w = le_words[n];

			/* Swap byte order of the word. */
			skey[n] = (w >> 24)
			    | ((w & 0x00FF0000) >> 8)
			    | ((w & 0x0000FF00) << 8)
			    | (w << 24);
		}
	}
	return rounds;
}
789 | ||||
/*
 * Reduce value x modulo polynomial x^8+x^4+x^3+x+1. This works as
 * long as x fits on 12 bits at most.
 */
static inline uint32_t
redgf256(uint32_t x)
{
	const uint32_t overflow = x >> 8;
	uint32_t acc = x;

	/* Fold the overflow bits back in, once per low-order term. */
	acc ^= overflow;
	acc ^= overflow << 1;
	acc ^= overflow << 3;
	acc ^= overflow << 4;
	return acc & 0xFF;
}
802 | ||||
/*
 * Multiplication by 0x09 in GF(256).
 */
static inline uint32_t
mul9(uint32_t x)
{
	/* 0x09 has bits 3 and 0 set. */
	const uint32_t unreduced = (x << 3) ^ x;

	return redgf256(unreduced);
}
811 | ||||
/*
 * Multiplication by 0x0B in GF(256).
 */
static inline uint32_t
mulb(uint32_t x)
{
	/* 0x0B has bits 3, 1 and 0 set. */
	const uint32_t unreduced = (x << 3) ^ (x << 1) ^ x;

	return redgf256(unreduced);
}
820 | ||||
/*
 * Multiplication by 0x0D in GF(256).
 */
static inline uint32_t
muld(uint32_t x)
{
	/* 0x0D has bits 3, 2 and 0 set. */
	const uint32_t unreduced = (x << 3) ^ (x << 2) ^ x;

	return redgf256(unreduced);
}
829 | ||||
/*
 * Multiplication by 0x0E in GF(256).
 */
static inline uint32_t
mule(uint32_t x)
{
	/* 0x0E has bits 3, 2 and 1 set. */
	const uint32_t unreduced = (x << 3) ^ (x << 2) ^ (x << 1);

	return redgf256(unreduced);
}
838 | ||||
/*
 * Compute the (non-bitsliced) decryption subkeys for key, of len
 * bytes, and store them in skey[]: the encryption subkeys in reverse
 * round order, with InvMixColumns() applied to all but the first and
 * last. 4 * (rounds + 1) words are written.
 *
 * Returns the number of rounds, or 0 (skey[] untouched) when
 * AES_KeySetup_Encrypt() rejects the key length.
 */
int
AES_KeySetup_Decrypt(uint32_t *skey, const uint8_t *key, int len)
{
	unsigned r, u;
	uint32_t tkey[60];

	/*
	 * Compute encryption subkeys. We get them in big-endian
	 * notation.
	 */
	r = AES_KeySetup_Encrypt(tkey, key, len);
	if (r == 0) {
		return 0;
	}

	/*
	 * From here on r is non-zero, so AES_KeySetup_Encrypt() has
	 * written tkey[0 .. 4*(r+1)-1]; every word read below lies in
	 * that range.
	 */

	/*
	 * Copy the subkeys in reverse order. Also, apply InvMixColumns()
	 * on the subkeys (except first and last).
	 */
	memcpy(skey + (r << 2), tkey, 4 * sizeof(uint32_t));
	memcpy(skey, tkey + (r << 2), 4 * sizeof(uint32_t));
	for (u = 4; u < (r << 2); u ++) {
		uint32_t sk, sk0, sk1, sk2, sk3;
		uint32_t tk, tk0, tk1, tk2, tk3;

		/* Split the word into bytes, most significant first. */
		sk = tkey[u];
		sk0 = sk >> 24;
		sk1 = (sk >> 16) & 0xFF;
		sk2 = (sk >> 8) & 0xFF;
		sk3 = sk & 0xFF;

		/* Multiply the column by the 0E/0B/0D/09 matrix. */
		tk0 = mule(sk0) ^ mulb(sk1) ^ muld(sk2) ^ mul9(sk3);
		tk1 = mul9(sk0) ^ mule(sk1) ^ mulb(sk2) ^ muld(sk3);
		tk2 = muld(sk0) ^ mul9(sk1) ^ mule(sk2) ^ mulb(sk3);
		tk3 = mulb(sk0) ^ muld(sk1) ^ mul9(sk2) ^ mule(sk3);
		tk = (tk0 << 24) ^ (tk1 << 16) ^ (tk2 << 8) ^ tk3;

		/* Round u/4 goes to slot r - u/4, same word position. */
		skey[((r - (u >> 2)) << 2) + (u & 3)] = tk;
	}

	return r;
}