File: | dev/softraid_raid6.c |
Warning: | line 736, column 29 Access to field 'swu_dis' results in a dereference of a null pointer (loaded from variable 'wu') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: softraid_raid6.c,v 1.72 2021/05/16 15:12:37 deraadt Exp $ */ | |||
2 | /* | |||
3 | * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us> | |||
4 | * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include "bio.h" | |||
20 | ||||
21 | #include <sys/param.h> | |||
22 | #include <sys/systm.h> | |||
23 | #include <sys/buf.h> | |||
24 | #include <sys/device.h> | |||
25 | #include <sys/ioctl.h> | |||
26 | #include <sys/malloc.h> | |||
27 | #include <sys/kernel.h> | |||
28 | #include <sys/disk.h> | |||
29 | #include <sys/rwlock.h> | |||
30 | #include <sys/queue.h> | |||
31 | #include <sys/fcntl.h> | |||
32 | #include <sys/mount.h> | |||
33 | #include <sys/sensors.h> | |||
34 | #include <sys/stat.h> | |||
35 | #include <sys/task.h> | |||
36 | #include <sys/conf.h> | |||
37 | #include <sys/uio.h> | |||
38 | ||||
39 | #include <scsi/scsi_all.h> | |||
40 | #include <scsi/scsiconf.h> | |||
41 | #include <scsi/scsi_disk.h> | |||
42 | ||||
43 | #include <dev/softraidvar.h> | |||
44 | ||||
45 | uint8_t *gf_map[256]; | |||
46 | uint8_t gf_pow[768]; | |||
47 | int gf_log[256]; | |||
48 | ||||
49 | /* RAID 6 functions. */ | |||
50 | int sr_raid6_create(struct sr_discipline *, struct bioc_createraid *, | |||
51 | int, int64_t); | |||
52 | int sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *, | |||
53 | int, void *); | |||
54 | int sr_raid6_init(struct sr_discipline *); | |||
55 | int sr_raid6_rw(struct sr_workunit *); | |||
56 | int sr_raid6_openings(struct sr_discipline *); | |||
57 | void sr_raid6_intr(struct buf *); | |||
58 | int sr_raid6_wu_done(struct sr_workunit *); | |||
59 | void sr_raid6_set_chunk_state(struct sr_discipline *, int, int); | |||
60 | void sr_raid6_set_vol_state(struct sr_discipline *); | |||
61 | ||||
62 | void sr_raid6_xorp(void *, void *, int); | |||
63 | void sr_raid6_xorq(void *, void *, int, int); | |||
64 | int sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long, | |||
65 | void *, int, int, void *, void *, int); | |||
66 | void sr_raid6_scrub(struct sr_discipline *); | |||
67 | int sr_failio(struct sr_workunit *); | |||
68 | ||||
69 | void gf_init(void); | |||
70 | uint8_t gf_inv(uint8_t); | |||
71 | int gf_premul(uint8_t); | |||
72 | uint8_t gf_mul(uint8_t, uint8_t); | |||
73 | ||||
/*
 * Per-strip failure flags accumulated by sr_raid6_rw() while scanning
 * the chunk states:
 *   SR_FAILX - the data chunk being accessed is unavailable
 *   SR_FAILY - some other data chunk is unavailable
 *   SR_FAILP - the xor-parity (P) chunk is unavailable
 *   SR_FAILQ - the GF(256) parity (Q) chunk is unavailable
 */
#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)
#define SR_FAILY	(1L << 1)
#define SR_FAILP	(1L << 2)
#define SR_FAILQ	(1L << 3)
79 | ||||
/*
 * Parity context hung off ccb_opaque by sr_raid6_addio() and consumed by
 * sr_raid6_intr() when the read completes.
 */
struct sr_raid6_opaque {
	int	 gn;	/* multiplier handed to sr_raid6_xorq() */
	void	*pbuf;	/* buffer the read data is xor-ed into, or NULL */
	void	*qbuf;	/* buffer the gn-scaled data is xor-ed into, or NULL */
};
85 | ||||
86 | /* discipline initialisation. */ | |||
87 | void | |||
88 | sr_raid6_discipline_init(struct sr_discipline *sd) | |||
89 | { | |||
90 | /* Initialize GF256 tables. */ | |||
91 | gf_init(); | |||
92 | ||||
93 | /* Fill out discipline members. */ | |||
94 | sd->sd_type = SR_MD_RAID68; | |||
95 | strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name)); | |||
96 | sd->sd_capabilities = SR_CAP_SYSTEM_DISK0x00000001 | SR_CAP_AUTO_ASSEMBLE0x00000002 | | |||
97 | SR_CAP_REDUNDANT0x00000010; | |||
98 | sd->sd_max_wu = SR_RAID6_NOWU16; | |||
99 | ||||
100 | /* Setup discipline specific function pointers. */ | |||
101 | sd->sd_assemble = sr_raid6_assemble; | |||
102 | sd->sd_create = sr_raid6_create; | |||
103 | sd->sd_openings = sr_raid6_openings; | |||
104 | sd->sd_scsi_rw = sr_raid6_rw; | |||
105 | sd->sd_scsi_intr = sr_raid6_intr; | |||
106 | sd->sd_scsi_wu_done = sr_raid6_wu_done; | |||
107 | sd->sd_set_chunk_state = sr_raid6_set_chunk_state; | |||
108 | sd->sd_set_vol_state = sr_raid6_set_vol_state; | |||
109 | } | |||
110 | ||||
111 | int | |||
112 | sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc, | |||
113 | int no_chunk, int64_t coerced_size) | |||
114 | { | |||
115 | if (no_chunk < 4) { | |||
116 | sr_error(sd->sd_sc, "%s requires four or more chunks", | |||
117 | sd->sd_name); | |||
118 | return EINVAL22; | |||
119 | } | |||
120 | ||||
121 | /* | |||
122 | * XXX add variable strip size later even though MAXPHYS is really | |||
123 | * the clever value, users like * to tinker with that type of stuff. | |||
124 | */ | |||
125 | sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size = MAXPHYS(64 * 1024); | |||
126 | sd->sd_meta->ssdi_sdd_invariant.ssd_size = (coerced_size & | |||
127 | ~(((u_int64_t)sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size >> | |||
128 | DEV_BSHIFT9) - 1)) * (no_chunk - 2); | |||
129 | ||||
130 | return sr_raid6_init(sd); | |||
131 | } | |||
132 | ||||
/*
 * Assemble an existing RAID 6 volume.  Nothing beyond the common runtime
 * setup is needed; bc, no_chunk and data are unused.
 */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int	rv;

	rv = sr_raid6_init(sd);
	return rv;
}
139 | ||||
140 | int | |||
141 | sr_raid6_init(struct sr_discipline *sd) | |||
142 | { | |||
143 | /* Initialise runtime values. */ | |||
144 | sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits = | |||
145 | sr_validate_stripsize(sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size); | |||
146 | if (sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits == -1) { | |||
147 | sr_error(sd->sd_sc, "invalid strip size"); | |||
148 | return EINVAL22; | |||
149 | } | |||
150 | ||||
151 | /* only if stripsize <= MAXPHYS */ | |||
152 | sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no); | |||
153 | ||||
154 | return 0; | |||
155 | } | |||
156 | ||||
157 | int | |||
158 | sr_raid6_openings(struct sr_discipline *sd) | |||
159 | { | |||
160 | return (sd->sd_max_wu >> 1); /* 2 wu's per IO */ | |||
161 | } | |||
162 | ||||
163 | void | |||
164 | sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state) | |||
165 | { | |||
166 | int old_state, s; | |||
167 | ||||
168 | /* XXX this is for RAID 0 */ | |||
169 | DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", | |||
170 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, | |||
171 | sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); | |||
172 | ||||
173 | /* ok to go to splbio since this only happens in error path */ | |||
174 | s = splbio()splraise(0x6); | |||
175 | old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; | |||
176 | ||||
177 | /* multiple IOs to the same chunk that fail will come through here */ | |||
178 | if (old_state == new_state) | |||
179 | goto done; | |||
180 | ||||
181 | switch (old_state) { | |||
182 | case BIOC_SDONLINE0x00: | |||
183 | switch (new_state) { | |||
184 | case BIOC_SDOFFLINE0x01: | |||
185 | case BIOC_SDSCRUB0x06: | |||
186 | break; | |||
187 | default: | |||
188 | goto die; | |||
189 | } | |||
190 | break; | |||
191 | ||||
192 | case BIOC_SDOFFLINE0x01: | |||
193 | if (new_state == BIOC_SDREBUILD0x03) { | |||
194 | ; | |||
195 | } else | |||
196 | goto die; | |||
197 | break; | |||
198 | ||||
199 | case BIOC_SDSCRUB0x06: | |||
200 | switch (new_state) { | |||
201 | case BIOC_SDONLINE0x00: | |||
202 | case BIOC_SDOFFLINE0x01: | |||
203 | break; | |||
204 | default: | |||
205 | goto die; | |||
206 | } | |||
207 | break; | |||
208 | ||||
209 | case BIOC_SDREBUILD0x03: | |||
210 | switch (new_state) { | |||
211 | case BIOC_SDONLINE0x00: | |||
212 | case BIOC_SDOFFLINE0x01: | |||
213 | break; | |||
214 | default: | |||
215 | goto die; | |||
216 | } | |||
217 | break; | |||
218 | ||||
219 | default: | |||
220 | die: | |||
221 | splx(s)spllower(s); /* XXX */ | |||
222 | panic("%s: %s: %s: invalid chunk state transition %d -> %d", | |||
223 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), | |||
224 | sd->sd_meta->ssd_devname, | |||
225 | sd->sd_vol.sv_chunks[c]->src_meta.scmi_scm_invariant.scm_devname, | |||
226 | old_state, new_state); | |||
227 | /* NOTREACHED */ | |||
228 | } | |||
229 | ||||
230 | sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; | |||
231 | sd->sd_set_vol_state(sd); | |||
232 | ||||
233 | sd->sd_must_flush = 1; | |||
234 | task_add(systq, &sd->sd_meta_save_task); | |||
235 | done: | |||
236 | splx(s)spllower(s); | |||
237 | } | |||
238 | ||||
239 | void | |||
240 | sr_raid6_set_vol_state(struct sr_discipline *sd) | |||
241 | { | |||
242 | int states[SR_MAX_STATES7]; | |||
243 | int new_state, i, s, nd; | |||
244 | int old_state = sd->sd_vol_status; | |||
245 | ||||
246 | /* XXX this is for RAID 0 */ | |||
247 | ||||
248 | DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", | |||
249 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); | |||
250 | ||||
251 | nd = sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no; | |||
252 | ||||
253 | for (i = 0; i < SR_MAX_STATES7; i++) | |||
254 | states[i] = 0; | |||
255 | ||||
256 | for (i = 0; i < nd; i++) { | |||
257 | s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; | |||
258 | if (s >= SR_MAX_STATES7) | |||
259 | panic("%s: %s: %s: invalid chunk state", | |||
260 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), | |||
261 | sd->sd_meta->ssd_devname, | |||
262 | sd->sd_vol.sv_chunks[i]->src_meta.scmi_scm_invariant.scm_devname); | |||
263 | states[s]++; | |||
264 | } | |||
265 | ||||
266 | if (states[BIOC_SDONLINE0x00] == nd) | |||
267 | new_state = BIOC_SVONLINE0x00; | |||
268 | else if (states[BIOC_SDONLINE0x00] < nd - 2) | |||
269 | new_state = BIOC_SVOFFLINE0x01; | |||
270 | else if (states[BIOC_SDSCRUB0x06] != 0) | |||
271 | new_state = BIOC_SVSCRUB0x04; | |||
272 | else if (states[BIOC_SDREBUILD0x03] != 0) | |||
273 | new_state = BIOC_SVREBUILD0x05; | |||
274 | else if (states[BIOC_SDONLINE0x00] < nd) | |||
275 | new_state = BIOC_SVDEGRADED0x02; | |||
276 | else { | |||
277 | printf("old_state = %d, ", old_state); | |||
278 | for (i = 0; i < nd; i++) | |||
279 | printf("%d = %d, ", i, | |||
280 | sd->sd_vol.sv_chunks[i]->src_meta.scm_status); | |||
281 | panic("invalid new_state"); | |||
282 | } | |||
283 | ||||
284 | DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", | |||
285 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, | |||
286 | old_state, new_state); | |||
287 | ||||
288 | switch (old_state) { | |||
289 | case BIOC_SVONLINE0x00: | |||
290 | switch (new_state) { | |||
291 | case BIOC_SVONLINE0x00: /* can go to same state */ | |||
292 | case BIOC_SVOFFLINE0x01: | |||
293 | case BIOC_SVDEGRADED0x02: | |||
294 | case BIOC_SVREBUILD0x05: /* happens on boot */ | |||
295 | break; | |||
296 | default: | |||
297 | goto die; | |||
298 | } | |||
299 | break; | |||
300 | ||||
301 | case BIOC_SVOFFLINE0x01: | |||
302 | /* XXX this might be a little too much */ | |||
303 | goto die; | |||
304 | ||||
305 | case BIOC_SVDEGRADED0x02: | |||
306 | switch (new_state) { | |||
307 | case BIOC_SVOFFLINE0x01: | |||
308 | case BIOC_SVREBUILD0x05: | |||
309 | case BIOC_SVDEGRADED0x02: /* can go to the same state */ | |||
310 | break; | |||
311 | default: | |||
312 | goto die; | |||
313 | } | |||
314 | break; | |||
315 | ||||
316 | case BIOC_SVBUILDING0x03: | |||
317 | switch (new_state) { | |||
318 | case BIOC_SVONLINE0x00: | |||
319 | case BIOC_SVOFFLINE0x01: | |||
320 | case BIOC_SVBUILDING0x03: /* can go to the same state */ | |||
321 | break; | |||
322 | default: | |||
323 | goto die; | |||
324 | } | |||
325 | break; | |||
326 | ||||
327 | case BIOC_SVSCRUB0x04: | |||
328 | switch (new_state) { | |||
329 | case BIOC_SVONLINE0x00: | |||
330 | case BIOC_SVOFFLINE0x01: | |||
331 | case BIOC_SVDEGRADED0x02: | |||
332 | case BIOC_SVSCRUB0x04: /* can go to same state */ | |||
333 | break; | |||
334 | default: | |||
335 | goto die; | |||
336 | } | |||
337 | break; | |||
338 | ||||
339 | case BIOC_SVREBUILD0x05: | |||
340 | switch (new_state) { | |||
341 | case BIOC_SVONLINE0x00: | |||
342 | case BIOC_SVOFFLINE0x01: | |||
343 | case BIOC_SVDEGRADED0x02: | |||
344 | case BIOC_SVREBUILD0x05: /* can go to the same state */ | |||
345 | break; | |||
346 | default: | |||
347 | goto die; | |||
348 | } | |||
349 | break; | |||
350 | ||||
351 | default: | |||
352 | die: | |||
353 | panic("%s: %s: invalid volume state transition %d -> %d", | |||
354 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), sd->sd_meta->ssd_devname, | |||
355 | old_state, new_state); | |||
356 | /* NOTREACHED */ | |||
357 | } | |||
358 | ||||
359 | sd->sd_vol_status = new_state; | |||
360 | } | |||
361 | ||||
362 | /* modes: | |||
363 | * readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, | |||
364 | * 0, qbuf, NULL, 0); | |||
365 | * readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, | |||
366 | * 0, pbuf, NULL, 0); | |||
367 | * readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, | |||
368 | * 0, pbuf, qbuf, gf_pow[i]); | |||
369 | */ | |||
370 | ||||
371 | int | |||
372 | sr_raid6_rw(struct sr_workunit *wu) | |||
373 | { | |||
374 | struct sr_workunit *wu_r = NULL((void *)0); | |||
| ||||
375 | struct sr_discipline *sd = wu->swu_dis; | |||
376 | struct scsi_xfer *xs = wu->swu_xs; | |||
377 | struct sr_chunk *scp; | |||
378 | int s, fail, i, gxinv, pxinv; | |||
379 | daddr_t blkno, lba; | |||
380 | int64_t chunk_offs, lbaoffs, offset, strip_offs; | |||
381 | int64_t strip_no, strip_size, strip_bits, row_size; | |||
382 | int64_t fchunk, no_chunk, chunk, qchunk, pchunk; | |||
383 | long length, datalen; | |||
384 | void *pbuf, *data, *qbuf; | |||
385 | ||||
386 | /* blkno and scsi error will be handled by sr_validate_io */ | |||
387 | if (sr_validate_io(wu, &blkno, "sr_raid6_rw")) | |||
388 | goto bad; | |||
389 | ||||
390 | strip_size = sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size; | |||
391 | strip_bits = sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits; | |||
392 | no_chunk = sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no - 2; | |||
393 | row_size = (no_chunk << strip_bits) >> DEV_BSHIFT9; | |||
394 | ||||
395 | data = xs->data; | |||
396 | datalen = xs->datalen; | |||
397 | lbaoffs = blkno << DEV_BSHIFT9; | |||
398 | ||||
399 | if (xs->flags & SCSI_DATA_OUT0x01000) { | |||
400 | if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP0x00001)) == NULL((void *)0)){ | |||
401 | printf("%s: can't get wu_r", DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname)); | |||
402 | goto bad; | |||
403 | } | |||
404 | wu_r->swu_state = SR_WU_INPROGRESS1; | |||
405 | wu_r->swu_flags |= SR_WUF_DISCIPLINE(1<<5); | |||
406 | } | |||
407 | ||||
408 | wu->swu_blk_start = 0; | |||
409 | while (datalen != 0) { | |||
410 | strip_no = lbaoffs >> strip_bits; | |||
411 | strip_offs = lbaoffs & (strip_size - 1); | |||
412 | chunk_offs = (strip_no / no_chunk) << strip_bits; | |||
413 | offset = chunk_offs + strip_offs; | |||
414 | ||||
415 | /* get size remaining in this stripe */ | |||
416 | length = MIN(strip_size - strip_offs, datalen)(((strip_size - strip_offs)<(datalen))?(strip_size - strip_offs ):(datalen)); | |||
417 | ||||
418 | /* map disk offset to parity/data drive */ | |||
419 | chunk = strip_no % no_chunk; | |||
420 | ||||
421 | qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2)); | |||
422 | if (qchunk == 0) | |||
423 | pchunk = no_chunk + 1; | |||
424 | else | |||
425 | pchunk = qchunk - 1; | |||
426 | if (chunk >= pchunk) | |||
427 | chunk++; | |||
428 | if (chunk >= qchunk) | |||
429 | chunk++; | |||
430 | ||||
431 | lba = offset >> DEV_BSHIFT9; | |||
432 | ||||
433 | /* XXX big hammer.. exclude I/O from entire stripe */ | |||
434 | if (wu->swu_blk_start
| |||
435 | wu->swu_blk_start = (strip_no / no_chunk) * row_size; | |||
436 | wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1); | |||
437 | ||||
438 | fail = 0; | |||
439 | fchunk = -1; | |||
440 | ||||
441 | /* Get disk-fail flags */ | |||
442 | for (i=0; i< no_chunk+2; i++) { | |||
443 | scp = sd->sd_vol.sv_chunks[i]; | |||
444 | switch (scp->src_meta.scm_status) { | |||
445 | case BIOC_SDOFFLINE0x01: | |||
446 | case BIOC_SDREBUILD0x03: | |||
447 | case BIOC_SDHOTSPARE0x04: | |||
448 | if (i == qchunk) | |||
449 | fail |= SR_FAILQ(1L << 3); | |||
450 | else if (i == pchunk) | |||
451 | fail |= SR_FAILP(1L << 2); | |||
452 | else if (i == chunk) | |||
453 | fail |= SR_FAILX(1L << 0); | |||
454 | else { | |||
455 | /* dual data-disk failure */ | |||
456 | fail |= SR_FAILY(1L << 1); | |||
457 | fchunk = i; | |||
458 | } | |||
459 | break; | |||
460 | } | |||
461 | } | |||
462 | if (xs->flags & SCSI_DATA_IN0x00800) { | |||
463 | if (!(fail & SR_FAILX(1L << 0))) { | |||
464 | /* drive is good. issue single read request */ | |||
465 | if (sr_raid6_addio(wu, chunk, lba, length, | |||
466 | data, xs->flags, 0, NULL((void *)0), NULL((void *)0), 0)) | |||
467 | goto bad; | |||
468 | } else if (fail & SR_FAILP(1L << 2)) { | |||
469 | /* Dx, P failed */ | |||
470 | printf("Disk %llx offline, " | |||
471 | "regenerating Dx+P\n", chunk); | |||
472 | ||||
473 | gxinv = gf_inv(gf_pow[chunk]); | |||
474 | ||||
475 | /* Calculate: Dx = (Q^Dz*gz)*inv(gx) */ | |||
476 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
477 | if (sr_raid6_addio(wu, qchunk, lba, length, | |||
478 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, gxinv)) | |||
479 | goto bad; | |||
480 | ||||
481 | /* Read Dz * gz * inv(gx) */ | |||
482 | for (i = 0; i < no_chunk+2; i++) { | |||
483 | if (i == qchunk || i == pchunk || i == chunk) | |||
484 | continue; | |||
485 | ||||
486 | if (sr_raid6_addio(wu, i, lba, length, | |||
487 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, | |||
488 | gf_mul(gf_pow[i], gxinv))) | |||
489 | goto bad; | |||
490 | } | |||
491 | ||||
492 | /* data will contain correct value on completion */ | |||
493 | } else if (fail & SR_FAILY(1L << 1)) { | |||
494 | /* Dx, Dy failed */ | |||
495 | printf("Disk %llx & %llx offline, " | |||
496 | "regenerating Dx+Dy\n", chunk, fchunk); | |||
497 | ||||
498 | gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]); | |||
499 | pxinv = gf_mul(gf_pow[fchunk], gxinv); | |||
500 | ||||
501 | /* read Q * inv(gx + gy) */ | |||
502 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
503 | if (sr_raid6_addio(wu, qchunk, lba, length, | |||
504 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, gxinv)) | |||
505 | goto bad; | |||
506 | ||||
507 | /* read P * gy * inv(gx + gy) */ | |||
508 | if (sr_raid6_addio(wu, pchunk, lba, length, | |||
509 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, pxinv)) | |||
510 | goto bad; | |||
511 | ||||
512 | /* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz | |||
513 | * Q: sr_raid6_xorp(qbuf, --, length); | |||
514 | * P: sr_raid6_xorp(pbuf, --, length); | |||
515 | * Dz: sr_raid6_xorp(pbuf, --, length); | |||
516 | * sr_raid6_xorq(qbuf, --, length, gf_pow[i]); | |||
517 | */ | |||
518 | for (i = 0; i < no_chunk+2; i++) { | |||
519 | if (i == qchunk || i == pchunk || | |||
520 | i == chunk || i == fchunk) | |||
521 | continue; | |||
522 | ||||
523 | /* read Dz * (gz + gy) * inv(gx + gy) */ | |||
524 | if (sr_raid6_addio(wu, i, lba, length, | |||
525 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, | |||
526 | pxinv ^ gf_mul(gf_pow[i], gxinv))) | |||
527 | goto bad; | |||
528 | } | |||
529 | } else { | |||
530 | /* Two cases: single disk (Dx) or (Dx+Q) | |||
531 | * Dx = Dz ^ P (same as RAID5) | |||
532 | */ | |||
533 | printf("Disk %llx offline, " | |||
534 | "regenerating Dx%s\n", chunk, | |||
535 | fail & SR_FAILQ(1L << 3) ? "+Q" : " single"); | |||
536 | ||||
537 | /* Calculate: Dx = P^Dz | |||
538 | * P: sr_raid6_xorp(data, ---, length); | |||
539 | * Dz: sr_raid6_xorp(data, ---, length); | |||
540 | */ | |||
541 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
542 | for (i = 0; i < no_chunk+2; i++) { | |||
543 | if (i != chunk && i != qchunk) { | |||
544 | /* Read Dz */ | |||
545 | if (sr_raid6_addio(wu, i, lba, | |||
546 | length, NULL((void *)0), SCSI_DATA_IN0x00800, | |||
547 | 0, data, NULL((void *)0), 0)) | |||
548 | goto bad; | |||
549 | } | |||
550 | } | |||
551 | ||||
552 | /* data will contain correct value on completion */ | |||
553 | } | |||
554 | } else { | |||
555 | /* XXX handle writes to failed/offline disk? */ | |||
556 | if (fail & (SR_FAILX(1L << 0)|SR_FAILQ(1L << 3)|SR_FAILP(1L << 2))) | |||
557 | goto bad; | |||
558 | ||||
559 | /* | |||
560 | * initialize pbuf with contents of new data to be | |||
561 | * written. This will be XORed with old data and old | |||
562 | * parity in the intr routine. The result in pbuf | |||
563 | * is the new parity data. | |||
564 | */ | |||
565 | qbuf = sr_block_get(sd, length); | |||
566 | if (qbuf == NULL((void *)0)) | |||
567 | goto bad; | |||
568 | ||||
569 | pbuf = sr_block_get(sd, length); | |||
570 | if (pbuf == NULL((void *)0)) | |||
571 | goto bad; | |||
572 | ||||
573 | /* Calculate P = Dn; Q = gn * Dn */ | |||
574 | if (gf_premul(gf_pow[chunk])) | |||
575 | goto bad; | |||
576 | sr_raid6_xorp(pbuf, data, length); | |||
577 | sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]); | |||
578 | ||||
579 | /* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */ | |||
580 | if (sr_raid6_addio(wu_r, chunk, lba, length, NULL((void *)0), | |||
581 | SCSI_DATA_IN0x00800, 0, pbuf, qbuf, gf_pow[chunk])) | |||
582 | goto bad; | |||
583 | ||||
584 | /* Read old xor-parity: P ^= P' */ | |||
585 | if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL((void *)0), | |||
586 | SCSI_DATA_IN0x00800, 0, pbuf, NULL((void *)0), 0)) | |||
587 | goto bad; | |||
588 | ||||
589 | /* Read old q-parity: Q ^= Q' */ | |||
590 | if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL((void *)0), | |||
591 | SCSI_DATA_IN0x00800, 0, qbuf, NULL((void *)0), 0)) | |||
592 | goto bad; | |||
593 | ||||
594 | /* write new data */ | |||
595 | if (sr_raid6_addio(wu, chunk, lba, length, data, | |||
596 | xs->flags, 0, NULL((void *)0), NULL((void *)0), 0)) | |||
597 | goto bad; | |||
598 | ||||
599 | /* write new xor-parity */ | |||
600 | if (sr_raid6_addio(wu, pchunk, lba, length, pbuf, | |||
601 | xs->flags, SR_CCBF_FREEBUF(1<<0), NULL((void *)0), NULL((void *)0), 0)) | |||
602 | goto bad; | |||
603 | ||||
604 | /* write new q-parity */ | |||
605 | if (sr_raid6_addio(wu, qchunk, lba, length, qbuf, | |||
606 | xs->flags, SR_CCBF_FREEBUF(1<<0), NULL((void *)0), NULL((void *)0), 0)) | |||
607 | goto bad; | |||
608 | } | |||
609 | ||||
610 | /* advance to next block */ | |||
611 | lbaoffs += length; | |||
612 | datalen -= length; | |||
613 | data += length; | |||
614 | } | |||
615 | ||||
616 | s = splbio()splraise(0x6); | |||
617 | if (wu_r) { | |||
618 | /* collide write request with reads */ | |||
619 | wu_r->swu_blk_start = wu->swu_blk_start; | |||
620 | wu_r->swu_blk_end = wu->swu_blk_end; | |||
621 | ||||
622 | wu->swu_state = SR_WU_DEFERRED5; | |||
623 | wu_r->swu_collider = wu; | |||
624 | TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link)do { (wu)->swu_link.tqe_next = ((void *)0); (wu)->swu_link .tqe_prev = (&sd->sd_wu_defq)->tqh_last; *(&sd-> sd_wu_defq)->tqh_last = (wu); (&sd->sd_wu_defq)-> tqh_last = &(wu)->swu_link.tqe_next; } while (0); | |||
625 | ||||
626 | wu = wu_r; | |||
627 | } | |||
628 | splx(s)spllower(s); | |||
629 | ||||
630 | sr_schedule_wu(wu); | |||
631 | ||||
632 | return (0); | |||
633 | bad: | |||
634 | /* XXX - can leak pbuf/qbuf on error. */ | |||
635 | /* wu is unwound by sr_wu_put */ | |||
636 | if (wu_r) | |||
637 | sr_scsi_wu_put(sd, wu_r); | |||
638 | return (1); | |||
639 | } | |||
640 | ||||
641 | /* Handle failure I/O completion */ | |||
642 | int | |||
643 | sr_failio(struct sr_workunit *wu) | |||
644 | { | |||
645 | struct sr_discipline *sd = wu->swu_dis; | |||
646 | struct sr_ccb *ccb; | |||
647 | ||||
648 | if (!(wu->swu_flags & SR_WUF_FAIL(1<<2))) | |||
649 | return (0); | |||
650 | ||||
651 | /* Wu is a 'fake'.. don't do real I/O just intr */ | |||
652 | TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link)do { (wu)->swu_link.tqe_next = ((void *)0); (wu)->swu_link .tqe_prev = (&sd->sd_wu_pendq)->tqh_last; *(&sd ->sd_wu_pendq)->tqh_last = (wu); (&sd->sd_wu_pendq )->tqh_last = &(wu)->swu_link.tqe_next; } while (0); | |||
653 | TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)for((ccb) = ((&wu->swu_ccb)->tqh_first); (ccb) != ( (void *)0); (ccb) = ((ccb)->ccb_link.tqe_next)) | |||
654 | sr_raid6_intr(&ccb->ccb_buf); | |||
655 | return (1); | |||
656 | } | |||
657 | ||||
658 | void | |||
659 | sr_raid6_intr(struct buf *bp) | |||
660 | { | |||
661 | struct sr_ccb *ccb = (struct sr_ccb *)bp; | |||
662 | struct sr_workunit *wu = ccb->ccb_wu; | |||
663 | struct sr_discipline *sd = wu->swu_dis; | |||
664 | struct sr_raid6_opaque *pq = ccb->ccb_opaque; | |||
665 | int s; | |||
666 | ||||
667 | DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n", | |||
668 | DEVNAME(sd->sd_sc), bp, wu->swu_xs); | |||
669 | ||||
670 | s = splbio()splraise(0x6); | |||
671 | sr_ccb_done(ccb); | |||
672 | ||||
673 | /* XOR data to result. */ | |||
674 | if (ccb->ccb_state == SR_CCB_OK2 && pq) { | |||
675 | if (pq->pbuf) | |||
676 | /* Calculate xor-parity */ | |||
677 | sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data, | |||
678 | ccb->ccb_buf.b_bcount); | |||
679 | if (pq->qbuf) | |||
680 | /* Calculate q-parity */ | |||
681 | sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data, | |||
682 | ccb->ccb_buf.b_bcount, pq->gn); | |||
683 | free(pq, M_DEVBUF2, 0); | |||
684 | ccb->ccb_opaque = NULL((void *)0); | |||
685 | } | |||
686 | ||||
687 | /* Free allocated data buffer. */ | |||
688 | if (ccb->ccb_flags & SR_CCBF_FREEBUF(1<<0)) { | |||
689 | sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount); | |||
690 | ccb->ccb_buf.b_data = NULL((void *)0); | |||
691 | } | |||
692 | ||||
693 | sr_wu_done(wu); | |||
694 | splx(s)spllower(s); | |||
695 | } | |||
696 | ||||
697 | int | |||
698 | sr_raid6_wu_done(struct sr_workunit *wu) | |||
699 | { | |||
700 | struct sr_discipline *sd = wu->swu_dis; | |||
701 | struct scsi_xfer *xs = wu->swu_xs; | |||
702 | ||||
703 | /* XXX - we have no way of propagating errors... */ | |||
704 | if (wu->swu_flags & SR_WUF_DISCIPLINE(1<<5)) | |||
705 | return SR_WU_OK2; | |||
706 | ||||
707 | /* XXX - This is insufficient for RAID 6. */ | |||
708 | if (wu->swu_ios_succeeded > 0) { | |||
709 | xs->error = XS_NOERROR0; | |||
710 | return SR_WU_OK2; | |||
711 | } | |||
712 | ||||
713 | if (xs->flags & SCSI_DATA_IN0x00800) { | |||
714 | printf("%s: retrying read on block %lld\n", | |||
715 | sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); | |||
716 | sr_wu_release_ccbs(wu); | |||
717 | wu->swu_state = SR_WU_RESTART7; | |||
718 | if (sd->sd_scsi_rw(wu) == 0) | |||
719 | return SR_WU_RESTART7; | |||
720 | } else { | |||
721 | printf("%s: permanently fail write on block %lld\n", | |||
722 | sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); | |||
723 | } | |||
724 | ||||
725 | wu->swu_state = SR_WU_FAILED3; | |||
726 | xs->error = XS_DRIVER_STUFFUP2; | |||
727 | ||||
728 | return SR_WU_FAILED3; | |||
729 | } | |||
730 | ||||
731 | int | |||
732 | sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno, | |||
733 | long len, void *data, int xsflags, int ccbflags, void *pbuf, | |||
734 | void *qbuf, int gn) | |||
735 | { | |||
736 | struct sr_discipline *sd = wu->swu_dis; | |||
| ||||
737 | struct sr_ccb *ccb; | |||
738 | struct sr_raid6_opaque *pqbuf; | |||
739 | ||||
740 | DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n", | |||
741 | (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk, | |||
742 | (long long)blkno, len, pbuf, qbuf); | |||
743 | ||||
744 | /* Allocate temporary buffer. */ | |||
745 | if (data == NULL((void *)0)) { | |||
746 | data = sr_block_get(sd, len); | |||
747 | if (data == NULL((void *)0)) | |||
748 | return (-1); | |||
749 | ccbflags |= SR_CCBF_FREEBUF(1<<0); | |||
750 | } | |||
751 | ||||
752 | ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags); | |||
753 | if (ccb == NULL((void *)0)) { | |||
754 | if (ccbflags & SR_CCBF_FREEBUF(1<<0)) | |||
755 | sr_block_put(sd, data, len); | |||
756 | return (-1); | |||
757 | } | |||
758 | if (pbuf || qbuf) { | |||
759 | /* XXX - can leak data and ccb on failure. */ | |||
760 | if (qbuf && gf_premul(gn)) | |||
761 | return (-1); | |||
762 | ||||
763 | /* XXX - should be preallocated? */ | |||
764 | pqbuf = malloc(sizeof(struct sr_raid6_opaque), | |||
765 | M_DEVBUF2, M_ZERO0x0008 | M_NOWAIT0x0002); | |||
766 | if (pqbuf == NULL((void *)0)) { | |||
767 | sr_ccb_put(ccb); | |||
768 | return (-1); | |||
769 | } | |||
770 | pqbuf->pbuf = pbuf; | |||
771 | pqbuf->qbuf = qbuf; | |||
772 | pqbuf->gn = gn; | |||
773 | ccb->ccb_opaque = pqbuf; | |||
774 | } | |||
775 | sr_wu_enqueue_ccb(wu, ccb); | |||
776 | ||||
777 | return (0); | |||
778 | } | |||
779 | ||||
/* Perform RAID6 parity calculation.
 *   P=xor parity, Q=GF256 parity, D=data, gn=disk# */

/*
 * Xor len bytes of d into p, one 32-bit word at a time.  Only whole
 * words are processed: len is divided by four and any remainder ignored.
 */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t	*dst = p;
	const uint32_t	*src = d;
	int		 words = len >> 2;

	while (words-- != 0) {
		*dst ^= *src;
		dst++;
		src++;
	}
}
791 | ||||
792 | void | |||
793 | sr_raid6_xorq(void *q, void *d, int len, int gn) | |||
794 | { | |||
795 | uint32_t *qbuf = q, *data = d, x; | |||
796 | uint8_t *gn_map = gf_map[gn]; | |||
797 | ||||
798 | len >>= 2; | |||
799 | while (len--) { | |||
800 | x = *data++; | |||
801 | *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) | | |||
802 | ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) | | |||
803 | ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) | | |||
804 | ((uint32_t)gn_map[(x >> 24) & 0xff] << 24)); | |||
805 | } | |||
806 | } | |||
807 | ||||
808 | /* Create GF256 log/pow tables: polynomial = 0x11D */ | |||
809 | void | |||
810 | gf_init(void) | |||
811 | { | |||
812 | int i; | |||
813 | uint8_t p = 1; | |||
814 | ||||
815 | /* use 2N pow table to avoid using % in multiply */ | |||
816 | for (i=0; i<256; i++) { | |||
817 | gf_log[p] = i; | |||
818 | gf_pow[i] = gf_pow[i+255] = p; | |||
819 | p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00)); | |||
820 | } | |||
821 | gf_log[0] = 512; | |||
822 | } | |||
823 | ||||
824 | uint8_t | |||
825 | gf_inv(uint8_t a) | |||
826 | { | |||
827 | return gf_pow[255 - gf_log[a]]; | |||
828 | } | |||
829 | ||||
830 | uint8_t | |||
831 | gf_mul(uint8_t a, uint8_t b) | |||
832 | { | |||
833 | return gf_pow[gf_log[a] + gf_log[b]]; | |||
834 | } | |||
835 | ||||
836 | /* Precalculate multiplication tables for drive gn */ | |||
837 | int | |||
838 | gf_premul(uint8_t gn) | |||
839 | { | |||
840 | int i; | |||
841 | ||||
842 | if (gf_map[gn] != NULL((void *)0)) | |||
843 | return (0); | |||
844 | ||||
845 | if ((gf_map[gn] = malloc(256, M_DEVBUF2, M_ZERO0x0008 | M_NOWAIT0x0002)) == NULL((void *)0)) | |||
846 | return (-1); | |||
847 | ||||
848 | for (i=0; i<256; i++) | |||
849 | gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]]; | |||
850 | return (0); | |||
851 | } |