/*
 * Static analyzer report.
 * File:    dev/softraid_raid6.c
 * Warning: line 736, column 29: access to field 'swu_dis' results in a
 *          dereference of a null pointer (loaded from variable 'wu').
 */
/* $OpenBSD: softraid_raid6.c,v 1.72 2021/05/16 15:12:37 deraadt Exp $ */
/*
 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
| 18 | ||||
| 19 | #include "bio.h" | |||
| 20 | ||||
| 21 | #include <sys/param.h> | |||
| 22 | #include <sys/systm.h> | |||
| 23 | #include <sys/buf.h> | |||
| 24 | #include <sys/device.h> | |||
| 25 | #include <sys/ioctl.h> | |||
| 26 | #include <sys/malloc.h> | |||
| 27 | #include <sys/kernel.h> | |||
| 28 | #include <sys/disk.h> | |||
| 29 | #include <sys/rwlock.h> | |||
| 30 | #include <sys/queue.h> | |||
| 31 | #include <sys/fcntl.h> | |||
| 32 | #include <sys/mount.h> | |||
| 33 | #include <sys/sensors.h> | |||
| 34 | #include <sys/stat.h> | |||
| 35 | #include <sys/task.h> | |||
| 36 | #include <sys/conf.h> | |||
| 37 | #include <sys/uio.h> | |||
| 38 | ||||
| 39 | #include <scsi/scsi_all.h> | |||
| 40 | #include <scsi/scsiconf.h> | |||
| 41 | #include <scsi/scsi_disk.h> | |||
| 42 | ||||
| 43 | #include <dev/softraidvar.h> | |||
| 44 | ||||
/*
 * GF(256) lookup tables shared by the RAID 6 parity code.
 * gf_pow and gf_log are filled in by gf_init(); gf_map entries are
 * allocated on first use by gf_premul().
 */
uint8_t	*gf_map[256];	/* gf_map[gn][x] = gn * x, one 256-byte table per gn */
uint8_t	gf_pow[768];	/* antilog table, over-sized so sums of logs index it */
int	gf_log[256];	/* log table; gf_log[0] holds the sentinel 512 */
| 48 | ||||
| 49 | /* RAID 6 functions. */ | |||
| 50 | int sr_raid6_create(struct sr_discipline *, struct bioc_createraid *, | |||
| 51 | int, int64_t); | |||
| 52 | int sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *, | |||
| 53 | int, void *); | |||
| 54 | int sr_raid6_init(struct sr_discipline *); | |||
| 55 | int sr_raid6_rw(struct sr_workunit *); | |||
| 56 | int sr_raid6_openings(struct sr_discipline *); | |||
| 57 | void sr_raid6_intr(struct buf *); | |||
| 58 | int sr_raid6_wu_done(struct sr_workunit *); | |||
| 59 | void sr_raid6_set_chunk_state(struct sr_discipline *, int, int); | |||
| 60 | void sr_raid6_set_vol_state(struct sr_discipline *); | |||
| 61 | ||||
| 62 | void sr_raid6_xorp(void *, void *, int); | |||
| 63 | void sr_raid6_xorq(void *, void *, int, int); | |||
| 64 | int sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long, | |||
| 65 | void *, int, int, void *, void *, int); | |||
| 66 | void sr_raid6_scrub(struct sr_discipline *); | |||
| 67 | int sr_failio(struct sr_workunit *); | |||
| 68 | ||||
| 69 | void gf_init(void); | |||
| 70 | uint8_t gf_inv(uint8_t); | |||
| 71 | int gf_premul(uint8_t); | |||
| 72 | uint8_t gf_mul(uint8_t, uint8_t); | |||
| 73 | ||||
/*
 * Per-strip failure flags computed in sr_raid6_rw() from the chunk states.
 */
#define SR_NOFAIL	0x00		/* no chunk of interest has failed */
#define SR_FAILX	(1L << 0)	/* the data chunk being accessed */
#define SR_FAILY	(1L << 1)	/* some other data chunk */
#define SR_FAILP	(1L << 2)	/* the xor-parity (P) chunk */
#define SR_FAILQ	(1L << 3)	/* the GF-parity (Q) chunk */
| 79 | ||||
/*
 * Parity context hung off ccb_opaque by sr_raid6_addio() and consumed by
 * sr_raid6_intr() when the ccb completes.
 */
struct sr_raid6_opaque {
	int	gn;	/* GF(256) multiplier used when folding into qbuf */
	void	*pbuf;	/* if set, completed data is XORed into it */
	void	*qbuf;	/* if set, gn * completed data is XORed into it */
};
| 85 | ||||
| 86 | /* discipline initialisation. */ | |||
| 87 | void | |||
| 88 | sr_raid6_discipline_init(struct sr_discipline *sd) | |||
| 89 | { | |||
| 90 | /* Initialize GF256 tables. */ | |||
| 91 | gf_init(); | |||
| 92 | ||||
| 93 | /* Fill out discipline members. */ | |||
| 94 | sd->sd_type = SR_MD_RAID68; | |||
| 95 | strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name)); | |||
| 96 | sd->sd_capabilities = SR_CAP_SYSTEM_DISK0x00000001 | SR_CAP_AUTO_ASSEMBLE0x00000002 | | |||
| 97 | SR_CAP_REDUNDANT0x00000010; | |||
| 98 | sd->sd_max_wu = SR_RAID6_NOWU16; | |||
| 99 | ||||
| 100 | /* Setup discipline specific function pointers. */ | |||
| 101 | sd->sd_assemble = sr_raid6_assemble; | |||
| 102 | sd->sd_create = sr_raid6_create; | |||
| 103 | sd->sd_openings = sr_raid6_openings; | |||
| 104 | sd->sd_scsi_rw = sr_raid6_rw; | |||
| 105 | sd->sd_scsi_intr = sr_raid6_intr; | |||
| 106 | sd->sd_scsi_wu_done = sr_raid6_wu_done; | |||
| 107 | sd->sd_set_chunk_state = sr_raid6_set_chunk_state; | |||
| 108 | sd->sd_set_vol_state = sr_raid6_set_vol_state; | |||
| 109 | } | |||
| 110 | ||||
| 111 | int | |||
| 112 | sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc, | |||
| 113 | int no_chunk, int64_t coerced_size) | |||
| 114 | { | |||
| 115 | if (no_chunk < 4) { | |||
| 116 | sr_error(sd->sd_sc, "%s requires four or more chunks", | |||
| 117 | sd->sd_name); | |||
| 118 | return EINVAL22; | |||
| 119 | } | |||
| 120 | ||||
| 121 | /* | |||
| 122 | * XXX add variable strip size later even though MAXPHYS is really | |||
| 123 | * the clever value, users like * to tinker with that type of stuff. | |||
| 124 | */ | |||
| 125 | sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size = MAXPHYS(64 * 1024); | |||
| 126 | sd->sd_meta->ssdi_sdd_invariant.ssd_size = (coerced_size & | |||
| 127 | ~(((u_int64_t)sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size >> | |||
| 128 | DEV_BSHIFT9) - 1)) * (no_chunk - 2); | |||
| 129 | ||||
| 130 | return sr_raid6_init(sd); | |||
| 131 | } | |||
| 132 | ||||
/*
 * Assemble an existing RAID 6 volume.  Only the runtime values need to be
 * set up; bc, no_chunk and data are not used.
 */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}
| 139 | ||||
| 140 | int | |||
| 141 | sr_raid6_init(struct sr_discipline *sd) | |||
| 142 | { | |||
| 143 | /* Initialise runtime values. */ | |||
| 144 | sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits = | |||
| 145 | sr_validate_stripsize(sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size); | |||
| 146 | if (sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits == -1) { | |||
| 147 | sr_error(sd->sd_sc, "invalid strip size"); | |||
| 148 | return EINVAL22; | |||
| 149 | } | |||
| 150 | ||||
| 151 | /* only if stripsize <= MAXPHYS */ | |||
| 152 | sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no); | |||
| 153 | ||||
| 154 | return 0; | |||
| 155 | } | |||
| 156 | ||||
| 157 | int | |||
| 158 | sr_raid6_openings(struct sr_discipline *sd) | |||
| 159 | { | |||
| 160 | return (sd->sd_max_wu >> 1); /* 2 wu's per IO */ | |||
| 161 | } | |||
| 162 | ||||
| 163 | void | |||
| 164 | sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state) | |||
| 165 | { | |||
| 166 | int old_state, s; | |||
| 167 | ||||
| 168 | /* XXX this is for RAID 0 */ | |||
| 169 | DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", | |||
| 170 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, | |||
| 171 | sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); | |||
| 172 | ||||
| 173 | /* ok to go to splbio since this only happens in error path */ | |||
| 174 | s = splbio()splraise(0x6); | |||
| 175 | old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; | |||
| 176 | ||||
| 177 | /* multiple IOs to the same chunk that fail will come through here */ | |||
| 178 | if (old_state == new_state) | |||
| 179 | goto done; | |||
| 180 | ||||
| 181 | switch (old_state) { | |||
| 182 | case BIOC_SDONLINE0x00: | |||
| 183 | switch (new_state) { | |||
| 184 | case BIOC_SDOFFLINE0x01: | |||
| 185 | case BIOC_SDSCRUB0x06: | |||
| 186 | break; | |||
| 187 | default: | |||
| 188 | goto die; | |||
| 189 | } | |||
| 190 | break; | |||
| 191 | ||||
| 192 | case BIOC_SDOFFLINE0x01: | |||
| 193 | if (new_state == BIOC_SDREBUILD0x03) { | |||
| 194 | ; | |||
| 195 | } else | |||
| 196 | goto die; | |||
| 197 | break; | |||
| 198 | ||||
| 199 | case BIOC_SDSCRUB0x06: | |||
| 200 | switch (new_state) { | |||
| 201 | case BIOC_SDONLINE0x00: | |||
| 202 | case BIOC_SDOFFLINE0x01: | |||
| 203 | break; | |||
| 204 | default: | |||
| 205 | goto die; | |||
| 206 | } | |||
| 207 | break; | |||
| 208 | ||||
| 209 | case BIOC_SDREBUILD0x03: | |||
| 210 | switch (new_state) { | |||
| 211 | case BIOC_SDONLINE0x00: | |||
| 212 | case BIOC_SDOFFLINE0x01: | |||
| 213 | break; | |||
| 214 | default: | |||
| 215 | goto die; | |||
| 216 | } | |||
| 217 | break; | |||
| 218 | ||||
| 219 | default: | |||
| 220 | die: | |||
| 221 | splx(s)spllower(s); /* XXX */ | |||
| 222 | panic("%s: %s: %s: invalid chunk state transition %d -> %d", | |||
| 223 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), | |||
| 224 | sd->sd_meta->ssd_devname, | |||
| 225 | sd->sd_vol.sv_chunks[c]->src_meta.scmi_scm_invariant.scm_devname, | |||
| 226 | old_state, new_state); | |||
| 227 | /* NOTREACHED */ | |||
| 228 | } | |||
| 229 | ||||
| 230 | sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; | |||
| 231 | sd->sd_set_vol_state(sd); | |||
| 232 | ||||
| 233 | sd->sd_must_flush = 1; | |||
| 234 | task_add(systq, &sd->sd_meta_save_task); | |||
| 235 | done: | |||
| 236 | splx(s)spllower(s); | |||
| 237 | } | |||
| 238 | ||||
| 239 | void | |||
| 240 | sr_raid6_set_vol_state(struct sr_discipline *sd) | |||
| 241 | { | |||
| 242 | int states[SR_MAX_STATES7]; | |||
| 243 | int new_state, i, s, nd; | |||
| 244 | int old_state = sd->sd_vol_status; | |||
| 245 | ||||
| 246 | /* XXX this is for RAID 0 */ | |||
| 247 | ||||
| 248 | DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", | |||
| 249 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); | |||
| 250 | ||||
| 251 | nd = sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no; | |||
| 252 | ||||
| 253 | for (i = 0; i < SR_MAX_STATES7; i++) | |||
| 254 | states[i] = 0; | |||
| 255 | ||||
| 256 | for (i = 0; i < nd; i++) { | |||
| 257 | s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; | |||
| 258 | if (s >= SR_MAX_STATES7) | |||
| 259 | panic("%s: %s: %s: invalid chunk state", | |||
| 260 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), | |||
| 261 | sd->sd_meta->ssd_devname, | |||
| 262 | sd->sd_vol.sv_chunks[i]->src_meta.scmi_scm_invariant.scm_devname); | |||
| 263 | states[s]++; | |||
| 264 | } | |||
| 265 | ||||
| 266 | if (states[BIOC_SDONLINE0x00] == nd) | |||
| 267 | new_state = BIOC_SVONLINE0x00; | |||
| 268 | else if (states[BIOC_SDONLINE0x00] < nd - 2) | |||
| 269 | new_state = BIOC_SVOFFLINE0x01; | |||
| 270 | else if (states[BIOC_SDSCRUB0x06] != 0) | |||
| 271 | new_state = BIOC_SVSCRUB0x04; | |||
| 272 | else if (states[BIOC_SDREBUILD0x03] != 0) | |||
| 273 | new_state = BIOC_SVREBUILD0x05; | |||
| 274 | else if (states[BIOC_SDONLINE0x00] < nd) | |||
| 275 | new_state = BIOC_SVDEGRADED0x02; | |||
| 276 | else { | |||
| 277 | printf("old_state = %d, ", old_state); | |||
| 278 | for (i = 0; i < nd; i++) | |||
| 279 | printf("%d = %d, ", i, | |||
| 280 | sd->sd_vol.sv_chunks[i]->src_meta.scm_status); | |||
| 281 | panic("invalid new_state"); | |||
| 282 | } | |||
| 283 | ||||
| 284 | DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", | |||
| 285 | DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, | |||
| 286 | old_state, new_state); | |||
| 287 | ||||
| 288 | switch (old_state) { | |||
| 289 | case BIOC_SVONLINE0x00: | |||
| 290 | switch (new_state) { | |||
| 291 | case BIOC_SVONLINE0x00: /* can go to same state */ | |||
| 292 | case BIOC_SVOFFLINE0x01: | |||
| 293 | case BIOC_SVDEGRADED0x02: | |||
| 294 | case BIOC_SVREBUILD0x05: /* happens on boot */ | |||
| 295 | break; | |||
| 296 | default: | |||
| 297 | goto die; | |||
| 298 | } | |||
| 299 | break; | |||
| 300 | ||||
| 301 | case BIOC_SVOFFLINE0x01: | |||
| 302 | /* XXX this might be a little too much */ | |||
| 303 | goto die; | |||
| 304 | ||||
| 305 | case BIOC_SVDEGRADED0x02: | |||
| 306 | switch (new_state) { | |||
| 307 | case BIOC_SVOFFLINE0x01: | |||
| 308 | case BIOC_SVREBUILD0x05: | |||
| 309 | case BIOC_SVDEGRADED0x02: /* can go to the same state */ | |||
| 310 | break; | |||
| 311 | default: | |||
| 312 | goto die; | |||
| 313 | } | |||
| 314 | break; | |||
| 315 | ||||
| 316 | case BIOC_SVBUILDING0x03: | |||
| 317 | switch (new_state) { | |||
| 318 | case BIOC_SVONLINE0x00: | |||
| 319 | case BIOC_SVOFFLINE0x01: | |||
| 320 | case BIOC_SVBUILDING0x03: /* can go to the same state */ | |||
| 321 | break; | |||
| 322 | default: | |||
| 323 | goto die; | |||
| 324 | } | |||
| 325 | break; | |||
| 326 | ||||
| 327 | case BIOC_SVSCRUB0x04: | |||
| 328 | switch (new_state) { | |||
| 329 | case BIOC_SVONLINE0x00: | |||
| 330 | case BIOC_SVOFFLINE0x01: | |||
| 331 | case BIOC_SVDEGRADED0x02: | |||
| 332 | case BIOC_SVSCRUB0x04: /* can go to same state */ | |||
| 333 | break; | |||
| 334 | default: | |||
| 335 | goto die; | |||
| 336 | } | |||
| 337 | break; | |||
| 338 | ||||
| 339 | case BIOC_SVREBUILD0x05: | |||
| 340 | switch (new_state) { | |||
| 341 | case BIOC_SVONLINE0x00: | |||
| 342 | case BIOC_SVOFFLINE0x01: | |||
| 343 | case BIOC_SVDEGRADED0x02: | |||
| 344 | case BIOC_SVREBUILD0x05: /* can go to the same state */ | |||
| 345 | break; | |||
| 346 | default: | |||
| 347 | goto die; | |||
| 348 | } | |||
| 349 | break; | |||
| 350 | ||||
| 351 | default: | |||
| 352 | die: | |||
| 353 | panic("%s: %s: invalid volume state transition %d -> %d", | |||
| 354 | DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname), sd->sd_meta->ssd_devname, | |||
| 355 | old_state, new_state); | |||
| 356 | /* NOTREACHED */ | |||
| 357 | } | |||
| 358 | ||||
| 359 | sd->sd_vol_status = new_state; | |||
| 360 | } | |||
| 361 | ||||
/*
 * sr_raid6_addio() read modes.  The buffer passed in the pbuf slot has the
 * data read XORed into it; the buffer passed in the qbuf slot has the data
 * read, multiplied by gn, XORed into it:
 *   readq: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
 *		0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, qbuf, gf_pow[i]);
 */
| 370 | ||||
| 371 | int | |||
| 372 | sr_raid6_rw(struct sr_workunit *wu) | |||
| 373 | { | |||
| 374 | struct sr_workunit *wu_r = NULL((void *)0); | |||
| ||||
| 375 | struct sr_discipline *sd = wu->swu_dis; | |||
| 376 | struct scsi_xfer *xs = wu->swu_xs; | |||
| 377 | struct sr_chunk *scp; | |||
| 378 | int s, fail, i, gxinv, pxinv; | |||
| 379 | daddr_t blkno, lba; | |||
| 380 | int64_t chunk_offs, lbaoffs, offset, strip_offs; | |||
| 381 | int64_t strip_no, strip_size, strip_bits, row_size; | |||
| 382 | int64_t fchunk, no_chunk, chunk, qchunk, pchunk; | |||
| 383 | long length, datalen; | |||
| 384 | void *pbuf, *data, *qbuf; | |||
| 385 | ||||
| 386 | /* blkno and scsi error will be handled by sr_validate_io */ | |||
| 387 | if (sr_validate_io(wu, &blkno, "sr_raid6_rw")) | |||
| 388 | goto bad; | |||
| 389 | ||||
| 390 | strip_size = sd->sd_meta->ssdi_sdd_invariant.ssd_strip_size; | |||
| 391 | strip_bits = sd->mdssd_dis_specific.mdd_raid6.sr6_strip_bits; | |||
| 392 | no_chunk = sd->sd_meta->ssdi_sdd_invariant.ssd_chunk_no - 2; | |||
| 393 | row_size = (no_chunk << strip_bits) >> DEV_BSHIFT9; | |||
| 394 | ||||
| 395 | data = xs->data; | |||
| 396 | datalen = xs->datalen; | |||
| 397 | lbaoffs = blkno << DEV_BSHIFT9; | |||
| 398 | ||||
| 399 | if (xs->flags & SCSI_DATA_OUT0x01000) { | |||
| 400 | if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP0x00001)) == NULL((void *)0)){ | |||
| 401 | printf("%s: can't get wu_r", DEVNAME(sd->sd_sc)((sd->sd_sc)->sc_dev.dv_xname)); | |||
| 402 | goto bad; | |||
| 403 | } | |||
| 404 | wu_r->swu_state = SR_WU_INPROGRESS1; | |||
| 405 | wu_r->swu_flags |= SR_WUF_DISCIPLINE(1<<5); | |||
| 406 | } | |||
| 407 | ||||
| 408 | wu->swu_blk_start = 0; | |||
| 409 | while (datalen != 0) { | |||
| 410 | strip_no = lbaoffs >> strip_bits; | |||
| 411 | strip_offs = lbaoffs & (strip_size - 1); | |||
| 412 | chunk_offs = (strip_no / no_chunk) << strip_bits; | |||
| 413 | offset = chunk_offs + strip_offs; | |||
| 414 | ||||
| 415 | /* get size remaining in this stripe */ | |||
| 416 | length = MIN(strip_size - strip_offs, datalen)(((strip_size - strip_offs)<(datalen))?(strip_size - strip_offs ):(datalen)); | |||
| 417 | ||||
| 418 | /* map disk offset to parity/data drive */ | |||
| 419 | chunk = strip_no % no_chunk; | |||
| 420 | ||||
| 421 | qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2)); | |||
| 422 | if (qchunk == 0) | |||
| 423 | pchunk = no_chunk + 1; | |||
| 424 | else | |||
| 425 | pchunk = qchunk - 1; | |||
| 426 | if (chunk >= pchunk) | |||
| 427 | chunk++; | |||
| 428 | if (chunk >= qchunk) | |||
| 429 | chunk++; | |||
| 430 | ||||
| 431 | lba = offset >> DEV_BSHIFT9; | |||
| 432 | ||||
| 433 | /* XXX big hammer.. exclude I/O from entire stripe */ | |||
| 434 | if (wu->swu_blk_start
| |||
| 435 | wu->swu_blk_start = (strip_no / no_chunk) * row_size; | |||
| 436 | wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1); | |||
| 437 | ||||
| 438 | fail = 0; | |||
| 439 | fchunk = -1; | |||
| 440 | ||||
| 441 | /* Get disk-fail flags */ | |||
| 442 | for (i=0; i< no_chunk+2; i++) { | |||
| 443 | scp = sd->sd_vol.sv_chunks[i]; | |||
| 444 | switch (scp->src_meta.scm_status) { | |||
| 445 | case BIOC_SDOFFLINE0x01: | |||
| 446 | case BIOC_SDREBUILD0x03: | |||
| 447 | case BIOC_SDHOTSPARE0x04: | |||
| 448 | if (i == qchunk) | |||
| 449 | fail |= SR_FAILQ(1L << 3); | |||
| 450 | else if (i == pchunk) | |||
| 451 | fail |= SR_FAILP(1L << 2); | |||
| 452 | else if (i == chunk) | |||
| 453 | fail |= SR_FAILX(1L << 0); | |||
| 454 | else { | |||
| 455 | /* dual data-disk failure */ | |||
| 456 | fail |= SR_FAILY(1L << 1); | |||
| 457 | fchunk = i; | |||
| 458 | } | |||
| 459 | break; | |||
| 460 | } | |||
| 461 | } | |||
| 462 | if (xs->flags & SCSI_DATA_IN0x00800) { | |||
| 463 | if (!(fail & SR_FAILX(1L << 0))) { | |||
| 464 | /* drive is good. issue single read request */ | |||
| 465 | if (sr_raid6_addio(wu, chunk, lba, length, | |||
| 466 | data, xs->flags, 0, NULL((void *)0), NULL((void *)0), 0)) | |||
| 467 | goto bad; | |||
| 468 | } else if (fail & SR_FAILP(1L << 2)) { | |||
| 469 | /* Dx, P failed */ | |||
| 470 | printf("Disk %llx offline, " | |||
| 471 | "regenerating Dx+P\n", chunk); | |||
| 472 | ||||
| 473 | gxinv = gf_inv(gf_pow[chunk]); | |||
| 474 | ||||
| 475 | /* Calculate: Dx = (Q^Dz*gz)*inv(gx) */ | |||
| 476 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
| 477 | if (sr_raid6_addio(wu, qchunk, lba, length, | |||
| 478 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, gxinv)) | |||
| 479 | goto bad; | |||
| 480 | ||||
| 481 | /* Read Dz * gz * inv(gx) */ | |||
| 482 | for (i = 0; i < no_chunk+2; i++) { | |||
| 483 | if (i == qchunk || i == pchunk || i == chunk) | |||
| 484 | continue; | |||
| 485 | ||||
| 486 | if (sr_raid6_addio(wu, i, lba, length, | |||
| 487 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, | |||
| 488 | gf_mul(gf_pow[i], gxinv))) | |||
| 489 | goto bad; | |||
| 490 | } | |||
| 491 | ||||
| 492 | /* data will contain correct value on completion */ | |||
| 493 | } else if (fail & SR_FAILY(1L << 1)) { | |||
| 494 | /* Dx, Dy failed */ | |||
| 495 | printf("Disk %llx & %llx offline, " | |||
| 496 | "regenerating Dx+Dy\n", chunk, fchunk); | |||
| 497 | ||||
| 498 | gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]); | |||
| 499 | pxinv = gf_mul(gf_pow[fchunk], gxinv); | |||
| 500 | ||||
| 501 | /* read Q * inv(gx + gy) */ | |||
| 502 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
| 503 | if (sr_raid6_addio(wu, qchunk, lba, length, | |||
| 504 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, gxinv)) | |||
| 505 | goto bad; | |||
| 506 | ||||
| 507 | /* read P * gy * inv(gx + gy) */ | |||
| 508 | if (sr_raid6_addio(wu, pchunk, lba, length, | |||
| 509 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, pxinv)) | |||
| 510 | goto bad; | |||
| 511 | ||||
| 512 | /* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz | |||
| 513 | * Q: sr_raid6_xorp(qbuf, --, length); | |||
| 514 | * P: sr_raid6_xorp(pbuf, --, length); | |||
| 515 | * Dz: sr_raid6_xorp(pbuf, --, length); | |||
| 516 | * sr_raid6_xorq(qbuf, --, length, gf_pow[i]); | |||
| 517 | */ | |||
| 518 | for (i = 0; i < no_chunk+2; i++) { | |||
| 519 | if (i == qchunk || i == pchunk || | |||
| 520 | i == chunk || i == fchunk) | |||
| 521 | continue; | |||
| 522 | ||||
| 523 | /* read Dz * (gz + gy) * inv(gx + gy) */ | |||
| 524 | if (sr_raid6_addio(wu, i, lba, length, | |||
| 525 | NULL((void *)0), SCSI_DATA_IN0x00800, 0, NULL((void *)0), data, | |||
| 526 | pxinv ^ gf_mul(gf_pow[i], gxinv))) | |||
| 527 | goto bad; | |||
| 528 | } | |||
| 529 | } else { | |||
| 530 | /* Two cases: single disk (Dx) or (Dx+Q) | |||
| 531 | * Dx = Dz ^ P (same as RAID5) | |||
| 532 | */ | |||
| 533 | printf("Disk %llx offline, " | |||
| 534 | "regenerating Dx%s\n", chunk, | |||
| 535 | fail & SR_FAILQ(1L << 3) ? "+Q" : " single"); | |||
| 536 | ||||
| 537 | /* Calculate: Dx = P^Dz | |||
| 538 | * P: sr_raid6_xorp(data, ---, length); | |||
| 539 | * Dz: sr_raid6_xorp(data, ---, length); | |||
| 540 | */ | |||
| 541 | memset(data, 0, length)__builtin_memset((data), (0), (length)); | |||
| 542 | for (i = 0; i < no_chunk+2; i++) { | |||
| 543 | if (i != chunk && i != qchunk) { | |||
| 544 | /* Read Dz */ | |||
| 545 | if (sr_raid6_addio(wu, i, lba, | |||
| 546 | length, NULL((void *)0), SCSI_DATA_IN0x00800, | |||
| 547 | 0, data, NULL((void *)0), 0)) | |||
| 548 | goto bad; | |||
| 549 | } | |||
| 550 | } | |||
| 551 | ||||
| 552 | /* data will contain correct value on completion */ | |||
| 553 | } | |||
| 554 | } else { | |||
| 555 | /* XXX handle writes to failed/offline disk? */ | |||
| 556 | if (fail & (SR_FAILX(1L << 0)|SR_FAILQ(1L << 3)|SR_FAILP(1L << 2))) | |||
| 557 | goto bad; | |||
| 558 | ||||
| 559 | /* | |||
| 560 | * initialize pbuf with contents of new data to be | |||
| 561 | * written. This will be XORed with old data and old | |||
| 562 | * parity in the intr routine. The result in pbuf | |||
| 563 | * is the new parity data. | |||
| 564 | */ | |||
| 565 | qbuf = sr_block_get(sd, length); | |||
| 566 | if (qbuf == NULL((void *)0)) | |||
| 567 | goto bad; | |||
| 568 | ||||
| 569 | pbuf = sr_block_get(sd, length); | |||
| 570 | if (pbuf == NULL((void *)0)) | |||
| 571 | goto bad; | |||
| 572 | ||||
| 573 | /* Calculate P = Dn; Q = gn * Dn */ | |||
| 574 | if (gf_premul(gf_pow[chunk])) | |||
| 575 | goto bad; | |||
| 576 | sr_raid6_xorp(pbuf, data, length); | |||
| 577 | sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]); | |||
| 578 | ||||
| 579 | /* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */ | |||
| 580 | if (sr_raid6_addio(wu_r, chunk, lba, length, NULL((void *)0), | |||
| 581 | SCSI_DATA_IN0x00800, 0, pbuf, qbuf, gf_pow[chunk])) | |||
| 582 | goto bad; | |||
| 583 | ||||
| 584 | /* Read old xor-parity: P ^= P' */ | |||
| 585 | if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL((void *)0), | |||
| 586 | SCSI_DATA_IN0x00800, 0, pbuf, NULL((void *)0), 0)) | |||
| 587 | goto bad; | |||
| 588 | ||||
| 589 | /* Read old q-parity: Q ^= Q' */ | |||
| 590 | if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL((void *)0), | |||
| 591 | SCSI_DATA_IN0x00800, 0, qbuf, NULL((void *)0), 0)) | |||
| 592 | goto bad; | |||
| 593 | ||||
| 594 | /* write new data */ | |||
| 595 | if (sr_raid6_addio(wu, chunk, lba, length, data, | |||
| 596 | xs->flags, 0, NULL((void *)0), NULL((void *)0), 0)) | |||
| 597 | goto bad; | |||
| 598 | ||||
| 599 | /* write new xor-parity */ | |||
| 600 | if (sr_raid6_addio(wu, pchunk, lba, length, pbuf, | |||
| 601 | xs->flags, SR_CCBF_FREEBUF(1<<0), NULL((void *)0), NULL((void *)0), 0)) | |||
| 602 | goto bad; | |||
| 603 | ||||
| 604 | /* write new q-parity */ | |||
| 605 | if (sr_raid6_addio(wu, qchunk, lba, length, qbuf, | |||
| 606 | xs->flags, SR_CCBF_FREEBUF(1<<0), NULL((void *)0), NULL((void *)0), 0)) | |||
| 607 | goto bad; | |||
| 608 | } | |||
| 609 | ||||
| 610 | /* advance to next block */ | |||
| 611 | lbaoffs += length; | |||
| 612 | datalen -= length; | |||
| 613 | data += length; | |||
| 614 | } | |||
| 615 | ||||
| 616 | s = splbio()splraise(0x6); | |||
| 617 | if (wu_r) { | |||
| 618 | /* collide write request with reads */ | |||
| 619 | wu_r->swu_blk_start = wu->swu_blk_start; | |||
| 620 | wu_r->swu_blk_end = wu->swu_blk_end; | |||
| 621 | ||||
| 622 | wu->swu_state = SR_WU_DEFERRED5; | |||
| 623 | wu_r->swu_collider = wu; | |||
| 624 | TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link)do { (wu)->swu_link.tqe_next = ((void *)0); (wu)->swu_link .tqe_prev = (&sd->sd_wu_defq)->tqh_last; *(&sd-> sd_wu_defq)->tqh_last = (wu); (&sd->sd_wu_defq)-> tqh_last = &(wu)->swu_link.tqe_next; } while (0); | |||
| 625 | ||||
| 626 | wu = wu_r; | |||
| 627 | } | |||
| 628 | splx(s)spllower(s); | |||
| 629 | ||||
| 630 | sr_schedule_wu(wu); | |||
| 631 | ||||
| 632 | return (0); | |||
| 633 | bad: | |||
| 634 | /* XXX - can leak pbuf/qbuf on error. */ | |||
| 635 | /* wu is unwound by sr_wu_put */ | |||
| 636 | if (wu_r) | |||
| 637 | sr_scsi_wu_put(sd, wu_r); | |||
| 638 | return (1); | |||
| 639 | } | |||
| 640 | ||||
| 641 | /* Handle failure I/O completion */ | |||
| 642 | int | |||
| 643 | sr_failio(struct sr_workunit *wu) | |||
| 644 | { | |||
| 645 | struct sr_discipline *sd = wu->swu_dis; | |||
| 646 | struct sr_ccb *ccb; | |||
| 647 | ||||
| 648 | if (!(wu->swu_flags & SR_WUF_FAIL(1<<2))) | |||
| 649 | return (0); | |||
| 650 | ||||
| 651 | /* Wu is a 'fake'.. don't do real I/O just intr */ | |||
| 652 | TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link)do { (wu)->swu_link.tqe_next = ((void *)0); (wu)->swu_link .tqe_prev = (&sd->sd_wu_pendq)->tqh_last; *(&sd ->sd_wu_pendq)->tqh_last = (wu); (&sd->sd_wu_pendq )->tqh_last = &(wu)->swu_link.tqe_next; } while (0); | |||
| 653 | TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)for((ccb) = ((&wu->swu_ccb)->tqh_first); (ccb) != ( (void *)0); (ccb) = ((ccb)->ccb_link.tqe_next)) | |||
| 654 | sr_raid6_intr(&ccb->ccb_buf); | |||
| 655 | return (1); | |||
| 656 | } | |||
| 657 | ||||
| 658 | void | |||
| 659 | sr_raid6_intr(struct buf *bp) | |||
| 660 | { | |||
| 661 | struct sr_ccb *ccb = (struct sr_ccb *)bp; | |||
| 662 | struct sr_workunit *wu = ccb->ccb_wu; | |||
| 663 | struct sr_discipline *sd = wu->swu_dis; | |||
| 664 | struct sr_raid6_opaque *pq = ccb->ccb_opaque; | |||
| 665 | int s; | |||
| 666 | ||||
| 667 | DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n", | |||
| 668 | DEVNAME(sd->sd_sc), bp, wu->swu_xs); | |||
| 669 | ||||
| 670 | s = splbio()splraise(0x6); | |||
| 671 | sr_ccb_done(ccb); | |||
| 672 | ||||
| 673 | /* XOR data to result. */ | |||
| 674 | if (ccb->ccb_state == SR_CCB_OK2 && pq) { | |||
| 675 | if (pq->pbuf) | |||
| 676 | /* Calculate xor-parity */ | |||
| 677 | sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data, | |||
| 678 | ccb->ccb_buf.b_bcount); | |||
| 679 | if (pq->qbuf) | |||
| 680 | /* Calculate q-parity */ | |||
| 681 | sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data, | |||
| 682 | ccb->ccb_buf.b_bcount, pq->gn); | |||
| 683 | free(pq, M_DEVBUF2, 0); | |||
| 684 | ccb->ccb_opaque = NULL((void *)0); | |||
| 685 | } | |||
| 686 | ||||
| 687 | /* Free allocated data buffer. */ | |||
| 688 | if (ccb->ccb_flags & SR_CCBF_FREEBUF(1<<0)) { | |||
| 689 | sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount); | |||
| 690 | ccb->ccb_buf.b_data = NULL((void *)0); | |||
| 691 | } | |||
| 692 | ||||
| 693 | sr_wu_done(wu); | |||
| 694 | splx(s)spllower(s); | |||
| 695 | } | |||
| 696 | ||||
| 697 | int | |||
| 698 | sr_raid6_wu_done(struct sr_workunit *wu) | |||
| 699 | { | |||
| 700 | struct sr_discipline *sd = wu->swu_dis; | |||
| 701 | struct scsi_xfer *xs = wu->swu_xs; | |||
| 702 | ||||
| 703 | /* XXX - we have no way of propagating errors... */ | |||
| 704 | if (wu->swu_flags & SR_WUF_DISCIPLINE(1<<5)) | |||
| 705 | return SR_WU_OK2; | |||
| 706 | ||||
| 707 | /* XXX - This is insufficient for RAID 6. */ | |||
| 708 | if (wu->swu_ios_succeeded > 0) { | |||
| 709 | xs->error = XS_NOERROR0; | |||
| 710 | return SR_WU_OK2; | |||
| 711 | } | |||
| 712 | ||||
| 713 | if (xs->flags & SCSI_DATA_IN0x00800) { | |||
| 714 | printf("%s: retrying read on block %lld\n", | |||
| 715 | sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); | |||
| 716 | sr_wu_release_ccbs(wu); | |||
| 717 | wu->swu_state = SR_WU_RESTART7; | |||
| 718 | if (sd->sd_scsi_rw(wu) == 0) | |||
| 719 | return SR_WU_RESTART7; | |||
| 720 | } else { | |||
| 721 | printf("%s: permanently fail write on block %lld\n", | |||
| 722 | sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); | |||
| 723 | } | |||
| 724 | ||||
| 725 | wu->swu_state = SR_WU_FAILED3; | |||
| 726 | xs->error = XS_DRIVER_STUFFUP2; | |||
| 727 | ||||
| 728 | return SR_WU_FAILED3; | |||
| 729 | } | |||
| 730 | ||||
| 731 | int | |||
| 732 | sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno, | |||
| 733 | long len, void *data, int xsflags, int ccbflags, void *pbuf, | |||
| 734 | void *qbuf, int gn) | |||
| 735 | { | |||
| 736 | struct sr_discipline *sd = wu->swu_dis; | |||
| ||||
| 737 | struct sr_ccb *ccb; | |||
| 738 | struct sr_raid6_opaque *pqbuf; | |||
| 739 | ||||
| 740 | DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n", | |||
| 741 | (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk, | |||
| 742 | (long long)blkno, len, pbuf, qbuf); | |||
| 743 | ||||
| 744 | /* Allocate temporary buffer. */ | |||
| 745 | if (data == NULL((void *)0)) { | |||
| 746 | data = sr_block_get(sd, len); | |||
| 747 | if (data == NULL((void *)0)) | |||
| 748 | return (-1); | |||
| 749 | ccbflags |= SR_CCBF_FREEBUF(1<<0); | |||
| 750 | } | |||
| 751 | ||||
| 752 | ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags); | |||
| 753 | if (ccb == NULL((void *)0)) { | |||
| 754 | if (ccbflags & SR_CCBF_FREEBUF(1<<0)) | |||
| 755 | sr_block_put(sd, data, len); | |||
| 756 | return (-1); | |||
| 757 | } | |||
| 758 | if (pbuf || qbuf) { | |||
| 759 | /* XXX - can leak data and ccb on failure. */ | |||
| 760 | if (qbuf && gf_premul(gn)) | |||
| 761 | return (-1); | |||
| 762 | ||||
| 763 | /* XXX - should be preallocated? */ | |||
| 764 | pqbuf = malloc(sizeof(struct sr_raid6_opaque), | |||
| 765 | M_DEVBUF2, M_ZERO0x0008 | M_NOWAIT0x0002); | |||
| 766 | if (pqbuf == NULL((void *)0)) { | |||
| 767 | sr_ccb_put(ccb); | |||
| 768 | return (-1); | |||
| 769 | } | |||
| 770 | pqbuf->pbuf = pbuf; | |||
| 771 | pqbuf->qbuf = qbuf; | |||
| 772 | pqbuf->gn = gn; | |||
| 773 | ccb->ccb_opaque = pqbuf; | |||
| 774 | } | |||
| 775 | sr_wu_enqueue_ccb(wu, ccb); | |||
| 776 | ||||
| 777 | return (0); | |||
| 778 | } | |||
| 779 | ||||
/*
 * Perform RAID6 parity calculation.
 *   P=xor parity, Q=GF256 parity, D=data, gn=disk#
 *
 * sr_raid6_xorp: P ^= D over len bytes.  The bulk is processed 32 bits at
 * a time and any trailing len % 4 bytes one at a time.  A length of zero
 * or less leaves p untouched.
 */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t		*pbuf = p, *data = d;
	uint8_t			*ptail, *dtail;
	int			words;

	if (len <= 0)
		return;

	for (words = len >> 2; words > 0; words--)
		*pbuf++ ^= *data++;

	/* Bytes left over when len is not a multiple of four. */
	ptail = (uint8_t *)pbuf;
	dtail = (uint8_t *)data;
	for (len &= 3; len > 0; len--)
		*ptail++ ^= *dtail++;
}
| 791 | ||||
| 792 | void | |||
| 793 | sr_raid6_xorq(void *q, void *d, int len, int gn) | |||
| 794 | { | |||
| 795 | uint32_t *qbuf = q, *data = d, x; | |||
| 796 | uint8_t *gn_map = gf_map[gn]; | |||
| 797 | ||||
| 798 | len >>= 2; | |||
| 799 | while (len--) { | |||
| 800 | x = *data++; | |||
| 801 | *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) | | |||
| 802 | ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) | | |||
| 803 | ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) | | |||
| 804 | ((uint32_t)gn_map[(x >> 24) & 0xff] << 24)); | |||
| 805 | } | |||
| 806 | } | |||
| 807 | ||||
| 808 | /* Create GF256 log/pow tables: polynomial = 0x11D */ | |||
| 809 | void | |||
| 810 | gf_init(void) | |||
| 811 | { | |||
| 812 | int i; | |||
| 813 | uint8_t p = 1; | |||
| 814 | ||||
| 815 | /* use 2N pow table to avoid using % in multiply */ | |||
| 816 | for (i=0; i<256; i++) { | |||
| 817 | gf_log[p] = i; | |||
| 818 | gf_pow[i] = gf_pow[i+255] = p; | |||
| 819 | p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00)); | |||
| 820 | } | |||
| 821 | gf_log[0] = 512; | |||
| 822 | } | |||
| 823 | ||||
| 824 | uint8_t | |||
| 825 | gf_inv(uint8_t a) | |||
| 826 | { | |||
| 827 | return gf_pow[255 - gf_log[a]]; | |||
| 828 | } | |||
| 829 | ||||
| 830 | uint8_t | |||
| 831 | gf_mul(uint8_t a, uint8_t b) | |||
| 832 | { | |||
| 833 | return gf_pow[gf_log[a] + gf_log[b]]; | |||
| 834 | } | |||
| 835 | ||||
| 836 | /* Precalculate multiplication tables for drive gn */ | |||
| 837 | int | |||
| 838 | gf_premul(uint8_t gn) | |||
| 839 | { | |||
| 840 | int i; | |||
| 841 | ||||
| 842 | if (gf_map[gn] != NULL((void *)0)) | |||
| 843 | return (0); | |||
| 844 | ||||
| 845 | if ((gf_map[gn] = malloc(256, M_DEVBUF2, M_ZERO0x0008 | M_NOWAIT0x0002)) == NULL((void *)0)) | |||
| 846 | return (-1); | |||
| 847 | ||||
| 848 | for (i=0; i<256; i++) | |||
| 849 | gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]]; | |||
| 850 | return (0); | |||
| 851 | } |