| File: | kern/subr_hibernate.c |
| Warning: | line 1723, column 2: Value stored to 'blkctr' is never read |
| 1 | /* $OpenBSD: subr_hibernate.c,v 1.138 2022/09/03 18:17:15 mlarkin Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> |
| 5 | * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> |
| 6 | * |
| 7 | * Permission to use, copy, modify, and distribute this software for any |
| 8 | * purpose with or without fee is hereby granted, provided that the above |
| 9 | * copyright notice and this permission notice appear in all copies. |
| 10 | * |
| 11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 18 | */ |
| 19 | |
| 20 | #include <sys/hibernate.h> |
| 21 | #include <sys/malloc.h> |
| 22 | #include <sys/param.h> |
| 23 | #include <sys/tree.h> |
| 24 | #include <sys/systm.h> |
| 25 | #include <sys/disklabel.h> |
| 26 | #include <sys/disk.h> |
| 27 | #include <sys/conf.h> |
| 28 | #include <sys/buf.h> |
| 29 | #include <sys/fcntl.h> |
| 30 | #include <sys/stat.h> |
| 31 | #include <sys/atomic.h> |
| 32 | |
| 33 | #include <uvm/uvm.h> |
| 34 | #include <uvm/uvm_swap.h> |
| 35 | |
| 36 | #include <machine/hibernate.h> |
| 37 | |
| 38 | /* Make sure the signature can fit in one block */ |
| 39 | CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE); |
| 40 | |
| 41 | /* |
| 42 | * Hibernate piglet layout information |
| 43 | * |
| 44 | * The piglet is a scratch area of memory allocated by the suspending kernel. |
| 45 | * Its phys and virt addrs are recorded in the signature block. The piglet is |
| 46 | * used to guarantee an unused area of memory that can be used by the resuming |
| 47 | * kernel for various things. The piglet is excluded during unpack operations. |
| 48 | * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). |
| 49 | * |
| 50 | * Offset from piglet_base Purpose |
| 51 | * ---------------------------------------------------------------------------- |
| 52 | * 0 Private page for suspend I/O write functions |
| 53 | * 1*PAGE_SIZE I/O page used during hibernate suspend |
| 54 | * 2*PAGE_SIZE I/O page used during hibernate suspend |
| 55 | * 3*PAGE_SIZE copy page used during hibernate suspend |
| 56 | * 4*PAGE_SIZE final chunk ordering list (24 pages) |
| 57 | * 28*PAGE_SIZE RLE utility page |
| 58 | * 29*PAGE_SIZE start of hiballoc area |
| 59 | * 30*PAGE_SIZE preserved entropy |
| 60 | * 110*PAGE_SIZE end of hiballoc area (80 pages) |
| 61 | * 366*PAGE_SIZE end of retguard preservation region (256 pages) |
| 62 | * ... unused |
| 63 | * HIBERNATE_CHUNK_SIZE start of hibernate chunk table |
| 64 | * 2*HIBERNATE_CHUNK_SIZE bounce area for chunks being unpacked |
| 65 | * 4*HIBERNATE_CHUNK_SIZE end of piglet |
| 66 | */ |
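
The offsets in the layout table above are plain multiples of the page and chunk sizes. The macros below are purely illustrative (they do not exist in this file) and assume a 4096-byte PAGE_SIZE and the 4 MB HIBERNATE_CHUNK_SIZE mentioned above; they simply restate the table in C form:

/* Illustrative restatement of the piglet layout table; not part of this file. */
#define DEMO_PAGE_SIZE   4096UL       /* assumed PAGE_SIZE */
#define DEMO_CHUNK_SIZE  0x400000UL   /* assumed HIBERNATE_CHUNK_SIZE (4 MB) */

#define DEMO_PIGLET_IO_PAGES_OFF   (1 * DEMO_PAGE_SIZE)   /* suspend I/O pages */
#define DEMO_PIGLET_COPY_PAGE_OFF  (3 * DEMO_PAGE_SIZE)   /* copy page */
#define DEMO_PIGLET_CHUNK_ORD_OFF  (4 * DEMO_PAGE_SIZE)   /* chunk ordering list */
#define DEMO_PIGLET_RLE_PAGE_OFF   (28 * DEMO_PAGE_SIZE)  /* RLE utility page */
#define DEMO_PIGLET_HIBALLOC_OFF   (29 * DEMO_PAGE_SIZE)  /* start of hiballoc area */
#define DEMO_PIGLET_CHUNKTAB_OFF   (1 * DEMO_CHUNK_SIZE)  /* hibernate chunk table */
#define DEMO_PIGLET_BOUNCE_OFF     (2 * DEMO_CHUNK_SIZE)  /* bounce area for unpacking */
#define DEMO_PIGLET_SIZE           (4 * DEMO_CHUNK_SIZE)  /* end of piglet */
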
| 67 | |
| 68 | /* Temporary vaddr ranges used during hibernate */ |
| 69 | vaddr_t hibernate_temp_page; |
| 70 | vaddr_t hibernate_copy_page; |
| 71 | vaddr_t hibernate_rle_page; |
| 72 | |
| 73 | /* Hibernate info as read from disk during resume */ |
| 74 | union hibernate_info disk_hib; |
| 75 | |
| 76 | /* |
| 77 | * Global copy of the pig start address. This needs to be a global as we |
| 78 | * switch stacks after computing it - it can't be stored on the stack. |
| 79 | */ |
| 80 | paddr_t global_pig_start; |
| 81 | |
| 82 | /* |
| 83 | * Global copies of the piglet start addresses (PA/VA). We store these |
| 84 | * as globals to avoid having to carry them around as parameters, as the |
| 85 | * piglet is allocated early and freed late - its lifecycle extends beyond |
| 86 | * that of the hibernate info union which is calculated on suspend/resume. |
| 87 | */ |
| 88 | vaddr_t global_piglet_va; |
| 89 | paddr_t global_piglet_pa; |
| 90 | |
| 91 | /* #define HIB_DEBUG */ |
| 92 | #ifdef HIB_DEBUG |
| 93 | int hib_debug = 99; |
| 94 | #define DPRINTF(x...) do { if (hib_debug) printf(x); } while (0) |
| 95 | #define DNPRINTF(n,x...) do { if (hib_debug > (n)) printf(x); } while (0) |
| 96 | #else |
| 97 | #define DPRINTF(x...) |
| 98 | #define DNPRINTF(n,x...) |
| 99 | #endif |
| 100 | |
| 101 | #ifndef NO_PROPOLICE |
| 102 | extern long __guard_local; |
| 103 | #endif /* ! NO_PROPOLICE */ |
| 104 | |
| 105 | /* Retguard phys address (need to skip this region during unpack) */ |
| 106 | paddr_t retguard_start_phys, retguard_end_phys; |
| 107 | extern char __retguard_start, __retguard_end; |
| 108 | |
| 109 | void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); |
| 110 | int hibernate_calc_rle(paddr_t, paddr_t); |
| 111 | int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, |
| 112 | size_t *); |
| 113 | |
| 114 | #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE) |
| 115 | |
| 116 | /* |
| 117 | * Hib alloc enforced alignment. |
| 118 | */ |
| 119 | #define HIB_ALIGN 8 /* bytes alignment */ |
| 120 | |
| 121 | /* |
| 122 | * sizeof builtin operation, but with alignment constraint. |
| 123 | */ |
| 124 | #define HIB_SIZEOF(_type) roundup(sizeof(_type), HIB_ALIGN) |
| 125 | |
| 126 | struct hiballoc_entry { |
| 127 | size_t hibe_use; |
| 128 | size_t hibe_space; |
| 129 | RBT_ENTRY(hiballoc_entry) hibe_entry; |
| 130 | }; |
| 131 | |
| 132 | /* |
| 133 | * Sort hibernate memory ranges by ascending PA |
| 134 | */ |
| 135 | void |
| 136 | hibernate_sort_ranges(union hibernate_info *hib_info) |
| 137 | { |
| 138 | int i, j; |
| 139 | struct hibernate_memory_range *ranges; |
| 140 | paddr_t base, end; |
| 141 | |
| 142 | ranges = hib_info->ranges; |
| 143 | |
| 144 | for (i = 1; i < hib_info->nranges; i++) { |
| 145 | j = i; |
| 146 | while (j > 0 && ranges[j - 1].base > ranges[j].base) { |
| 147 | base = ranges[j].base; |
| 148 | end = ranges[j].end; |
| 149 | ranges[j].base = ranges[j - 1].base; |
| 150 | ranges[j].end = ranges[j - 1].end; |
| 151 | ranges[j - 1].base = base; |
| 152 | ranges[j - 1].end = end; |
| 153 | j--; |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | /* |
| 159 | * Compare hiballoc entries based on the address they manage. |
| 160 | * |
| 161 | * Since the address is fixed, relative to struct hiballoc_entry, |
| 162 | * we just compare the hiballoc_entry pointers. |
| 163 | */ |
| 164 | static __inline int |
| 165 | hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) |
| 166 | { |
| 167 | vaddr_t vl = (vaddr_t)l; |
| 168 | vaddr_t vr = (vaddr_t)r; |
| 169 | |
| 170 | return vl < vr ? -1 : (vl > vr); |
| 171 | } |
| 172 | |
| 173 | RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); |
| 174 | |
| 175 | /* |
| 176 | * Given a hiballoc entry, return the address it manages. |
| 177 | */ |
| 178 | static __inline void * |
| 179 | hib_entry_to_addr(struct hiballoc_entry *entry) |
| 180 | { |
| 181 | caddr_t addr; |
| 182 | |
| 183 | addr = (caddr_t)entry; |
| 184 | addr += HIB_SIZEOF(struct hiballoc_entry); |
| 185 | return addr; |
| 186 | } |
| 187 | |
| 188 | /* |
| 189 | * Given an address, find the hiballoc that corresponds. |
| 190 | */ |
| 191 | static __inline struct hiballoc_entry* |
| 192 | hib_addr_to_entry(void *addr_param) |
| 193 | { |
| 194 | caddr_t addr; |
| 195 | |
| 196 | addr = (caddr_t)addr_param; |
| 197 | addr -= HIB_SIZEOF(struct hiballoc_entry); |
| 198 | return (struct hiballoc_entry*)addr; |
| 199 | } |
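
hib_entry_to_addr() and hib_addr_to_entry() work because every allocation is preceded by its hiballoc_entry header at an HIB_ALIGN-rounded offset, so both conversions are exact inverses of one another. A self-contained userland sketch of that layout, using hypothetical demo_* names rather than the kernel types:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define DEMO_ALIGN 8
/* Same arithmetic as HIB_SIZEOF(): sizeof rounded up to DEMO_ALIGN. */
#define DEMO_SIZEOF(t) ((((sizeof(t)) + (DEMO_ALIGN - 1)) / DEMO_ALIGN) * DEMO_ALIGN)

struct demo_hdr {
	size_t use;
	size_t space;
};

/* The header sits immediately in front of the payload address handed out. */
static void *
hdr_to_addr(struct demo_hdr *h)
{
	return (char *)h + DEMO_SIZEOF(struct demo_hdr);
}

static struct demo_hdr *
addr_to_hdr(void *p)
{
	return (struct demo_hdr *)((char *)p - DEMO_SIZEOF(struct demo_hdr));
}

int
main(void)
{
	static char arena[64] __attribute__((aligned(8)));
	struct demo_hdr *h = (struct demo_hdr *)arena;
	void *p = hdr_to_addr(h);

	/* The round trip recovers the original header pointer. */
	assert(addr_to_hdr(p) == h);
	printf("header is %zu bytes, rounded to %zu\n",
	    sizeof(struct demo_hdr), DEMO_SIZEOF(struct demo_hdr));
	return 0;
}
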
| 200 | |
| 201 | RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); |
| 202 | |
| 203 | /* |
| 204 | * Allocate memory from the arena. |
| 205 | * |
| 206 | * Returns NULL if no memory is available. |
| 207 | */ |
| 208 | void * |
| 209 | hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) |
| 210 | { |
| 211 | struct hiballoc_entry *entry, *new_entry; |
| 212 | size_t find_sz; |
| 213 | |
| 214 | /* |
| 215 | * Enforce alignment of HIB_ALIGN bytes. |
| 216 | * |
| 217 | * Note that, because the entry is put in front of the allocation, |
| 218 | * 0-byte allocations are guaranteed a unique address. |
| 219 | */ |
| 220 | alloc_sz = roundup(alloc_sz, HIB_ALIGN); |
| 221 | |
| 222 | /* |
| 223 | * Find an entry with hibe_space >= find_sz. |
| 224 | * |
| 225 | * If the root node is not large enough, we switch to tree traversal. |
| 226 | * Because all entries are made at the bottom of the free space, |
| 227 | * traversal from the end has a slightly better chance of yielding |
| 228 | * a sufficiently large space. |
| 229 | */ |
| 230 | find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry); |
| 231 | entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs); |
| 232 | if (entry != NULL && entry->hibe_space < find_sz) { |
| 233 | RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) { |
| 234 | if (entry->hibe_space >= find_sz) |
| 235 | break; |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | /* |
| 240 | * Insufficient or too fragmented memory. |
| 241 | */ |
| 242 | if (entry == NULL) |
| 243 | return NULL; |
| 244 | |
| 245 | /* |
| 246 | * Create new entry in allocated space. |
| 247 | */ |
| 248 | new_entry = (struct hiballoc_entry*)( |
| 249 | (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); |
| 250 | new_entry->hibe_space = entry->hibe_space - find_sz; |
| 251 | new_entry->hibe_use = alloc_sz; |
| 252 | |
| 253 | /* |
| 254 | * Insert entry. |
| 255 | */ |
| 256 | if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL) |
| 257 | panic("hib_alloc: insert failure"); |
| 258 | entry->hibe_space = 0; |
| 259 | |
| 260 | /* Return address managed by entry. */ |
| 261 | return hib_entry_to_addr(new_entry); |
| 262 | } |
| 263 | |
| 264 | void |
| 265 | hib_getentropy(char **bufp, size_t *bufplen) |
| 266 | { |
| 267 | if (!bufp || !bufplen) |
| 268 | return; |
| 269 | |
| 270 | *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE)); |
| 271 | *bufplen = PAGE_SIZE; |
| 272 | } |
| 273 | |
| 274 | /* |
| 275 | * Free a pointer previously allocated from this arena. |
| 276 | * |
| 277 | * If addr is NULL, this will be silently accepted. |
| 278 | */ |
| 279 | void |
| 280 | hib_free(struct hiballoc_arena *arena, void *addr) |
| 281 | { |
| 282 | struct hiballoc_entry *entry, *prev; |
| 283 | |
| 284 | if (addr == NULL) |
| 285 | return; |
| 286 | |
| 287 | /* |
| 288 | * Derive entry from addr and check it is really in this arena. |
| 289 | */ |
| 290 | entry = hib_addr_to_entry(addr); |
| 291 | if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry) |
| 292 | panic("hib_free: freed item %p not in hib arena", addr); |
| 293 | |
| 294 | /* |
| 295 | * Give the space in entry to its predecessor. |
| 296 | * |
| 297 | * If entry has no predecessor, change its used space into free space |
| 298 | * instead. |
| 299 | */ |
| 300 | prev = RBT_PREV(hiballoc_addr, entry); |
| 301 | if (prev != NULL && |
| 302 | (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) + |
| 303 | prev->hibe_use + prev->hibe_space) == entry) { |
| 304 | /* Merge entry. */ |
| 305 | RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry); |
| 306 | prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) + |
| 307 | entry->hibe_use + entry->hibe_space; |
| 308 | } else { |
| 309 | /* Flip used memory to free space. */ |
| 310 | entry->hibe_space += entry->hibe_use; |
| 311 | entry->hibe_use = 0; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | /* |
| 316 | * Initialize hiballoc. |
| 317 | * |
| 318 | * The allocator will manage memory at ptr, which is len bytes. |
| 319 | */ |
| 320 | int |
| 321 | hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) |
| 322 | { |
| 323 | struct hiballoc_entry *entry; |
| 324 | caddr_t ptr; |
| 325 | size_t len; |
| 326 | |
| 327 | RBT_INIT(hiballoc_addr, &arena->hib_addrs); |
| 328 | |
| 329 | /* |
| 330 | * Hib allocator enforces HIB_ALIGN alignment. |
| 331 | * Fixup ptr and len. |
| 332 | */ |
| 333 | ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN); |
| 334 | len = p_len - ((size_t)ptr - (size_t)p_ptr); |
| 335 | len &= ~((size_t)HIB_ALIGN - 1); |
| 336 | |
| 337 | /* |
| 338 | * Insufficient memory to be able to allocate and also do bookkeeping. |
| 339 | */ |
| 340 | if (len <= HIB_SIZEOF(struct hiballoc_entry)) |
| 341 | return ENOMEM; |
| 342 | |
| 343 | /* |
| 344 | * Create entry describing space. |
| 345 | */ |
| 346 | entry = (struct hiballoc_entry*)ptr; |
| 347 | entry->hibe_use = 0; |
| 348 | entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry); |
| 349 | RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry); |
| 350 | |
| 351 | return 0; |
| 352 | } |
| 353 | |
| 354 | /* |
| 355 | * Zero all free memory. |
| 356 | */ |
| 357 | void |
| 358 | uvm_pmr_zero_everything(void) |
| 359 | { |
| 360 | struct uvm_pmemrange *pmr; |
| 361 | struct vm_page *pg; |
| 362 | int i; |
| 363 | |
| 364 | uvm_lock_fpageq(); |
| 365 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { |
| 366 | /* Zero single pages. */ |
| 367 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY])) |
| 368 | != NULL) { |
| 369 | uvm_pmr_remove(pmr, pg); |
| 370 | uvm_pagezero(pg); |
| 371 | atomic_setbits_int(&pg->pg_flags, PG_ZERO); |
| 372 | uvmexp.zeropages++; |
| 373 | uvm_pmr_insert(pmr, pg, 0); |
| 374 | } |
| 375 | |
| 376 | /* Zero multi page ranges. */ |
| 377 | while ((pg = RBT_ROOT(uvm_pmr_size, |
| 378 | &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) { |
| 379 | pg--; /* Size tree always has second page. */ |
| 380 | uvm_pmr_remove(pmr, pg); |
| 381 | for (i = 0; i < pg->fpgsz; i++) { |
| 382 | uvm_pagezero(&pg[i]); |
| 383 | atomic_setbits_int(&pg[i].pg_flags, PG_ZERO); |
| 384 | uvmexp.zeropages++; |
| 385 | } |
| 386 | uvm_pmr_insert(pmr, pg, 0); |
| 387 | } |
| 388 | } |
| 389 | uvm_unlock_fpageq(); |
| 390 | } |
| 391 | |
| 392 | /* |
| 393 | * Mark all memory as dirty. |
| 394 | * |
| 395 | * Used to inform the system that the clean memory isn't clean for some |
| 396 | * reason, for example because we just came back from hibernate. |
| 397 | */ |
| 398 | void |
| 399 | uvm_pmr_dirty_everything(void) |
| 400 | { |
| 401 | struct uvm_pmemrange *pmr; |
| 402 | struct vm_page *pg; |
| 403 | int i; |
| 404 | |
| 405 | uvm_lock_fpageq(); |
| 406 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { |
| 407 | /* Dirty single pages. */ |
| 408 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO])) |
| 409 | != NULL) { |
| 410 | uvm_pmr_remove(pmr, pg); |
| 411 | atomic_clearbits_int(&pg->pg_flags, PG_ZERO); |
| 412 | uvm_pmr_insert(pmr, pg, 0); |
| 413 | } |
| 414 | |
| 415 | /* Dirty multi page ranges. */ |
| 416 | while ((pg = RBT_ROOT(uvm_pmr_size, |
| 417 | &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) { |
| 418 | pg--; /* Size tree always has second page. */ |
| 419 | uvm_pmr_remove(pmr, pg); |
| 420 | for (i = 0; i < pg->fpgsz; i++) |
| 421 | atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO); |
| 422 | uvm_pmr_insert(pmr, pg, 0); |
| 423 | } |
| 424 | } |
| 425 | |
| 426 | uvmexp.zeropages = 0; |
| 427 | uvm_unlock_fpageq(); |
| 428 | } |
| 429 | |
| 430 | /* |
| 431 | * Allocate an area that can hold sz bytes and doesn't overlap with |
| 432 | * the piglet at piglet_pa. |
| 433 | */ |
| 434 | int |
| 435 | uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) |
| 436 | { |
| 437 | struct uvm_constraint_range pig_constraint; |
| 438 | struct kmem_pa_mode kp_pig = { |
| 439 | .kp_constraint = &pig_constraint, |
| 440 | .kp_maxseg = 1 |
| 441 | }; |
| 442 | vaddr_t va; |
| 443 | |
| 444 | sz = round_page(sz); |
| 445 | |
| 446 | pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE; |
| 447 | pig_constraint.ucr_high = -1; |
| 448 | |
| 449 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); |
| 450 | if (va == 0) { |
| 451 | pig_constraint.ucr_low = 0; |
| 452 | pig_constraint.ucr_high = piglet_pa - 1; |
| 453 | |
| 454 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); |
| 455 | if (va == 0) |
| 456 | return ENOMEM; |
| 457 | } |
| 458 | |
| 459 | pmap_extract(pmap_kernel(), va, pa); |
| 460 | return 0; |
| 461 | } |
| 462 | |
| 463 | /* |
| 464 | * Allocate a piglet area. |
| 465 | * |
| 466 | * This needs to be in DMA-safe memory. |
| 467 | * Piglets are aligned. |
| 468 | * |
| 469 | * sz and align in bytes. |
| 470 | * |
| 471 | * The call will sleep for the pagedaemon to attempt to free memory. |
| 472 | * The pagedaemon may decide its not possible to free enough memory, causing |
| 473 | * the allocation to fail. |
| 474 | */ |
| 475 | int |
| 476 | uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) |
| 477 | { |
| 478 | struct kmem_pa_mode kp_piglet = { |
| 479 | .kp_constraint = &dma_constraint, |
| 480 | .kp_align = align, |
| 481 | .kp_maxseg = 1 |
| 482 | }; |
| 483 | |
| 484 | /* Ensure align is a power of 2 */ |
| 485 | KASSERT((align & (align - 1)) == 0); |
| 486 | |
| 487 | /* |
| 488 | * Fixup arguments: align must be at least PAGE_SIZE, |
| 489 | * sz will be converted to pagecount, since that is what |
| 490 | * pmemrange uses internally. |
| 491 | */ |
| 492 | if (align < PAGE_SIZE) |
| 493 | kp_piglet.kp_align = PAGE_SIZE; |
| 494 | |
| 495 | sz = round_page(sz); |
| 496 | |
| 497 | *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); |
| 498 | if (*va == 0) |
| 499 | return ENOMEM; |
| 500 | |
| 501 | pmap_extract(pmap_kernel(), *va, pa); |
| 502 | return 0; |
| 503 | } |
| 504 | |
| 505 | /* |
| 506 | * Free a piglet area. |
| 507 | */ |
| 508 | void |
| 509 | uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) |
| 510 | { |
| 511 | /* |
| 512 | * Fix parameters. |
| 513 | */ |
| 514 | sz = round_page(sz); |
| 515 | |
| 516 | /* |
| 517 | * Free the physical and virtual memory. |
| 518 | */ |
| 519 | km_free((void *)va, sz, &kv_any, &kp_dma_contig); |
| 520 | } |
| 521 | |
| 522 | /* |
| 523 | * Physmem RLE compression support. |
| 524 | * |
| 525 | * Given a physical page address, return the number of pages starting at the |
| 526 | * address that are free. Clamps to the number of pages in |
| 527 | * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. |
| 528 | */ |
| 529 | int |
| 530 | uvm_page_rle(paddr_t addr) |
| 531 | { |
| 532 | struct vm_page *pg, *pg_end; |
| 533 | struct vm_physseg *vmp; |
| 534 | int pseg_idx, off_idx; |
| 535 | |
| 536 | pseg_idx = vm_physseg_find(atop(addr), &off_idx); |
| 537 | if (pseg_idx == -1) |
| 538 | return 0; |
| 539 | |
| 540 | vmp = &vm_physmem[pseg_idx]; |
| 541 | pg = &vmp->pgs[off_idx]; |
| 542 | if (!(pg->pg_flags & PQ_FREE)) |
| 543 | return 0; |
| 544 | |
| 545 | /* |
| 546 | * Search for the first non-free page after pg. |
| 547 | * Note that the page may not be the first page in a free pmemrange, |
| 548 | * therefore pg->fpgsz cannot be used. |
| 549 | */ |
| 550 | for (pg_end = pg; pg_end <= vmp->lastpg && |
| 551 | (pg_end->pg_flags & PQ_FREE) == PQ_FREE && |
| 552 | (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++) |
| 553 | ; |
| 554 | return pg_end - pg; |
| 555 | } |
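
uvm_page_rle() is a bounded run-length scan: starting from one page, it counts how many consecutive pages are also free, stopping at the chunk-size limit. A standalone sketch of the same idea over a plain flag array, with hypothetical demo_* names and a run cap of 4 for brevity:

#include <stdio.h>

#define DEMO_NPAGES  16
#define DEMO_MAX_RUN 4          /* stands in for HIBERNATE_CHUNK_SIZE / PAGE_SIZE */

/*
 * Count how many consecutive "free" pages start at idx, clamped to
 * DEMO_MAX_RUN, returning 0 if the first page is not free - the same
 * shape as uvm_page_rle() above, over a plain flag array.
 */
static int
demo_page_rle(const int *freemap, int idx)
{
	int n = 0;

	while (idx + n < DEMO_NPAGES && freemap[idx + n] && n < DEMO_MAX_RUN)
		n++;
	return n;
}

int
main(void)
{
	int freemap[DEMO_NPAGES] = { 0, 1, 1, 1, 1, 1, 0, 1 };

	printf("run at 1: %d\n", demo_page_rle(freemap, 1));  /* 4 (clamped) */
	printf("run at 6: %d\n", demo_page_rle(freemap, 6));  /* 0 (not free) */
	return 0;
}
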
| 556 | |
| 557 | /* |
| 558 | * Fills out the hibernate_info union pointed to by hib |
| 559 | * with information about this machine (swap signature block |
| 560 | * offsets, number of memory ranges, kernel in use, etc) |
| 561 | */ |
| 562 | int |
| 563 | get_hibernate_info(union hibernate_info *hib, int suspend) |
| 564 | { |
| 565 | struct disklabel dl; |
| 566 | char err_string[128], *dl_ret; |
| 567 | int part; |
| 568 | SHA2_CTX ctx; |
| 569 | void *fn; |
| 570 | |
| 571 | #ifndef NO_PROPOLICE |
| 572 | /* Save propolice guard */ |
| 573 | hib->guard = __guard_local; |
| 574 | #endif /* ! NO_PROPOLICE */ |
| 575 | |
| 576 | /* Determine I/O function to use */ |
| 577 | hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev); |
| 578 | if (hib->io_func == NULL) |
| 579 | return (1); |
| 580 | |
| 581 | /* Calculate hibernate device */ |
| 582 | hib->dev = swdevt[0].sw_dev; |
| 583 | |
| 584 | /* Read disklabel (used to calculate signature and image offsets) */ |
| 585 | dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string)); |
| 586 | |
| 587 | if (dl_ret) { |
| 588 | printf("Hibernate error reading disklabel: %s\n", dl_ret); |
| 589 | return (1); |
| 590 | } |
| 591 | |
| 592 | /* Make sure we have a swap partition. */ |
| 593 | part = DISKPART(hib->dev); |
| 594 | if (dl.d_npartitions <= part || |
| 595 | dl.d_partitions[part].p_fstype != FS_SWAP || |
| 596 | DL_GETPSIZE(&dl.d_partitions[part]) == 0) |
| 597 | return (1); |
| 598 | |
| 599 | /* Magic number */ |
| 600 | hib->magic = HIBERNATE_MAGIC; |
| 601 | |
| 602 | /* Calculate signature block location */ |
| 603 | hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - |
| 604 | sizeof(union hibernate_info)/DEV_BSIZE; |
| 605 | |
| 606 | SHA256Init(&ctx); |
| 607 | SHA256Update(&ctx, version, strlen(version)); |
| 608 | fn = printf; |
| 609 | SHA256Update(&ctx, &fn, sizeof(fn)); |
| 610 | fn = malloc; |
| 611 | SHA256Update(&ctx, &fn, sizeof(fn)); |
| 612 | fn = km_alloc; |
| 613 | SHA256Update(&ctx, &fn, sizeof(fn)); |
| 614 | fn = strlen; |
| 615 | SHA256Update(&ctx, &fn, sizeof(fn)); |
| 616 | SHA256Final((u_int8_t *)&hib->kern_hash, &ctx); |
| 617 | |
| 618 | if (suspend) { |
| 619 | /* Grab the previously-allocated piglet addresses */ |
| 620 | hib->piglet_va = global_piglet_va; |
| 621 | hib->piglet_pa = global_piglet_pa; |
| 622 | hib->io_page = (void *)hib->piglet_va; |
| 623 | |
| 624 | /* |
| 625 | * Initialization of the hibernate IO function for drivers |
| 626 | * that need to do prep work (such as allocating memory or |
| 627 | * setting up data structures that cannot safely be done |
| 628 | * during suspend without causing side effects). There is |
| 629 | * a matching HIB_DONE call performed after the write is |
| 630 | * completed. |
| 631 | */ |
| 632 | if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part]), |
| 633 | (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[part]), |
| 634 | HIB_INIT, hib->io_page)) |
| 635 | goto fail; |
| 636 | |
| 637 | } else { |
| 638 | /* |
| 639 | * Resuming kernels use a regular private page for the driver |
| 640 | * No need to free this I/O page as it will vanish as part of |
| 641 | * the resume. |
| 642 | */ |
| 643 | hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); |
| 644 | if (!hib->io_page) |
| 645 | goto fail; |
| 646 | } |
| 647 | |
| 648 | if (get_hibernate_info_md(hib)) |
| 649 | goto fail; |
| 650 | |
| 651 | return (0); |
| 652 | |
| 653 | fail: |
| 654 | return (1); |
| 655 | } |
| 656 | |
| 657 | /* |
| 658 | * Allocate nitems*size bytes from the hiballoc area presently in use |
| 659 | */ |
| 660 | void * |
| 661 | hibernate_zlib_alloc(void *unused, int nitems, int size) |
| 662 | { |
| 663 | struct hibernate_zlib_state *hibernate_state; |
| 664 | |
| 665 | hibernate_state = |
| 666 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; |
| 667 | |
| 668 | return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); |
| 669 | } |
| 670 | |
| 671 | /* |
| 672 | * Free the memory pointed to by addr in the hiballoc area presently in |
| 673 | * use |
| 674 | */ |
| 675 | void |
| 676 | hibernate_zlib_free(void *unused, void *addr) |
| 677 | { |
| 678 | struct hibernate_zlib_state *hibernate_state; |
| 679 | |
| 680 | hibernate_state = |
| 681 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; |
| 682 | |
| 683 | hib_free(&hibernate_state->hiballoc_arena, addr); |
| 684 | } |
| 685 | |
| 686 | /* |
| 687 | * Inflate next page of data from the image stream. |
| 688 | * The rle parameter is modified on exit to contain the number of pages to |
| 689 | * skip in the output stream (or 0 if this page was inflated into). |
| 690 | * |
| 691 | * Returns 0 if the stream contains additional data, or 1 if the stream is |
| 692 | * finished. |
| 693 | */ |
| 694 | int |
| 695 | hibernate_inflate_page(int *rle) |
| 696 | { |
| 697 | struct hibernate_zlib_state *hibernate_state; |
| 698 | int i; |
| 699 | |
| 700 | hibernate_state = |
| 701 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; |
| 702 | |
| 703 | /* Set up the stream for RLE code inflate */ |
| 704 | hibernate_state->hib_stream.next_out = (unsigned char *)rle; |
| 705 | hibernate_state->hib_stream.avail_out = sizeof(*rle); |
| 706 | |
| 707 | /* Inflate RLE code */ |
| 708 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); |
| 709 | if (i != Z_OK && i != Z_STREAM_END) { |
| 710 | /* |
| 711 | * XXX - this will likely reboot/hang most machines |
| 712 | * since the console output buffer will be unmapped, |
| 713 | * but there's not much else we can do here. |
| 714 | */ |
| 715 | panic("rle inflate stream error"); |
| 716 | } |
| 717 | |
| 718 | if (hibernate_state->hib_stream.avail_out != 0) { |
| 719 | /* |
| 720 | * XXX - this will likely reboot/hang most machines |
| 721 | * since the console output buffer will be unmapped, |
| 722 | * but there's not much else we can do here. |
| 723 | */ |
| 724 | panic("rle short inflate error"); |
| 725 | } |
| 726 | |
| 727 | if (*rle < 0 || *rle > 1024) { |
| 728 | /* |
| 729 | * XXX - this will likely reboot/hang most machines |
| 730 | * since the console output buffer will be unmapped, |
| 731 | * but there's not much else we can do here. |
| 732 | */ |
| 733 | panic("invalid rle count"); |
| 734 | } |
| 735 | |
| 736 | if (i == Z_STREAM_END) |
| 737 | return (1); |
| 738 | |
| 739 | if (*rle != 0) |
| 740 | return (0); |
| 741 | |
| 742 | /* Set up the stream for page inflate */ |
| 743 | hibernate_state->hib_stream.next_out = |
| 744 | (unsigned char *)HIBERNATE_INFLATE_PAGE; |
| 745 | hibernate_state->hib_stream.avail_out = PAGE_SIZE; |
| 746 | |
| 747 | /* Process next block of data */ |
| 748 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); |
| 749 | if (i != Z_OK && i != Z_STREAM_END) { |
| 750 | /* |
| 751 | * XXX - this will likely reboot/hang most machines |
| 752 | * since the console output buffer will be unmapped, |
| 753 | * but there's not much else we can do here. |
| 754 | */ |
| 755 | panic("inflate error"); |
| 756 | } |
| 757 | |
| 758 | /* We should always have extracted a full page ... */ |
| 759 | if (hibernate_state->hib_stream.avail_out != 0) { |
| 760 | /* |
| 761 | * XXX - this will likely reboot/hang most machines |
| 762 | * since the console output buffer will be unmapped, |
| 763 | * but there's not much else we can do here. |
| 764 | */ |
| 765 | panic("incomplete page"); |
| 766 | } |
| 767 | |
| 768 | return (i == Z_STREAM_END); |
| 769 | } |
| 770 | |
| 771 | /* |
| 772 | * Inflate size bytes from src into dest, skipping any pages in |
| 773 | * [src..dest] that are special (see hibernate_inflate_skip) |
| 774 | * |
| 775 | * This function executes while using the resume-time stack |
| 776 | * and pmap, and therefore cannot use ddb/printf/etc. Doing so |
| 777 | * will likely hang or reset the machine since the console output buffer |
| 778 | * will be unmapped. |
| 779 | */ |
| 780 | void |
| 781 | hibernate_inflate_region(union hibernate_info *hib, paddr_t dest, |
| 782 | paddr_t src, size_t size) |
| 783 | { |
| 784 | int end_stream = 0, rle, skip; |
| 785 | struct hibernate_zlib_state *hibernate_state; |
| 786 | |
| 787 | hibernate_state = |
| 788 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; |
| 789 | |
| 790 | hibernate_state->hib_stream.next_in = (unsigned char *)src; |
| 791 | hibernate_state->hib_stream.avail_in = size; |
| 792 | |
| 793 | do { |
| 794 | /* |
| 795 | * Is this a special page? If yes, redirect the |
| 796 | * inflate output to a scratch page (eg, discard it) |
| 797 | */ |
| 798 | skip = hibernate_inflate_skip(hib, dest); |
| 799 | if (skip == HIB_SKIP) { |
| 800 | hibernate_enter_resume_mapping( |
| 801 | HIBERNATE_INFLATE_PAGE, |
| 802 | HIBERNATE_INFLATE_PAGE, 0); |
| 803 | } else if (skip == HIB_MOVE) { |
| 804 | /* |
| 805 | * Special case : retguard region. This gets moved |
| 806 | * temporarily into the piglet region and copied into |
| 807 | * place immediately before resume |
| 808 | */ |
| 809 | hibernate_enter_resume_mapping( |
| 810 | HIBERNATE_INFLATE_PAGE, |
| 811 | hib->piglet_pa + (110 * PAGE_SIZE) + |
| 812 | hib->retguard_ofs, 0); |
| 813 | hib->retguard_ofs += PAGE_SIZE; |
| 814 | if (hib->retguard_ofs > 255 * PAGE_SIZE) { |
| 815 | /* |
| 816 | * XXX - this will likely reboot/hang most |
| 817 | * machines since the console output |
| 818 | * buffer will be unmapped, but there's |
| 819 | * not much else we can do here. |
| 820 | */ |
| 821 | panic("retguard move error, out of space"); |
| 822 | } |
| 823 | } else { |
| 824 | hibernate_enter_resume_mapping( |
| 825 | HIBERNATE_INFLATE_PAGE, dest, 0); |
| 826 | } |
| 827 | |
| 828 | hibernate_flush(); |
| 829 | end_stream = hibernate_inflate_page(&rle); |
| 830 | |
| 831 | if (rle == 0) |
| 832 | dest += PAGE_SIZE; |
| 833 | else |
| 834 | dest += (rle * PAGE_SIZE); |
| 835 | } while (!end_stream); |
| 836 | } |
| 837 | |
| 838 | /* |
| 839 | * deflate from src into the I/O page, up to 'remaining' bytes |
| 840 | * |
| 841 | * Returns number of input bytes consumed, and may reset |
| 842 | * the 'remaining' parameter if not all the output space was consumed |
| 843 | * (this information is needed to know how much to write to disk) |
| 844 | */ |
| 845 | size_t |
| 846 | hibernate_deflate(union hibernate_info *hib, paddr_t src, |
| 847 | size_t *remaining) |
| 848 | { |
| 849 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; |
| 850 | struct hibernate_zlib_state *hibernate_state; |
| 851 | |
| 852 | hibernate_state = |
| 853 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; |
| 854 | |
| 855 | /* Set up the stream for deflate */ |
| 856 | hibernate_state->hib_stream.next_in = (unsigned char *)src; |
| 857 | hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK); |
| 858 | hibernate_state->hib_stream.next_out = |
| 859 | (unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining); |
| 860 | hibernate_state->hib_stream.avail_out = *remaining; |
| 861 | |
| 862 | /* Process next block of data */ |
| 863 | if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK) |
| 864 | panic("hibernate zlib deflate error"); |
| 865 | |
| 866 | /* Update pointers and return number of bytes consumed */ |
| 867 | *remaining = hibernate_state->hib_stream.avail_out; |
| 868 | return (PAGE_SIZE - (src & PAGE_MASK)) - |
| 869 | hibernate_state->hib_stream.avail_in; |
| 870 | } |
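
hibernate_deflate() follows the standard zlib calling pattern: point next_in/avail_in at the source page, point next_out/avail_out at the remaining space in the I/O page, call deflate() with Z_SYNC_FLUSH, and read avail_out and avail_in back to learn how much output space is left and how much input was consumed. A userland sketch of that pattern (requires zlib, link with -lz; buffer names are illustrative, and this is not the kernel's hibernate stream setup):

#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define OUT_SZ 4096

int
main(void)
{
	static unsigned char in[4096], out[OUT_SZ];
	z_stream strm;
	size_t remaining = OUT_SZ, consumed;

	memset(&strm, 0, sizeof(strm));
	if (deflateInit(&strm, Z_DEFAULT_COMPRESSION) != Z_OK)
		return 1;

	memset(in, 'A', sizeof(in));            /* highly compressible input */

	/* Same shape as hibernate_deflate(): one page in, bounded output. */
	strm.next_in = in;
	strm.avail_in = sizeof(in);
	strm.next_out = out + (OUT_SZ - remaining);
	strm.avail_out = remaining;

	if (deflate(&strm, Z_SYNC_FLUSH) != Z_OK)
		return 1;

	remaining = strm.avail_out;             /* unused output space */
	consumed = sizeof(in) - strm.avail_in;  /* input bytes consumed */
	printf("consumed %zu input bytes, used %zu output bytes\n",
	    consumed, OUT_SZ - remaining);

	deflateEnd(&strm);
	return 0;
}
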
| 871 | |
| 872 | /* |
| 873 | * Write the hibernation information specified in hiber_info |
| 874 | * to the location in swap previously calculated (last block of |
| 875 | * swap), called the "signature block". |
| 876 | */ |
| 877 | int |
| 878 | hibernate_write_signature(union hibernate_info *hib) |
| 879 | { |
| 880 | /* Write hibernate info to disk */ |
| 881 | return (hib->io_func(hib->dev, hib->sig_offset, |
| 882 | (vaddr_t)hib, DEV_BSIZE, HIB_W, |
| 883 | hib->io_page)); |
| 884 | } |
| 885 | |
| 886 | /* |
| 887 | * Write the memory chunk table to the area in swap immediately |
| 888 | * preceding the signature block. The chunk table is stored |
| 889 | * in the piglet when this function is called. Returns errno. |
| 890 | */ |
| 891 | int |
| 892 | hibernate_write_chunktable(union hibernate_info *hib) |
| 893 | { |
| 894 | vaddr_t hibernate_chunk_table_start; |
| 895 | size_t hibernate_chunk_table_size; |
| 896 | int i, err; |
| 897 | |
| 898 | hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE; |
| 899 | |
| 900 | hibernate_chunk_table_start = hib->piglet_va + |
| 901 | HIBERNATE_CHUNK_SIZE; |
| 902 | |
| 903 | /* Write chunk table */ |
| 904 | for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) { |
| 905 | if ((err = hib->io_func(hib->dev, |
| 906 | hib->chunktable_offset + (i/DEV_BSIZE), |
| 907 | (vaddr_t)(hibernate_chunk_table_start + i), |
| 908 | MAXPHYS, HIB_W, hib->io_page))) { |
| 909 | DPRINTF("chunktable write error: %d\n", err); |
| 910 | return (err); |
| 911 | } |
| 912 | } |
| 913 | |
| 914 | return (0); |
| 915 | } |
| 916 | |
| 917 | /* |
| 918 | * Write an empty hiber_info to the swap signature block, which is |
| 919 | * guaranteed to not match any valid hib. |
| 920 | */ |
| 921 | int |
| 922 | hibernate_clear_signature(union hibernate_info *hib) |
| 923 | { |
| 924 | union hibernate_info blank_hiber_info; |
| 925 | |
| 926 | /* Zero out a blank hiber_info */ |
| 927 | memset(&blank_hiber_info, 0, sizeof(union hibernate_info)); |
| 928 | |
| 929 | /* Write (zeroed) hibernate info to disk */ |
| 930 | DPRINTF("clearing hibernate signature block location: %lld\n", |
| 931 | hib->sig_offset); |
| 932 | if (hibernate_block_io(hib, |
| 933 | hib->sig_offset, |
| 934 | DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1)) |
| 935 | printf("Warning: could not clear hibernate signature\n"); |
| 936 | |
| 937 | return (0); |
| 938 | } |
| 939 | |
| 940 | /* |
| 941 | * Compare two hibernate_infos to determine if they are the same (eg, |
| 942 | * we should be performing a hibernate resume on this machine). |
| 943 | * Not all fields are checked - just enough to verify that the machine |
| 944 | * has the same memory configuration and kernel as the one that |
| 945 | * wrote the signature previously. |
| 946 | */ |
| 947 | int |
| 948 | hibernate_compare_signature(union hibernate_info *mine, |
| 949 | union hibernate_info *disk) |
| 950 | { |
| 951 | u_int i; |
| 952 | |
| 953 | if (mine->nranges != disk->nranges) { |
| 954 | printf("unhibernate failed: memory layout changed\n"); |
| 955 | return (1); |
| 956 | } |
| 957 | |
| 958 | if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) { |
| 959 | printf("unhibernate failed: original kernel changed\n"); |
| 960 | return (1); |
| 961 | } |
| 962 | |
| 963 | for (i = 0; i < mine->nranges; i++) { |
| 964 | if ((mine->ranges[i].base != disk->ranges[i].base) || |
| 965 | (mine->ranges[i].end != disk->ranges[i].end) ) { |
| 966 | DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", |
| 967 | i, |
| 968 | (void *)mine->ranges[i].base, |
| 969 | (void *)mine->ranges[i].end, |
| 970 | (void *)disk->ranges[i].base, |
| 971 | (void *)disk->ranges[i].end); |
| 972 | printf("unhibernate failed: memory size changed\n"); |
| 973 | return (1); |
| 974 | } |
| 975 | } |
| 976 | |
| 977 | return (0); |
| 978 | } |
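
The memory-range check above reduces to an element-wise comparison of two {base, end} arrays of the same length. A minimal standalone illustration with hypothetical demo_* types (not the kernel structs):

#include <stdio.h>

struct demo_range { unsigned long base, end; };

/* Return 0 only if both lists describe the same physical memory layout. */
static int
demo_compare_ranges(const struct demo_range *a, const struct demo_range *b,
    unsigned int na, unsigned int nb)
{
	unsigned int i;

	if (na != nb)
		return 1;
	for (i = 0; i < na; i++)
		if (a[i].base != b[i].base || a[i].end != b[i].end)
			return 1;
	return 0;
}

int
main(void)
{
	struct demo_range x[] = { { 0x1000, 0x9f000 }, { 0x100000, 0x7fe0000 } };
	struct demo_range y[] = { { 0x1000, 0x9f000 }, { 0x100000, 0x7fe0000 } };

	printf("layouts match: %s\n",
	    demo_compare_ranges(x, y, 2, 2) == 0 ? "yes" : "no");
	return 0;
}
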
| 979 | |
| 980 | /* |
| 981 | * Transfers xfer_size bytes between the hibernate device specified in |
| 982 | * hib_info at offset blkctr and the vaddr specified at dest. |
| 983 | * |
| 984 | * Separate offsets and pages are used to handle misaligned reads (reads |
| 985 | * that span a page boundary). |
| 986 | * |
| 987 | * blkctr specifies a relative offset (relative to the start of swap), |
| 988 | * not an absolute disk offset |
| 989 | * |
| 990 | */ |
| 991 | int |
| 992 | hibernate_block_io(union hibernate_info *hib, daddr_t blkctr, |
| 993 | size_t xfer_size, vaddr_t dest, int iswrite) |
| 994 | { |
| 995 | struct buf *bp; |
| 996 | struct bdevsw *bdsw; |
| 997 | int error; |
| 998 | |
| 999 | bp = geteblk(xfer_size); |
| 1000 | bdsw = &bdevsw[major(hib->dev)]; |
| 1001 | |
| 1002 | error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc); |
| 1003 | if (error) { |
| 1004 | printf("hibernate_block_io open failed\n"); |
| 1005 | return (1); |
| 1006 | } |
| 1007 | |
| 1008 | if (iswrite) |
| 1009 | bcopy((caddr_t)dest, bp->b_data, xfer_size); |
| 1010 | |
| 1011 | bp->b_bcount = xfer_size; |
| 1012 | bp->b_blkno = blkctr; |
| 1013 | CLR(bp->b_flags, B_READ | B_WRITE | B_DONE); |
| 1014 | SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW); |
| 1015 | bp->b_dev = hib->dev; |
| 1016 | (*bdsw->d_strategy)(bp); |
| 1017 | |
| 1018 | error = biowait(bp); |
| 1019 | if (error) { |
| 1020 | printf("hib block_io biowait error %d blk %lld size %zu\n", |
| 1021 | error, (long long)blkctr, xfer_size); |
| 1022 | error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR, |
| 1023 | curproc); |
| 1024 | if (error) |
| 1025 | printf("hibernate_block_io error close failed\n"); |
| 1026 | return (1); |
| 1027 | } |
| 1028 | |
| 1029 | error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc); |
| 1030 | if (error) { |
| 1031 | printf("hibernate_block_io close failed\n"); |
| 1032 | return (1); |
| 1033 | } |
| 1034 | |
| 1035 | if (!iswrite) |
| 1036 | bcopy(bp->b_data, (caddr_t)dest, xfer_size); |
| 1037 | |
| 1038 | bp->b_flags |= B_INVAL; |
| 1039 | brelse(bp); |
| 1040 | |
| 1041 | return (0); |
| 1042 | } |
| 1043 | |
| 1044 | /* |
| 1045 | * Preserve one page worth of random data, generated from the resuming |
| 1046 | * kernel's arc4random. After resume, this preserved entropy can be used |
| 1047 | * to further improve the un-hibernated machine's entropy pool. This |
| 1048 | * random data is stored in the piglet, which is preserved across the |
| 1049 | * unpack operation, and is restored later in the resume process (see |
| 1050 | * hib_getentropy) |
| 1051 | */ |
| 1052 | void |
| 1053 | hibernate_preserve_entropy(union hibernate_info *hib) |
| 1054 | { |
| 1055 | void *entropy; |
| 1056 | |
| 1057 | entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); |
| 1058 | |
| 1059 | if (!entropy) |
| 1060 | return; |
| 1061 | |
| 1062 | pmap_activate(curproc); |
| 1063 | pmap_kenter_pa((vaddr_t)entropy, |
| 1064 | (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)), |
| 1065 | PROT_READ | PROT_WRITE); |
| 1066 | |
| 1067 | arc4random_buf((void *)entropy, PAGE_SIZE); |
| 1068 | pmap_kremove((vaddr_t)entropy, PAGE_SIZE); |
| 1069 | km_free(entropy, PAGE_SIZE, &kv_any, &kp_none); |
| 1070 | } |
| 1071 | |
| 1072 | #ifndef NO_PROPOLICE |
| 1073 | vaddr_t |
| 1074 | hibernate_unprotect_ssp(void) |
| 1075 | { |
| 1076 | struct kmem_dyn_mode kd_avoidalias; |
| 1077 | vaddr_t va = trunc_page((vaddr_t)&__guard_local); |
| 1078 | paddr_t pa; |
| 1079 | |
| 1080 | pmap_extract(pmap_kernel(), va, &pa); |
| 1081 | |
| 1082 | memset(&kd_avoidalias, 0, sizeof kd_avoidalias); |
| 1083 | kd_avoidalias.kd_prefer = pa; |
| 1084 | kd_avoidalias.kd_waitok = 1; |
| 1085 | va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias); |
| 1086 | if (!va) |
| 1087 | panic("hibernate_unprotect_ssp"); |
| 1088 | |
| 1089 | pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE); |
| 1090 | pmap_update(pmap_kernel()); |
| 1091 | |
| 1092 | return va; |
| 1093 | } |
| 1094 | |
| 1095 | void |
| 1096 | hibernate_reprotect_ssp(vaddr_t va) |
| 1097 | { |
| 1098 | pmap_kremove(va, PAGE_SIZE); |
| 1099 | km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none); |
| 1100 | } |
| 1101 | #endif /* NO_PROPOLICE */ |
| 1102 | |
| 1103 | /* |
| 1104 | * Reads the signature block from swap, checks against the current machine's |
| 1105 | * information. If the information matches, perform a resume by reading the |
| 1106 | * saved image into the pig area, and unpacking. |
| 1107 | * |
| 1108 | * Must be called with interrupts enabled. |
| 1109 | */ |
| 1110 | void |
| 1111 | hibernate_resume(void) |
| 1112 | { |
| 1113 | union hibernate_info hib; |
| 1114 | int s; |
| 1115 | #ifndef NO_PROPOLICE |
| 1116 | vsize_t off = (vaddr_t)&__guard_local - |
| 1117 | trunc_page((vaddr_t)&__guard_local); |
| 1118 | vaddr_t guard_va; |
| 1119 | #endif |
| 1120 | |
| 1121 | /* Get current running machine's hibernate info */ |
| 1122 | memset(&hib, 0, sizeof(hib)); |
| 1123 | if (get_hibernate_info(&hib, 0)) { |
| 1124 | DPRINTF("couldn't retrieve machine's hibernate info\n"); |
| 1125 | return; |
| 1126 | } |
| 1127 | |
| 1128 | /* Read hibernate info from disk */ |
| 1129 | s = splbio(); |
| 1130 | |
| 1131 | DPRINTF("reading hibernate signature block location: %lld\n", |
| 1132 | hib.sig_offset); |
| 1133 | |
| 1134 | if (hibernate_block_io(&hib, |
| 1135 | hib.sig_offset, |
| 1136 | DEV_BSIZE, (vaddr_t)&disk_hib, 0)) { |
| 1137 | DPRINTF("error in hibernate read"); |
| 1138 | splx(s); |
| 1139 | return; |
| 1140 | } |
| 1141 | |
| 1142 | /* Check magic number */ |
| 1143 | if (disk_hib.magic != HIBERNATE_MAGIC) { |
| 1144 | DPRINTF("wrong magic number in hibernate signature: %x\n", |
| 1145 | disk_hib.magic); |
| 1146 | splx(s); |
| 1147 | return; |
| 1148 | } |
| 1149 | |
| 1150 | /* |
| 1151 | * We (possibly) found a hibernate signature. Clear signature first, |
| 1152 | * to prevent accidental resume or endless resume cycles later. |
| 1153 | */ |
| 1154 | if (hibernate_clear_signature(&hib)) { |
| 1155 | DPRINTF("error clearing hibernate signature block\n"); |
| 1156 | splx(s); |
| 1157 | return; |
| 1158 | } |
| 1159 | |
| 1160 | /* |
| 1161 | * If on-disk and in-memory hibernate signatures match, |
| 1162 | * this means we should do a resume from hibernate. |
| 1163 | */ |
| 1164 | if (hibernate_compare_signature(&hib, &disk_hib)) { |
| 1165 | DPRINTF("mismatched hibernate signature block\n"); |
| 1166 | splx(s); |
| 1167 | return; |
| 1168 | } |
| 1169 | disk_hib.dev = hib.dev; |
| 1170 | |
| 1171 | #ifdef MULTIPROCESSOR |
| 1172 | /* XXX - if we fail later, we may need to rehatch APs on some archs */ |
| 1173 | DPRINTF("hibernate: quiescing APs\n"); |
| 1174 | hibernate_quiesce_cpus(); |
| 1175 | #endif /* MULTIPROCESSOR */ |
| 1176 | |
| 1177 | /* Read the image from disk into the image (pig) area */ |
| 1178 | if (hibernate_read_image(&disk_hib)) |
| 1179 | goto fail; |
| 1180 | |
| 1181 | DPRINTF("hibernate: quiescing devices\n"); |
| 1182 | if (config_suspend_all(DVACT_QUIESCE) != 0) |
| 1183 | goto fail; |
| 1184 | |
| 1185 | #ifndef NO_PROPOLICE |
| 1186 | guard_va = hibernate_unprotect_ssp(); |
| 1187 | #endif /* NO_PROPOLICE */ |
| 1188 | |
| 1189 | (void) splhigh(); |
| 1190 | hibernate_disable_intr_machdep(); |
| 1191 | cold = 2; |
| 1192 | |
| 1193 | DPRINTF("hibernate: suspending devices\n"); |
| 1194 | if (config_suspend_all(DVACT_SUSPEND) != 0) { |
| 1195 | cold = 0; |
| 1196 | hibernate_enable_intr_machdep(); |
| 1197 | #ifndef NO_PROPOLICE |
| 1198 | hibernate_reprotect_ssp(guard_va); |
| 1199 | #endif /* ! NO_PROPOLICE */ |
| 1200 | goto fail; |
| 1201 | } |
| 1202 | |
| 1203 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, |
| 1204 | &retguard_start_phys); |
| 1205 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, |
| 1206 | &retguard_end_phys); |
| 1207 | |
| 1208 | hibernate_preserve_entropy(&disk_hib); |
| 1209 | |
| 1210 | printf("Unpacking image...\n"); |
| 1211 | |
| 1212 | /* Switch stacks */ |
| 1213 | DPRINTF("hibernate: switching stacks\n"); |
| 1214 | hibernate_switch_stack_machdep(); |
| 1215 | |
| 1216 | #ifndef NO_PROPOLICE |
| 1217 | /* Start using suspended kernel's propolice guard */ |
| 1218 | *(long *)(guard_va + off) = disk_hib.guard; |
| 1219 | hibernate_reprotect_ssp(guard_va); |
| 1220 | #endif /* ! NO_PROPOLICE */ |
| 1221 | |
| 1222 | /* Unpack and resume */ |
| 1223 | hibernate_unpack_image(&disk_hib); |
| 1224 | |
| 1225 | fail: |
| 1226 | splx(s); |
| 1227 | printf("\nUnable to resume hibernated image\n"); |
| 1228 | } |
| 1229 | |
| 1230 | /* |
| 1231 | * Unpack image from pig area to original location by looping through the |
| 1232 | * list of output chunks in the order they should be restored (fchunks). |
| 1233 | * |
| 1234 | * Note that due to the stack smash protector and the fact that we have |
| 1235 | * switched stacks, it is not permitted to return from this function. |
| 1236 | */ |
| 1237 | void |
| 1238 | hibernate_unpack_image(union hibernate_info *hib) |
| 1239 | { |
| 1240 | struct hibernate_disk_chunk *chunks; |
| 1241 | union hibernate_info local_hib; |
| 1242 | paddr_t image_cur = global_pig_start; |
| 1243 | short i, *fchunks; |
| 1244 | char *pva; |
| 1245 | |
| 1246 | /* Piglet will be identity mapped (VA == PA) */ |
| 1247 | pva = (char *)hib->piglet_pa; |
| 1248 | |
| 1249 | fchunks = (short *)(pva + (4 * PAGE_SIZE)); |
| 1250 | |
| 1251 | chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE); |
| 1252 | |
| 1253 | /* Can't use hiber_info that's passed in after this point */ |
| 1254 | bcopy(hib, &local_hib, sizeof(union hibernate_info)); |
| 1255 | local_hib.retguard_ofs = 0; |
| 1256 | |
| 1257 | /* VA == PA */ |
| 1258 | local_hib.piglet_va = local_hib.piglet_pa; |
| 1259 | |
| 1260 | /* |
| 1261 | * Point of no return. Once we pass this point, only kernel code can |
| 1262 | * be accessed. No global variables or other kernel data structures |
| 1263 | * are guaranteed to be coherent after unpack starts. |
| 1264 | * |
| 1265 | * The image is now in high memory (pig area), we unpack from the pig |
| 1266 | * to the correct location in memory. We'll eventually end up copying |
| 1267 | * on top of ourself, but we are assured the kernel code here is the |
| 1268 | * same between the hibernated and resuming kernel, and we are running |
| 1269 | * on our own stack, so the overwrite is ok. |
| 1270 | */ |
| 1271 | DPRINTF("hibernate: activating alt. pagetable and starting unpack\n"); |
| 1272 | hibernate_activate_resume_pt_machdep(); |
| 1273 | |
| 1274 | for (i = 0; i < local_hib.chunk_ctr; i++) { |
| 1275 | /* Reset zlib for inflate */ |
| 1276 | if (hibernate_zlib_reset(&local_hib, 0) != Z_OK) |
| 1277 | panic("hibernate failed to reset zlib for inflate"); |
| 1278 | |
| 1279 | hibernate_process_chunk(&local_hib, &chunks[fchunks[i]], |
| 1280 | image_cur); |
| 1281 | |
| 1282 | image_cur += chunks[fchunks[i]].compressed_size; |
| 1283 | } |
| 1284 | |
| 1285 | /* |
| 1286 | * Resume the loaded kernel by jumping to the MD resume vector. |
| 1287 | * We won't be returning from this call. We pass the location of |
| 1288 | * the retguard save area so the MD code can replace it before |
| 1289 | * resuming. See the piglet layout at the top of this file for |
| 1290 | * more information on the layout of the piglet area. |
| 1291 | * |
| 1292 | * We use 'global_piglet_va' here since by the time we are at |
| 1293 | * this point, we have already unpacked the image, and we want |
| 1294 | * the suspended kernel's view of what the piglet was, before |
| 1295 | * suspend occurred (since we will need to use that in the retguard |
| 1296 | * copy code in hibernate_resume_machdep.) |
| 1297 | */ |
| 1298 | hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE)); |
| 1299 | } |
| 1300 | |
| 1301 | /* |
| 1302 | * Bounce a compressed image chunk to the piglet, entering mappings for the |
| 1303 | * copied pages as needed |
| 1304 | */ |
| 1305 | void |
| 1306 | hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size) |
| 1307 | { |
| 1308 | size_t ct, ofs; |
| 1309 | paddr_t src = img_cur; |
| 1310 | vaddr_t dest = piglet; |
| 1311 | |
| 1312 | /* Copy first partial page */ |
| 1313 | ct = (PAGE_SIZE) - (src & PAGE_MASK); |
| 1314 | ofs = (src & PAGE_MASK); |
| 1315 | |
| 1316 | if (ct < PAGE_SIZE) { |
| 1317 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, |
| 1318 | (src - ofs), 0); |
| 1319 | hibernate_flush(); |
| 1320 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct); |
| 1321 | src += ct; |
| 1322 | dest += ct; |
| 1323 | } |
| 1324 | |
| 1325 | /* Copy remaining pages */ |
| 1326 | while (src < size + img_cur) { |
| 1327 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0); |
| 1328 | hibernate_flush(); |
| 1329 | ct = PAGE_SIZE; |
| 1330 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct); |
| 1331 | hibernate_flush(); |
| 1332 | src += ct; |
| 1333 | dest += ct; |
| 1334 | } |
| 1335 | } |
| 1336 | |
| 1337 | /* |
| 1338 | * Process a chunk by bouncing it to the piglet, followed by unpacking |
| 1339 | */ |
| 1340 | void |
| 1341 | hibernate_process_chunk(union hibernate_info *hib, |
| 1342 | struct hibernate_disk_chunk *chunk, paddr_t img_cur) |
| 1343 | { |
| 1344 | char *pva = (char *)hib->piglet_va; |
| 1345 | |
| 1346 | hibernate_copy_chunk_to_piglet(img_cur, |
| 1347 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size); |
| 1348 | hibernate_inflate_region(hib, chunk->base, |
| 1349 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), |
| 1350 | chunk->compressed_size); |
| 1351 | } |
| 1352 | |
| 1353 | /* |
| 1354 | * Calculate the RLE value for 'inaddr'. The result is clamped so that |
| 1355 | * the run does not extend past range_end. |
| 1356 | */ |
| 1357 | int |
| 1358 | hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) |
| 1359 | { |
| 1360 | int rle; |
| 1361 | |
| 1362 | rle = uvm_page_rle(inaddr); |
| 1363 | KASSERT(rle >= 0 && rle <= MAX_RLE)((rle >= 0 && rle <= (0x400000 / (1 << 12 ))) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1363, "rle >= 0 && rle <= MAX_RLE")); |
| 1364 | |
| 1365 | /* Clamp RLE to range end */ |
| 1366 | if (rle > 0 && inaddr + (rle * PAGE_SIZE(1 << 12)) > range_end) |
| 1367 | rle = (range_end - inaddr) / PAGE_SIZE(1 << 12); |
| 1368 | |
| 1369 | return (rle); |
| 1370 | } |
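Editor's note: a worked example of the clamp above, with hypothetical numbers.

/*
 * If uvm_page_rle() reports a run of 10 pages at 'inaddr' but only 3
 * pages remain before 'range_end', then
 *
 *   inaddr + 10 * PAGE_SIZE > range_end
 *   rle = (range_end - inaddr) / PAGE_SIZE = 3
 *
 * so callers never skip past the end of the region they are working on.
 */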
| 1371 | |
| 1372 | /* |
| 1373 | * Write the RLE byte for page at 'inaddr' to the output stream. |
| 1374 | * Returns the number of pages to be skipped at 'inaddr'. |
| 1375 | */ |
| 1376 | int |
| 1377 | hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr, |
| 1378 | paddr_t range_end, daddr_t *blkctr, |
| 1379 | size_t *out_remaining) |
| 1380 | { |
| 1381 | int rle, err, *rleloc; |
| 1382 | struct hibernate_zlib_state *hibernate_state; |
| 1383 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); |
| 1384 | |
| 1385 | hibernate_state = |
| 1386 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); |
| 1387 | |
| 1388 | rle = hibernate_calc_rle(inaddr, range_end); |
| 1389 | |
| 1390 | rleloc = (int *)hibernate_rle_page + MAX_RLE(0x400000 / (1 << 12)) - 1; |
| 1391 | *rleloc = rle; |
| 1392 | |
| 1393 | /* Deflate the RLE byte into the stream */ |
| 1394 | hibernate_deflate(hib, (paddr_t)rleloc, out_remaining); |
| 1395 | |
| 1396 | /* Did we fill the output page? If so, flush to disk */ |
| 1397 | if (*out_remaining == 0) { |
| 1398 | if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset, |
| 1399 | (vaddr_t)hibernate_io_page, PAGE_SIZE(1 << 12), HIB_W1, |
| 1400 | hib->io_page))) { |
| 1401 | DPRINTF("hib write error %d\n", err); |
| 1402 | return (err); |
| 1403 | } |
| 1404 | |
| 1405 | *blkctr += PAGE_SIZE(1 << 12) / DEV_BSIZE(1 << 9); |
| 1406 | *out_remaining = PAGE_SIZE(1 << 12); |
| 1407 | |
| 1408 | /* If we didn't deflate the entire RLE byte, finish it now */ |
| 1409 | if (hibernate_state->hib_stream.avail_in != 0) |
| 1410 | hibernate_deflate(hib, |
| 1411 | (vaddr_t)hibernate_state->hib_stream.next_in, |
| 1412 | out_remaining); |
| 1413 | } |
| 1414 | |
| 1415 | return (rle); |
| 1416 | } |
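Editor's sketch: a minimal user-space model of the run-length scheme implemented by hibernate_write_rle() together with the deflate loop in hibernate_write_chunks() below. It substitutes a hypothetical page_free[] array for uvm_page_rle(); every name here is illustrative and nothing in it is kernel API.

#include <stdio.h>

#define MODEL_PAGES	8

/* Length of the run of skippable pages starting at idx, clamped to the range. */
static int
model_rle(const int *page_free, int idx, int npages)
{
	int rle = 0;

	while (idx + rle < npages && page_free[idx + rle])
		rle++;
	return rle;
}

int
main(void)
{
	/* 1 = page carries no data and may be skipped, 0 = page must be written */
	int page_free[MODEL_PAGES] = { 0, 1, 1, 1, 0, 0, 1, 0 };
	int i = 0, rle;

	while (i < MODEL_PAGES) {
		/* An RLE value is emitted at every page boundary... */
		rle = model_rle(page_free, i, MODEL_PAGES);
		printf("page %d: rle=%d%s\n", i, rle,
		    rle == 0 ? " (page data follows)" : " (pages skipped)");
		/* ...then one data page is written, or rle pages are skipped. */
		i += (rle == 0) ? 1 : rle;
	}
	return 0;
}

Run against the sample array this prints rle=0 for pages 0, 4, 5 and 7, rle=3 at page 1 and rle=1 at page 6, mirroring how the writer below only deflates pages whose run length is zero.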
| 1417 | |
| 1418 | /* |
| 1419 | * Write a compressed version of this machine's memory to disk, at the |
| 1420 | * precalculated swap offset: |
| 1421 | * |
| 1422 | * end of swap - signature block size - chunk table size - memory size |
| 1423 | * |
| 1424 | * The function begins by looping through each phys mem range, cutting each |
| 1425 | * one into MD sized chunks. These chunks are then compressed individually |
| 1426 | * and written out to disk, in phys mem order. Some chunks might compress |
| 1427 | * more than others, and for this reason, each chunk's size is recorded |
| 1428 | * in the chunk table, which is written to disk after the image has |
| 1429 | * been properly compressed and written (in hibernate_write_chunktable). |
| 1430 | * |
| 1431 | * When this function is called, the machine is nearly suspended - most |
| 1432 | * devices are quiesced/suspended, interrupts are off, and cold has |
| 1433 | * been set. This means that there can be no side effects once the |
| 1434 | * write has started, and the write function itself can also have no |
| 1435 | * side effects. This also means no printfs are permitted (since printf |
| 1436 | * has side effects). |
| 1437 | * |
| 1438 | * Return values : |
| 1439 | * |
| 1440 | * 0 - success |
| 1441 | * EIO - I/O error occurred writing the chunks |
| 1442 | * EINVAL - Failed to write a complete range |
| 1443 | * ENOMEM - Memory allocation failure during preparation of the zlib arena |
| 1444 | */ |
| 1445 | int |
| 1446 | hibernate_write_chunks(union hibernate_info *hib) |
| 1447 | { |
| 1448 | paddr_t range_base, range_end, inaddr, temp_inaddr; |
| 1449 | size_t nblocks, out_remaining, used; |
| 1450 | struct hibernate_disk_chunk *chunks; |
| 1451 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); |
| 1452 | daddr_t blkctr = 0; |
| 1453 | int i, rle, err; |
| 1454 | struct hibernate_zlib_state *hibernate_state; |
| 1455 | |
| 1456 | hibernate_state = |
| 1457 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); |
| 1458 | |
| 1459 | hib->chunk_ctr = 0; |
| 1460 | |
| 1461 | /* |
| 1462 | * Map the utility VAs to the piglet. See the piglet map at the |
| 1463 | * top of this file for piglet layout information. |
| 1464 | */ |
| 1465 | hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE(1 << 12); |
| 1466 | hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE(1 << 12); |
| 1467 | |
| 1468 | chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + |
| 1469 | HIBERNATE_CHUNK_SIZE0x400000); |
| 1470 | |
| 1471 | /* Calculate the chunk regions */ |
| 1472 | for (i = 0; i < hib->nranges; i++) { |
| 1473 | range_base = hib->ranges[i].base; |
| 1474 | range_end = hib->ranges[i].end; |
| 1475 | |
| 1476 | inaddr = range_base; |
| 1477 | |
| 1478 | while (inaddr < range_end) { |
| 1479 | chunks[hib->chunk_ctr].base = inaddr; |
| 1480 | if (inaddr + HIBERNATE_CHUNK_SIZE0x400000 < range_end) |
| 1481 | chunks[hib->chunk_ctr].end = inaddr + |
| 1482 | HIBERNATE_CHUNK_SIZE0x400000; |
| 1483 | else |
| 1484 | chunks[hib->chunk_ctr].end = range_end; |
| 1485 | |
| 1486 | inaddr += HIBERNATE_CHUNK_SIZE0x400000; |
| 1487 | hib->chunk_ctr++; |
| 1488 | } |
| 1489 | } |
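Editor's note: a worked example of the chunk slicing above, with a hypothetical range.

/*
 * A 10 MB physical range [0x100000000, 0x100a00000) is cut into
 * HIBERNATE_CHUNK_SIZE (4 MB) pieces:
 *
 *   chunk k   : [0x100000000, 0x100400000)
 *   chunk k+1 : [0x100400000, 0x100800000)
 *   chunk k+2 : [0x100800000, 0x100a00000)   <- final, short chunk
 */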
| 1490 | |
| 1491 | uvm_pmr_dirty_everything(); |
| 1492 | uvm_pmr_zero_everything(); |
| 1493 | |
| 1494 | /* Compress and write the chunks in the chunktable */ |
| 1495 | for (i = 0; i < hib->chunk_ctr; i++) { |
| 1496 | range_base = chunks[i].base; |
| 1497 | range_end = chunks[i].end; |
| 1498 | |
| 1499 | chunks[i].offset = blkctr + hib->image_offset; |
| 1500 | |
| 1501 | /* Reset zlib for deflate */ |
| 1502 | if (hibernate_zlib_reset(hib, 1) != Z_OK0) { |
| 1503 | DPRINTF("hibernate_zlib_reset failed for deflate\n"); |
| 1504 | return (ENOMEM12); |
| 1505 | } |
| 1506 | |
| 1507 | inaddr = range_base; |
| 1508 | |
| 1509 | /* |
| 1510 | * For each range, loop through its phys mem region |
| 1511 | * and write out the chunks (the last chunk might be |
| 1512 | * smaller than the chunk size). |
| 1513 | */ |
| 1514 | while (inaddr < range_end) { |
| 1515 | out_remaining = PAGE_SIZE(1 << 12); |
| 1516 | while (out_remaining > 0 && inaddr < range_end) { |
| 1517 | /* |
| 1518 | * Adjust for regions that are not evenly |
| 1519 | * divisible by PAGE_SIZE or overflowed |
| 1520 | * pages from the previous iteration. |
| 1521 | */ |
| 1522 | temp_inaddr = (inaddr & PAGE_MASK((1 << 12) - 1)) + |
| 1523 | hibernate_copy_page; |
| 1524 | |
| 1525 | /* Deflate from temp_inaddr to IO page */ |
| 1526 | if (inaddr != range_end) { |
| 1527 | if (inaddr % PAGE_SIZE(1 << 12) == 0) { |
| 1528 | rle = hibernate_write_rle(hib, |
| 1529 | inaddr, |
| 1530 | range_end, |
| 1531 | &blkctr, |
| 1532 | &out_remaining); |
| 1533 | } |
| 1534 | |
| 1535 | if (rle == 0) { |
| 1536 | pmap_kenter_pa(hibernate_temp_page, |
| 1537 | inaddr & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1)), |
| 1538 | PROT_READ0x01); |
| 1539 | |
| 1540 | bcopy((caddr_t)hibernate_temp_page, |
| 1541 | (caddr_t)hibernate_copy_page, |
| 1542 | PAGE_SIZE(1 << 12)); |
| 1543 | inaddr += hibernate_deflate(hib, |
| 1544 | temp_inaddr, |
| 1545 | &out_remaining); |
| 1546 | } else { |
| 1547 | inaddr += rle * PAGE_SIZE(1 << 12); |
| 1548 | if (inaddr > range_end) |
| 1549 | inaddr = range_end; |
| 1550 | } |
| 1551 | |
| 1552 | } |
| 1553 | |
| 1554 | if (out_remaining == 0) { |
| 1555 | /* Filled up the page */ |
| 1556 | nblocks = PAGE_SIZE(1 << 12) / DEV_BSIZE(1 << 9); |
| 1557 | |
| 1558 | if ((err = hib->io_func(hib->dev, |
| 1559 | blkctr + hib->image_offset, |
| 1560 | (vaddr_t)hibernate_io_page, |
| 1561 | PAGE_SIZE(1 << 12), HIB_W1, hib->io_page))) { |
| 1562 | DPRINTF("hib write error %d\n", |
| 1563 | err); |
| 1564 | return (err); |
| 1565 | } |
| 1566 | |
| 1567 | blkctr += nblocks; |
| 1568 | } |
| 1569 | } |
| 1570 | } |
| 1571 | |
| 1572 | if (inaddr != range_end) { |
| 1573 | DPRINTF("deflate range ended prematurely\n"); |
| 1574 | return (EINVAL22); |
| 1575 | } |
| 1576 | |
| 1577 | /* |
| 1578 | * End of range. Round up to the next secsize boundary |
| 1579 | * after finishing compression. |
| 1580 | */ |
| 1581 | if (out_remaining == 0) |
| 1582 | out_remaining = PAGE_SIZE(1 << 12); |
| 1583 | |
| 1584 | /* Finish compression */ |
| 1585 | hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; |
| 1586 | hibernate_state->hib_stream.avail_in = 0; |
| 1587 | hibernate_state->hib_stream.next_out = |
| 1588 | (unsigned char *)hibernate_io_page + |
| 1589 | (PAGE_SIZE(1 << 12) - out_remaining); |
| 1590 | |
| 1591 | /* We have an extra output page available for finalize */ |
| 1592 | hibernate_state->hib_stream.avail_out = |
| 1593 | out_remaining + PAGE_SIZE(1 << 12); |
| 1594 | |
| 1595 | if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH4)) != |
| 1596 | Z_STREAM_END1) { |
| 1597 | DPRINTF("deflate error in output stream: %d\n", err); |
| 1598 | return (err); |
| 1599 | } |
| 1600 | |
| 1601 | out_remaining = hibernate_state->hib_stream.avail_out; |
| 1602 | |
| 1603 | used = 2 * PAGE_SIZE(1 << 12) - out_remaining; |
| 1604 | nblocks = used / DEV_BSIZE(1 << 9); |
| 1605 | |
| 1606 | /* Round up to next block if needed */ |
| 1607 | if (used % DEV_BSIZE(1 << 9) != 0) |
| 1608 | nblocks++; |
| 1609 | |
| 1610 | /* Write final block(s) for this chunk */ |
| 1611 | if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, |
| 1612 | (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE(1 << 9), |
| 1613 | HIB_W1, hib->io_page))) { |
| 1614 | DPRINTF("hib final write error %d\n", err); |
| 1615 | return (err); |
| 1616 | } |
| 1617 | |
| 1618 | blkctr += nblocks; |
| 1619 | |
| 1620 | chunks[i].compressed_size = (blkctr + hib->image_offset - |
| 1621 | chunks[i].offset) * DEV_BSIZE(1 << 9); |
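Editor's note: with hypothetical block numbers, the compressed-size bookkeeping above works out as follows.

/*
 * Suppose this chunk's first block was chunks[i].offset =
 * image_offset + 1000 and blkctr has advanced to 1072 once the final
 * blocks are written.  Then
 *
 *   compressed_size = (1072 + image_offset - (image_offset + 1000))
 *                   * DEV_BSIZE = 72 * 512 = 36864 bytes,
 *
 * i.e. the exact on-disk footprint of the chunk, which is what
 * hibernate_read_chunks() later uses to size its reads.
 */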
| 1622 | } |
| 1623 | |
| 1624 | hib->chunktable_offset = hib->image_offset + blkctr; |
| 1625 | return (0); |
| 1626 | } |
| 1627 | |
| 1628 | /* |
| 1629 | * Reset the zlib stream state and allocate a new hiballoc area for either |
| 1630 | * inflate or deflate. This function is called once for each hibernate chunk. |
| 1631 | * Calling hiballoc_init multiple times is acceptable since the memory it is |
| 1632 | * provided is unmanaged memory (stolen). We use the memory provided to us |
| 1633 | * by the piglet allocated via the supplied hib. |
| 1634 | */ |
| 1635 | int |
| 1636 | hibernate_zlib_reset(union hibernate_info *hib, int deflate) |
| 1637 | { |
| 1638 | vaddr_t hibernate_zlib_start; |
| 1639 | size_t hibernate_zlib_size; |
| 1640 | char *pva = (char *)hib->piglet_va; |
| 1641 | struct hibernate_zlib_state *hibernate_state; |
| 1642 | |
| 1643 | hibernate_state = |
| 1644 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); |
| 1645 | |
| 1646 | if (!deflate) |
| 1647 | pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK(((0x0000ff8000000000UL|0x0000007fc0000000UL)|0x000000003fe00000UL )))); |
| 1648 | |
| 1649 | /* |
| 1650 | * See the piglet layout at the start of this file for the zlib |
| 1651 | * page assignments. |
| 1652 | */ |
| 1653 | hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE(1 << 12))); |
| 1654 | hibernate_zlib_size = 80 * PAGE_SIZE(1 << 12); |
| 1655 | |
| 1656 | memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size)__builtin_memset(((void *)hibernate_zlib_start), (0), (hibernate_zlib_size )); |
| 1657 | memset(hibernate_state, 0, PAGE_SIZE)__builtin_memset((hibernate_state), (0), ((1 << 12))); |
| 1658 | |
| 1659 | /* Set up stream structure */ |
| 1660 | hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; |
| 1661 | hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; |
| 1662 | |
| 1663 | /* Initialize the hiballoc arena for zlib allocs/frees */ |
| 1664 | hiballoc_init(&hibernate_state->hiballoc_arena, |
| 1665 | (caddr_t)hibernate_zlib_start, hibernate_zlib_size); |
| 1666 | |
| 1667 | if (deflate) { |
| 1668 | return deflateInit(&hibernate_state->hib_stream,deflateInit_((&hibernate_state->hib_stream), (1), "1.3.0.1-motley" , (int)sizeof(z_stream)) |
| 1669 | Z_BEST_SPEED)deflateInit_((&hibernate_state->hib_stream), (1), "1.3.0.1-motley" , (int)sizeof(z_stream)); |
| 1670 | } else |
| 1671 | return inflateInit(&hibernate_state->hib_stream)inflateInit_((&hibernate_state->hib_stream), "1.3.0.1-motley" , (int)sizeof(z_stream)); |
| 1672 | } |
| 1673 | |
| 1674 | /* |
| 1675 | * Reads the hibernated memory image from disk, whose location and |
| 1676 | * size are recorded in hib. Begin by reading the persisted |
| 1677 | * chunk table, which records the original chunk placement location |
| 1678 | * and compressed size for each. Next, allocate a pig region of |
| 1679 | * sufficient size to hold the compressed image. Then read the |
| 1680 | * chunks into the pig area (calling hibernate_read_chunks to do this), |
| 1681 | * and finally, if all of the above succeeds, clear the hibernate signature. |
| 1682 | * The function will then return to hibernate_resume, which will proceed |
| 1683 | * to unpack the pig image to the correct place in memory. |
| 1684 | */ |
| 1685 | int |
| 1686 | hibernate_read_image(union hibernate_info *hib) |
| 1687 | { |
| 1688 | size_t compressed_size, disk_size, chunktable_size, pig_sz; |
| 1689 | paddr_t image_start, image_end, pig_start, pig_end; |
| 1690 | struct hibernate_disk_chunk *chunks; |
| 1691 | daddr_t blkctr; |
| 1692 | vaddr_t chunktable = (vaddr_t)NULL((void *)0); |
| 1693 | paddr_t piglet_chunktable = hib->piglet_pa + |
| 1694 | HIBERNATE_CHUNK_SIZE0x400000; |
| 1695 | int i, status; |
| 1696 | |
| 1697 | status = 0; |
| 1698 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); |
| 1699 | |
| 1700 | /* Calculate total chunk table size in disk blocks */ |
| 1701 | chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE0x100000 / DEV_BSIZE(1 << 9); |
| 1702 | |
| 1703 | blkctr = hib->chunktable_offset; |
| 1704 | |
| 1705 | chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE0x100000, &kv_any, |
| 1706 | &kp_none, &kd_nowait); |
| 1707 | |
| 1708 | if (!chunktable) |
| 1709 | return (1); |
| 1710 | |
| 1711 | /* Map chunktable pages */ |
| 1712 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE0x100000; i += PAGE_SIZE(1 << 12)) |
| 1713 | pmap_kenter_pa(chunktable + i, piglet_chunktable + i, |
| 1714 | PROT_READ0x01 | PROT_WRITE0x02); |
| 1715 | pmap_update(pmap_kernel()); |
| 1716 | |
| 1717 | /* Read the chunktable from disk into the piglet chunktable */ |
| 1718 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE0x100000; |
| 1719 | i += MAXPHYS(64 * 1024), blkctr += MAXPHYS(64 * 1024)/DEV_BSIZE(1 << 9)) |
| 1720 | hibernate_block_io(hib, blkctr, MAXPHYS(64 * 1024), |
| 1721 | chunktable + i, 0); |
| 1722 | |
| 1723 | blkctr = hib->image_offset; |
Value stored to 'blkctr' is never read | |
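Editor's note on the finding above: within hibernate_read_image(), blkctr is only read by the chunk-table loop just before this assignment; the image reads are done by hibernate_read_chunks(), which derives its own block counter from chunks[fchunks[i]].offset. The value stored here is therefore never consumed, and the assignment could be removed without changing behavior.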
| 1724 | compressed_size = 0; |
| 1725 | |
| 1726 | chunks = (struct hibernate_disk_chunk *)chunktable; |
| 1727 | |
| 1728 | for (i = 0; i < hib->chunk_ctr; i++) |
| 1729 | compressed_size += chunks[i].compressed_size; |
| 1730 | |
| 1731 | disk_size = compressed_size; |
| 1732 | |
| 1733 | printf("unhibernating @ block %lld length %luMB\n", |
| 1734 | hib->sig_offset - chunktable_size, |
| 1735 | compressed_size / (1024 * 1024)); |
| 1736 | |
| 1737 | /* Allocate the pig area */ |
| 1738 | pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE0x400000; |
| 1739 | if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM12) { |
| 1740 | status = 1; |
| 1741 | goto unmap; |
| 1742 | } |
| 1743 | |
| 1744 | pig_end = pig_start + pig_sz; |
| 1745 | |
| 1746 | /* Calculate image extents. Pig image must end on a chunk boundary. */ |
| 1747 | image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE0x400000 - 1); |
| 1748 | image_start = image_end - disk_size; |
| 1749 | |
| 1750 | hibernate_read_chunks(hib, image_start, image_end, disk_size, |
| 1751 | chunks); |
| 1752 | |
| 1753 | /* Prepare the resume time pmap/page table */ |
| 1754 | hibernate_populate_resume_pt(hib, image_start, image_end); |
| 1755 | |
| 1756 | unmap: |
| 1757 | /* Unmap chunktable pages */ |
| 1758 | pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE0x100000); |
| 1759 | pmap_update(pmap_kernel()); |
| 1760 | |
| 1761 | return (status); |
| 1762 | } |
| 1763 | |
| 1764 | /* |
| 1765 | * Read the hibernated memory chunks from disk (chunk information at this |
| 1766 | * point is stored in the piglet) into the pig area specified by |
| 1767 | * [pig_start .. pig_end]. Order the chunks so that the final chunk is the |
| 1768 | * only chunk with overlap possibilities. |
| 1769 | */ |
| 1770 | int |
| 1771 | hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, |
| 1772 | paddr_t pig_end, size_t image_compr_size, |
| 1773 | struct hibernate_disk_chunk *chunks) |
| 1774 | { |
| 1775 | paddr_t img_cur, piglet_base; |
| 1776 | daddr_t blkctr; |
| 1777 | size_t processed, compressed_size, read_size; |
| 1778 | int nchunks, nfchunks, num_io_pages; |
| 1779 | vaddr_t tempva, hibernate_fchunk_area; |
| 1780 | short *fchunks, i, j; |
| 1781 | |
| 1782 | tempva = (vaddr_t)NULL((void *)0); |
| 1783 | hibernate_fchunk_area = (vaddr_t)NULL((void *)0); |
| 1784 | nfchunks = 0; |
| 1785 | piglet_base = hib->piglet_pa; |
| 1786 | global_pig_start = pig_start; |
| 1787 | |
| 1788 | /* |
| 1789 | * These mappings go into the resuming kernel's page table, and are |
| 1790 | * used only during image read. They disappear from existence |
| 1791 | * when the suspended kernel is unpacked on top of us. |
| 1792 | */ |
| 1793 | tempva = (vaddr_t)km_alloc(MAXPHYS(64 * 1024) + PAGE_SIZE(1 << 12), &kv_any, &kp_none, |
| 1794 | &kd_nowait); |
| 1795 | if (!tempva) |
| 1796 | return (1); |
| 1797 | hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE(1 << 12), &kv_any, |
| 1798 | &kp_none, &kd_nowait); |
| 1799 | if (!hibernate_fchunk_area) |
| 1800 | return (1); |
| 1801 | |
| 1802 | /* Final output chunk ordering VA */ |
| 1803 | fchunks = (short *)hibernate_fchunk_area; |
| 1804 | |
| 1805 | /* Map the chunk ordering region */ |
| 1806 | for (i = 0; i < 24; i++) |
| 1807 | pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE(1 << 12)), |
| 1808 | piglet_base + ((4 + i) * PAGE_SIZE(1 << 12)), |
| 1809 | PROT_READ0x01 | PROT_WRITE0x02); |
| 1810 | pmap_update(pmap_kernel()); |
| 1811 | |
| 1812 | nchunks = hib->chunk_ctr; |
| 1813 | |
| 1814 | /* Initially start all chunks as unplaced */ |
| 1815 | for (i = 0; i < nchunks; i++) |
| 1816 | chunks[i].flags = 0; |
| 1817 | |
| 1818 | /* |
| 1819 | * Search the list for chunks that are outside the pig area. These |
| 1820 | * can be placed first in the final output list. |
| 1821 | */ |
| 1822 | for (i = 0; i < nchunks; i++) { |
| 1823 | if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) { |
| 1824 | fchunks[nfchunks] = i; |
| 1825 | nfchunks++; |
| 1826 | chunks[i].flags |= HIBERNATE_CHUNK_PLACED4; |
| 1827 | } |
| 1828 | } |
| 1829 | |
| 1830 | /* |
| 1831 | * Walk the ordering, place the chunks in ascending memory order. |
| 1832 | */ |
| 1833 | for (i = 0; i < nchunks; i++) { |
| 1834 | if (chunks[i].flags != HIBERNATE_CHUNK_PLACED4) { |
| 1835 | fchunks[nfchunks] = i; |
| 1836 | nfchunks++; |
| 1837 | chunks[i].flags = HIBERNATE_CHUNK_PLACED4; |
| 1838 | } |
| 1839 | } |
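Editor's note: a worked example of the ordering produced by the two loops above, with hypothetical chunk placement.

/*
 * With five chunks where only chunks 0 and 4 lie entirely outside the
 * pig area, the loops above yield
 *
 *   fchunks = { 0, 4, 1, 2, 3 }
 *
 * Chunks that cannot collide with the pig are read first, and the rest
 * follow in ascending physical-address order.
 */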
| 1840 | |
| 1841 | img_cur = pig_start; |
| 1842 | |
| 1843 | for (i = 0; i < nfchunks; i++) { |
| 1844 | blkctr = chunks[fchunks[i]].offset; |
| 1845 | processed = 0; |
| 1846 | compressed_size = chunks[fchunks[i]].compressed_size; |
| 1847 | |
| 1848 | while (processed < compressed_size) { |
| 1849 | if (compressed_size - processed >= MAXPHYS(64 * 1024)) |
| 1850 | read_size = MAXPHYS(64 * 1024); |
| 1851 | else |
| 1852 | read_size = compressed_size - processed; |
| 1853 | |
| 1854 | /* |
| 1855 | * We're reading read_size bytes, offset from the |
| 1856 | * start of a page by img_cur % PAGE_SIZE, so the |
| 1857 | * end will be read_size + (img_cur % PAGE_SIZE) |
| 1858 | * from the start of the first page. Round that |
| 1859 | * up to the next page size. |
| 1860 | */ |
| 1861 | num_io_pages = (read_size + (img_cur % PAGE_SIZE(1 << 12)) |
| 1862 | + PAGE_SIZE(1 << 12) - 1) / PAGE_SIZE(1 << 12); |
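Editor's note: a worked instance of the rounding above, using the maximum read size.

/*
 * With read_size = MAXPHYS (64 KiB) and img_cur % PAGE_SIZE = 0x800:
 *
 *   num_io_pages = (65536 + 2048 + 4095) / 4096 = 17
 *                = MAXPHYS / PAGE_SIZE + 1,
 *
 * which is exactly the upper bound asserted below.
 */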
| 1863 | |
| 1864 | KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1)((num_io_pages <= (64 * 1024)/(1 << 12) + 1) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1864, "num_io_pages <= MAXPHYS/PAGE_SIZE + 1")); |
| 1865 | |
| 1866 | /* Map pages for this read */ |
| 1867 | for (j = 0; j < num_io_pages; j++) |
| 1868 | pmap_kenter_pa(tempva + j * PAGE_SIZE(1 << 12), |
| 1869 | img_cur + j * PAGE_SIZE(1 << 12), |
| 1870 | PROT_READ0x01 | PROT_WRITE0x02); |
| 1871 | |
| 1872 | pmap_update(pmap_kernel()); |
| 1873 | |
| 1874 | hibernate_block_io(hib, blkctr, read_size, |
| 1875 | tempva + (img_cur & PAGE_MASK((1 << 12) - 1)), 0); |
| 1876 | |
| 1877 | blkctr += (read_size / DEV_BSIZE(1 << 9)); |
| 1878 | |
| 1879 | pmap_kremove(tempva, num_io_pages * PAGE_SIZE(1 << 12)); |
| 1880 | pmap_update(pmap_kernel()); |
| 1881 | |
| 1882 | processed += read_size; |
| 1883 | img_cur += read_size; |
| 1884 | } |
| 1885 | } |
| 1886 | |
| 1887 | pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE(1 << 12)); |
| 1888 | pmap_update(pmap_kernel()); |
| 1889 | |
| 1890 | return (0); |
| 1891 | } |
| 1892 | |
| 1893 | /* |
| 1894 | * Hibernating a machine comprises the following operations: |
| 1895 | * 1. Calculating this machine's hibernate_info information |
| 1896 | * 2. Allocating a piglet and saving the piglet's physaddr |
| 1897 | * 3. Calculating the memory chunks |
| 1898 | * 4. Writing the compressed chunks to disk |
| 1899 | * 5. Writing the chunk table |
| 1900 | * 6. Writing the signature block (hibernate_info) |
| 1901 | * |
| 1902 | * On most architectures, the function calling hibernate_suspend would |
| 1903 | * then power off the machine using some MD-specific implementation. |
| 1904 | */ |
| 1905 | int |
| 1906 | hibernate_suspend(void) |
| 1907 | { |
| 1908 | union hibernate_info hib; |
| 1909 | u_long start, end; |
| 1910 | |
| 1911 | /* |
| 1912 | * Calculate memory ranges, swap offsets, etc. |
| 1913 | * This also allocates a piglet whose physaddr is stored in |
| 1914 | * hib->piglet_pa and vaddr stored in hib->piglet_va |
| 1915 | */ |
| 1916 | if (get_hibernate_info(&hib, 1)) { |
| 1917 | DPRINTF("failed to obtain hibernate info\n"); |
| 1918 | return (1); |
| 1919 | } |
| 1920 | |
| 1921 | /* Find a page-addressed region in swap [start,end] */ |
| 1922 | if (uvm_hibswap(hib.dev, &start, &end)) { |
| 1923 | printf("hibernate: cannot find any swap\n"); |
| 1924 | return (1); |
| 1925 | } |
| 1926 | |
| 1927 | if (end - start < 1000) { |
| 1928 | printf("hibernate: insufficient swap (%lu is too small)\n", |
| 1929 | end - start + 1); |
| 1930 | return (1); |
| 1931 | } |
| 1932 | |
| 1933 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_start, |
| 1934 | &retguard_start_phys); |
| 1935 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_end, |
| 1936 | &retguard_end_phys); |
| 1937 | |
| 1938 | /* Calculate block offsets in swap */ |
| 1939 | hib.image_offset = ctod(start)((start) << (12 - 9)); |
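Editor's note: ctod() scales a page index to 512-byte disk blocks.

/*
 * Example (illustrative): with 4 KiB pages, a swap range starting at
 * page 0x10000 maps to disk block 0x10000 << (12 - 9) = 0x80000.
 */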
| 1940 | |
| 1941 | DPRINTF("hibernate @ block %lld max-length %lu blocks\n", |
| 1942 | hib.image_offset, ctod(end) - ctod(start) + 1); |
| 1943 | |
| 1944 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); |
| 1945 | DPRINTF("hibernate: writing chunks\n"); |
| 1946 | if (hibernate_write_chunks(&hib)) { |
| 1947 | DPRINTF("hibernate_write_chunks failed\n"); |
| 1948 | return (1); |
| 1949 | } |
| 1950 | |
| 1951 | DPRINTF("hibernate: writing chunktable\n"); |
| 1952 | if (hibernate_write_chunktable(&hib)) { |
| 1953 | DPRINTF("hibernate_write_chunktable failed\n"); |
| 1954 | return (1); |
| 1955 | } |
| 1956 | |
| 1957 | DPRINTF("hibernate: writing signature\n"); |
| 1958 | if (hibernate_write_signature(&hib)) { |
| 1959 | DPRINTF("hibernate_write_signature failed\n"); |
| 1960 | return (1); |
| 1961 | } |
| 1962 | |
| 1963 | /* Allow the disk to settle */ |
| 1964 | delay(500000)(*delay_func)(500000); |
| 1965 | |
| 1966 | /* |
| 1967 | * Give the device-specific I/O function a notification that we're |
| 1968 | * done, and that it can clean up or shutdown as needed. |
| 1969 | */ |
| 1970 | hib.io_func(hib.dev, 0, (vaddr_t)NULL((void *)0), 0, HIB_DONE-2, hib.io_page); |
| 1971 | return (0); |
| 1972 | } |
| 1973 | |
| 1974 | int |
| 1975 | hibernate_alloc(void) |
| 1976 | { |
| 1977 | KASSERT(global_piglet_va == 0)((global_piglet_va == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1977, "global_piglet_va == 0")); |
| 1978 | KASSERT(hibernate_temp_page == 0)((hibernate_temp_page == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/kern/subr_hibernate.c", 1978, "hibernate_temp_page == 0" )); |
| 1979 | |
| 1980 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); |
| 1981 | pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), |
| 1982 | PROT_READ0x01 | PROT_WRITE0x02); |
| 1983 | |
| 1984 | /* Allocate a piglet, store its addresses in the supplied globals */ |
| 1985 | if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa, |
| 1986 | HIBERNATE_CHUNK_SIZE0x400000 * 4, HIBERNATE_CHUNK_SIZE0x400000)) |
| 1987 | goto unmap; |
| 1988 | |
| 1989 | /* |
| 1990 | * Allocate VA for the temp page. |
| 1991 | * |
| 1992 | * This will become part of the suspended kernel and will |
| 1993 | * be freed in hibernate_free upon resume (or hibernate |
| 1994 | * failure). |
| 1995 | */ |
| 1996 | hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, |
| 1997 | &kp_none, &kd_nowait); |
| 1998 | if (!hibernate_temp_page) { |
| 1999 | uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE0x400000); |
| 2000 | global_piglet_va = 0; |
| 2001 | goto unmap; |
| 2002 | } |
| 2003 | return (0); |
| 2004 | unmap: |
| 2005 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), PAGE_SIZE(1 << 12)); |
| 2006 | pmap_update(pmap_kernel()); |
| 2007 | return (ENOMEM12); |
| 2008 | } |
| 2009 | |
| 2010 | /* |
| 2011 | * Free items allocated by hibernate_alloc() |
| 2012 | */ |
| 2013 | void |
| 2014 | hibernate_free(void) |
| 2015 | { |
| 2016 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); |
| 2017 | |
| 2018 | if (global_piglet_va) |
| 2019 | uvm_pmr_free_piglet(global_piglet_va, |
| 2020 | 4 * HIBERNATE_CHUNK_SIZE0x400000); |
| 2021 | |
| 2022 | if (hibernate_temp_page) { |
| 2023 | pmap_kremove(hibernate_temp_page, PAGE_SIZE(1 << 12)); |
| 2024 | km_free((void *)hibernate_temp_page, PAGE_SIZE(1 << 12), |
| 2025 | &kv_any, &kp_none); |
| 2026 | } |
| 2027 | |
| 2028 | global_piglet_va = 0; |
| 2029 | hibernate_temp_page = 0; |
| 2030 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), PAGE_SIZE(1 << 12)); |
| 2031 | pmap_update(pmap_kernel()); |
| 2032 | } |