| File: | kern/subr_hibernate.c | 
| Warning: | line 1581, column 21 The left operand of '==' is a garbage value | 
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: subr_hibernate.c,v 1.138 2022/09/03 18:17:15 mlarkin Exp $ */ | |||
| 2 | ||||
| 3 | /* | |||
| 4 | * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> | |||
| 5 | * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> | |||
| 6 | * | |||
| 7 | * Permission to use, copy, modify, and distribute this software for any | |||
| 8 | * purpose with or without fee is hereby granted, provided that the above | |||
| 9 | * copyright notice and this permission notice appear in all copies. | |||
| 10 | * | |||
| 11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
| 12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
| 13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
| 14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
| 15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
| 16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
| 17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
| 18 | */ | |||
| 19 | ||||
| 20 | #include <sys/hibernate.h> | |||
| 21 | #include <sys/malloc.h> | |||
| 22 | #include <sys/param.h> | |||
| 23 | #include <sys/tree.h> | |||
| 24 | #include <sys/systm.h> | |||
| 25 | #include <sys/disklabel.h> | |||
| 26 | #include <sys/disk.h> | |||
| 27 | #include <sys/conf.h> | |||
| 28 | #include <sys/buf.h> | |||
| 29 | #include <sys/fcntl.h> | |||
| 30 | #include <sys/stat.h> | |||
| 31 | #include <sys/atomic.h> | |||
| 32 | ||||
| 33 | #include <uvm/uvm.h> | |||
| 34 | #include <uvm/uvm_swap.h> | |||
| 35 | ||||
| 36 | #include <machine/hibernate.h> | |||
| 37 | ||||
| 38 | /* Make sure the signature can fit in one block */ | |||
| 39 | CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE)extern char _ctassert[(sizeof(union hibernate_info) <= (1 << 9)) ? 1 : -1 ] __attribute__((__unused__)); | |||
| 40 | ||||
| 41 | /* | |||
| 42 | * Hibernate piglet layout information | |||
| 43 | * | |||
| 44 | * The piglet is a scratch area of memory allocated by the suspending kernel. | |||
| 45 | * Its phys and virt addrs are recorded in the signature block. The piglet is | |||
| 46 | * used to guarantee an unused area of memory that can be used by the resuming | |||
| 47 | * kernel for various things. The piglet is excluded during unpack operations. | |||
| 48 | * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). | |||
| 49 | * | |||
| 50 | * Offset from piglet_base Purpose | |||
| 51 | * ---------------------------------------------------------------------------- | |||
| 52 | * 0 Private page for suspend I/O write functions | |||
| 53 | * 1*PAGE_SIZE I/O page used during hibernate suspend | |||
| 54 | * 2*PAGE_SIZE I/O page used during hibernate suspend | |||
| 55 | * 3*PAGE_SIZE copy page used during hibernate suspend | |||
| 56 | * 4*PAGE_SIZE final chunk ordering list (24 pages) | |||
| 57 | * 28*PAGE_SIZE RLE utility page | |||
| 58 | * 29*PAGE_SIZE preserved entropy (one page) | |||
| 59 | * 30*PAGE_SIZE start of hiballoc area | |||
| 60 | * 110*PAGE_SIZE end of hiballoc area (80 pages) | |||
| 61 | * 366*PAGE_SIZE end of retguard preservation region (256 pages) | |||
| 62 | * ... unused | |||
| 63 | * HIBERNATE_CHUNK_SIZE start of hibernate chunk table | |||
| 64 | * 2*HIBERNATE_CHUNK_SIZE bounce area for chunks being unpacked | |||
| 65 | * 4*HIBERNATE_CHUNK_SIZE end of piglet | |||
| 66 | */ | |||
| 67 | ||||
| 68 | /* Temporary vaddr ranges used during hibernate */ | |||
| 69 | vaddr_t hibernate_temp_page; | |||
| 70 | vaddr_t hibernate_copy_page; | |||
| 71 | vaddr_t hibernate_rle_page; | |||
| 72 | ||||
| 73 | /* Hibernate info as read from disk during resume */ | |||
| 74 | union hibernate_info disk_hib; | |||
| 75 | ||||
| 76 | /* | |||
| 77 | * Global copy of the pig start address. This needs to be a global as we | |||
| 78 | * switch stacks after computing it - it can't be stored on the stack. | |||
| 79 | */ | |||
| 80 | paddr_t global_pig_start; | |||
| 81 | ||||
| 82 | /* | |||
| 83 | * Global copies of the piglet start addresses (PA/VA). We store these | |||
| 84 | * as globals to avoid having to carry them around as parameters, as the | |||
| 85 | * piglet is allocated early and freed late - its lifecycle extends beyond | |||
| 86 | * that of the hibernate info union which is calculated on suspend/resume. | |||
| 87 | */ | |||
| 88 | vaddr_t global_piglet_va; | |||
| 89 | paddr_t global_piglet_pa; | |||
| 90 | ||||
/*
 * Debug output helpers.  Uncomment the HIB_DEBUG define to enable them:
 * DPRINTF prints whenever hib_debug is non-zero, DNPRINTF only when
 * hib_debug exceeds the given level.  Without HIB_DEBUG both expand to
 * nothing.
 */
/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
#define DPRINTF(...)							\
	do { if (hib_debug) printf(__VA_ARGS__); } while (0)
#define DNPRINTF(n, ...)						\
	do { if (hib_debug > (n)) printf(__VA_ARGS__); } while (0)
#else
#define DPRINTF(...)
#define DNPRINTF(n, ...)
#endif
| 100 | ||||
| 101 | #ifndef NO_PROPOLICE | |||
| 102 | extern long __guard_local; | |||
| 103 | #endif /* ! NO_PROPOLICE */ | |||
| 104 | ||||
| 105 | /* Retguard phys address (need to skip this region during unpack) */ | |||
| 106 | paddr_t retguard_start_phys, retguard_end_phys; | |||
| 107 | extern char __retguard_start, __retguard_end; | |||
| 108 | ||||
| 109 | void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); | |||
| 110 | int hibernate_calc_rle(paddr_t, paddr_t); | |||
| 111 | int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, | |||
| 112 | size_t *); | |||
| 113 | ||||
/* Number of pages in one hibernate chunk. */
#define MAX_RLE		(HIBERNATE_CHUNK_SIZE / PAGE_SIZE)
| 115 | ||||
/* Alignment, in bytes, that the hib allocator enforces. */
#define HIB_ALIGN		8

/* Like sizeof(_type), but rounded up to a multiple of HIB_ALIGN. */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
| 125 | ||||
| 126 | struct hiballoc_entry { | |||
| 127 | size_t hibe_use; | |||
| 128 | size_t hibe_space; | |||
| 129 | RBT_ENTRY(hiballoc_entry)struct rb_entry hibe_entry; | |||
| 130 | }; | |||
| 131 | ||||
| 132 | /* | |||
| 133 | * Sort hibernate memory ranges by ascending PA | |||
| 134 | */ | |||
| 135 | void | |||
| 136 | hibernate_sort_ranges(union hibernate_info *hib_info) | |||
| 137 | { | |||
| 138 | int i, j; | |||
| 139 | struct hibernate_memory_range *ranges; | |||
| 140 | paddr_t base, end; | |||
| 141 | ||||
| 142 | ranges = hib_info->ranges; | |||
| 143 | ||||
| 144 | for (i = 1; i < hib_info->nranges; i++) { | |||
| 145 | j = i; | |||
| 146 | while (j > 0 && ranges[j - 1].base > ranges[j].base) { | |||
| 147 | base = ranges[j].base; | |||
| 148 | end = ranges[j].end; | |||
| 149 | ranges[j].base = ranges[j - 1].base; | |||
| 150 | ranges[j].end = ranges[j - 1].end; | |||
| 151 | ranges[j - 1].base = base; | |||
| 152 | ranges[j - 1].end = end; | |||
| 153 | j--; | |||
| 154 | } | |||
| 155 | } | |||
| 156 | } | |||
| 157 | ||||
| 158 | /* | |||
| 159 | * Compare hiballoc entries based on the address they manage. | |||
| 160 | * | |||
| 161 | * Since the address is fixed, relative to struct hiballoc_entry, | |||
| 162 | * we just compare the hiballoc_entry pointers. | |||
| 163 | */ | |||
| 164 | static __inline int | |||
| 165 | hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) | |||
| 166 | { | |||
| 167 | vaddr_t vl = (vaddr_t)l; | |||
| 168 | vaddr_t vr = (vaddr_t)r; | |||
| 169 | ||||
| 170 | return vl < vr ? -1 : (vl > vr); | |||
| 171 | } | |||
| 172 | ||||
| 173 | RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)extern const struct rb_type *const hiballoc_addr_RBT_TYPE; __attribute__ ((__unused__)) static inline void hiballoc_addr_RBT_INIT(struct hiballoc_addr *head) { _rb_init(&head->rbh_root); } __attribute__ ((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_INSERT (struct hiballoc_addr *head, struct hiballoc_entry *elm) { return _rb_insert(hiballoc_addr_RBT_TYPE, &head->rbh_root, elm ); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_REMOVE(struct hiballoc_addr *head, struct hiballoc_entry *elm) { return _rb_remove(hiballoc_addr_RBT_TYPE , &head->rbh_root, elm); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_FIND (struct hiballoc_addr *head, const struct hiballoc_entry *key ) { return _rb_find(hiballoc_addr_RBT_TYPE, &head->rbh_root , key); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_NFIND(struct hiballoc_addr *head, const struct hiballoc_entry *key) { return _rb_nfind(hiballoc_addr_RBT_TYPE , &head->rbh_root, key); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_ROOT (struct hiballoc_addr *head) { return _rb_root(hiballoc_addr_RBT_TYPE , &head->rbh_root); } __attribute__((__unused__)) static inline int hiballoc_addr_RBT_EMPTY(struct hiballoc_addr *head ) { return _rb_empty(&head->rbh_root); } __attribute__ ((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_MIN (struct hiballoc_addr *head) { return _rb_min(hiballoc_addr_RBT_TYPE , &head->rbh_root); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_MAX(struct hiballoc_addr *head) { return _rb_max(hiballoc_addr_RBT_TYPE, &head-> rbh_root); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_NEXT(struct hiballoc_entry *elm) { return _rb_next(hiballoc_addr_RBT_TYPE, elm); } __attribute__ 
((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_PREV (struct hiballoc_entry *elm) { return _rb_prev(hiballoc_addr_RBT_TYPE , elm); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_LEFT(struct hiballoc_entry *elm) { return _rb_left(hiballoc_addr_RBT_TYPE, elm); } __attribute__((__unused__ )) static inline struct hiballoc_entry * hiballoc_addr_RBT_RIGHT (struct hiballoc_entry *elm) { return _rb_right(hiballoc_addr_RBT_TYPE , elm); } __attribute__((__unused__)) static inline struct hiballoc_entry * hiballoc_addr_RBT_PARENT(struct hiballoc_entry *elm) { return _rb_parent(hiballoc_addr_RBT_TYPE, elm); } __attribute__((__unused__ )) static inline void hiballoc_addr_RBT_SET_LEFT(struct hiballoc_entry *elm, struct hiballoc_entry *left) { _rb_set_left(hiballoc_addr_RBT_TYPE , elm, left); } __attribute__((__unused__)) static inline void hiballoc_addr_RBT_SET_RIGHT(struct hiballoc_entry *elm, struct hiballoc_entry *right) { _rb_set_right(hiballoc_addr_RBT_TYPE , elm, right); } __attribute__((__unused__)) static inline void hiballoc_addr_RBT_SET_PARENT(struct hiballoc_entry *elm, struct hiballoc_entry *parent) { _rb_set_parent(hiballoc_addr_RBT_TYPE , elm, parent); } __attribute__((__unused__)) static inline void hiballoc_addr_RBT_POISON(struct hiballoc_entry *elm, unsigned long poison) { _rb_poison(hiballoc_addr_RBT_TYPE, elm, poison ); } __attribute__((__unused__)) static inline int hiballoc_addr_RBT_CHECK (struct hiballoc_entry *elm, unsigned long poison) { return _rb_check (hiballoc_addr_RBT_TYPE, elm, poison); } | |||
| 174 | ||||
| 175 | /* | |||
| 176 | * Given a hiballoc entry, return the address it manages. | |||
| 177 | */ | |||
| 178 | static __inline void * | |||
| 179 | hib_entry_to_addr(struct hiballoc_entry *entry) | |||
| 180 | { | |||
| 181 | caddr_t addr; | |||
| 182 | ||||
| 183 | addr = (caddr_t)entry; | |||
| 184 | addr += HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | |||
| 185 | return addr; | |||
| 186 | } | |||
| 187 | ||||
| 188 | /* | |||
| 189 | * Given an address, find the hiballoc that corresponds. | |||
| 190 | */ | |||
| 191 | static __inline struct hiballoc_entry* | |||
| 192 | hib_addr_to_entry(void *addr_param) | |||
| 193 | { | |||
| 194 | caddr_t addr; | |||
| 195 | ||||
| 196 | addr = (caddr_t)addr_param; | |||
| 197 | addr -= HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | |||
| 198 | return (struct hiballoc_entry*)addr; | |||
| 199 | } | |||
| 200 | ||||
/* Instantiate the hiballoc_addr tree type, ordered by hibe_cmp(). */
RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
| 202 | ||||
| 203 | /* | |||
| 204 | * Allocate memory from the arena. | |||
| 205 | * | |||
| 206 | * Returns NULL if no memory is available. | |||
| 207 | */ | |||
| 208 | void * | |||
| 209 | hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) | |||
| 210 | { | |||
| 211 | struct hiballoc_entry *entry, *new_entry; | |||
| 212 | size_t find_sz; | |||
| 213 | ||||
| 214 | /* | |||
| 215 | * Enforce alignment of HIB_ALIGN bytes. | |||
| 216 | * | |||
| 217 | * Note that, because the entry is put in front of the allocation, | |||
| 218 | * 0-byte allocations are guaranteed a unique address. | |||
| 219 | */ | |||
| 220 | alloc_sz = roundup(alloc_sz, HIB_ALIGN)((((alloc_sz)+((8)-1))/(8))*(8)); | |||
| 221 | ||||
| 222 | /* | |||
| 223 | * Find an entry with hibe_space >= find_sz. | |||
| 224 | * | |||
| 225 | * If the root node is not large enough, we switch to tree traversal. | |||
| 226 | * Because all entries are made at the bottom of the free space, | |||
| 227 | * traversal from the end has a slightly better chance of yielding | |||
| 228 | * a sufficiently large space. | |||
| 229 | */ | |||
| 230 | find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | |||
| 231 | entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs)hiballoc_addr_RBT_ROOT(&arena->hib_addrs); | |||
| 232 | if (entry != NULL((void *)0) && entry->hibe_space < find_sz) { | |||
| 233 | RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs)for ((entry) = hiballoc_addr_RBT_MAX((&arena->hib_addrs )); (entry) != ((void *)0); (entry) = hiballoc_addr_RBT_PREV( (entry))) { | |||
| 234 | if (entry->hibe_space >= find_sz) | |||
| 235 | break; | |||
| 236 | } | |||
| 237 | } | |||
| 238 | ||||
| 239 | /* | |||
| 240 | * Insufficient or too fragmented memory. | |||
| 241 | */ | |||
| 242 | if (entry == NULL((void *)0)) | |||
| 243 | return NULL((void *)0); | |||
| 244 | ||||
| 245 | /* | |||
| 246 | * Create new entry in allocated space. | |||
| 247 | */ | |||
| 248 | new_entry = (struct hiballoc_entry*)( | |||
| 249 | (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); | |||
| 250 | new_entry->hibe_space = entry->hibe_space - find_sz; | |||
| 251 | new_entry->hibe_use = alloc_sz; | |||
| 252 | ||||
| 253 | /* | |||
| 254 | * Insert entry. | |||
| 255 | */ | |||
| 256 | if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry)hiballoc_addr_RBT_INSERT(&arena->hib_addrs, new_entry) != NULL((void *)0)) | |||
| 257 | panic("hib_alloc: insert failure"); | |||
| 258 | entry->hibe_space = 0; | |||
| 259 | ||||
| 260 | /* Return address managed by entry. */ | |||
| 261 | return hib_entry_to_addr(new_entry); | |||
| 262 | } | |||
| 263 | ||||
| 264 | void | |||
| 265 | hib_getentropy(char **bufp, size_t *bufplen) | |||
| 266 | { | |||
| 267 | if (!bufp || !bufplen) | |||
| 268 | return; | |||
| 269 | ||||
| 270 | *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE(1 << 12))); | |||
| 271 | *bufplen = PAGE_SIZE(1 << 12); | |||
| 272 | } | |||
| 273 | ||||
| 274 | /* | |||
| 275 | * Free a pointer previously allocated from this arena. | |||
| 276 | * | |||
| 277 | * If addr is NULL, this will be silently accepted. | |||
| 278 | */ | |||
| 279 | void | |||
| 280 | hib_free(struct hiballoc_arena *arena, void *addr) | |||
| 281 | { | |||
| 282 | struct hiballoc_entry *entry, *prev; | |||
| 283 | ||||
| 284 | if (addr == NULL((void *)0)) | |||
| 285 | return; | |||
| 286 | ||||
| 287 | /* | |||
| 288 | * Derive entry from addr and check it is really in this arena. | |||
| 289 | */ | |||
| 290 | entry = hib_addr_to_entry(addr); | |||
| 291 | if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_FIND(&arena->hib_addrs, entry) != entry) | |||
| 292 | panic("hib_free: freed item %p not in hib arena", addr); | |||
| 293 | ||||
| 294 | /* | |||
| 295 | * Give the space in entry to its predecessor. | |||
| 296 | * | |||
| 297 | * If entry has no predecessor, change its used space into free space | |||
| 298 | * instead. | |||
| 299 | */ | |||
| 300 | prev = RBT_PREV(hiballoc_addr, entry)hiballoc_addr_RBT_PREV(entry); | |||
| 301 | if (prev != NULL((void *)0) && | |||
| 302 | (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)) + | |||
| 303 | prev->hibe_use + prev->hibe_space) == entry) { | |||
| 304 | /* Merge entry. */ | |||
| 305 | RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_REMOVE(&arena->hib_addrs, entry); | |||
| 306 | prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)) + | |||
| 307 | entry->hibe_use + entry->hibe_space; | |||
| 308 | } else { | |||
| 309 | /* Flip used memory to free space. */ | |||
| 310 | entry->hibe_space += entry->hibe_use; | |||
| 311 | entry->hibe_use = 0; | |||
| 312 | } | |||
| 313 | } | |||
| 314 | ||||
| 315 | /* | |||
| 316 | * Initialize hiballoc. | |||
| 317 | * | |||
| 318 | * The allocator will manage memory at ptr, which is len bytes. | |||
| 319 | */ | |||
| 320 | int | |||
| 321 | hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) | |||
| 322 | { | |||
| 323 | struct hiballoc_entry *entry; | |||
| 324 | caddr_t ptr; | |||
| 325 | size_t len; | |||
| 326 | ||||
| 327 | RBT_INIT(hiballoc_addr, &arena->hib_addrs)hiballoc_addr_RBT_INIT(&arena->hib_addrs); | |||
| 328 | ||||
| 329 | /* | |||
| 330 | * Hib allocator enforces HIB_ALIGN alignment. | |||
| 331 | * Fixup ptr and len. | |||
| 332 | */ | |||
| 333 | ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN)(((((vaddr_t)p_ptr)+((8)-1))/(8))*(8)); | |||
| 334 | len = p_len - ((size_t)ptr - (size_t)p_ptr); | |||
| 335 | len &= ~((size_t)HIB_ALIGN8 - 1); | |||
| 336 | ||||
| 337 | /* | |||
| 338 | * Insufficient memory to be able to allocate and also do bookkeeping. | |||
| 339 | */ | |||
| 340 | if (len <= HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8))) | |||
| 341 | return ENOMEM12; | |||
| 342 | ||||
| 343 | /* | |||
| 344 | * Create entry describing space. | |||
| 345 | */ | |||
| 346 | entry = (struct hiballoc_entry*)ptr; | |||
| 347 | entry->hibe_use = 0; | |||
| 348 | entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | |||
| 349 | RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_INSERT(&arena->hib_addrs, entry); | |||
| 350 | ||||
| 351 | return 0; | |||
| 352 | } | |||
| 353 | ||||
| 354 | /* | |||
| 355 | * Zero all free memory. | |||
| 356 | */ | |||
| 357 | void | |||
| 358 | uvm_pmr_zero_everything(void) | |||
| 359 | { | |||
| 360 | struct uvm_pmemrange *pmr; | |||
| 361 | struct vm_page *pg; | |||
| 362 | int i; | |||
| 363 | ||||
| 364 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | |||
| 365 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | |||
| 366 | /* Zero single pages. */ | |||
| 367 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY])((&pmr->single[0])->tqh_first)) | |||
| 368 | != NULL((void *)0)) { | |||
| 369 | uvm_pmr_remove(pmr, pg); | |||
| 370 | uvm_pagezero(pg); | |||
| 371 | atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | |||
| 372 | uvmexp.zeropages++; | |||
| 373 | uvm_pmr_insert(pmr, pg, 0); | |||
| 374 | } | |||
| 375 | ||||
| 376 | /* Zero multi page ranges. */ | |||
| 377 | while ((pg = RBT_ROOT(uvm_pmr_size,uvm_pmr_size_RBT_ROOT(&pmr->size[0]) | |||
| 378 | &pmr->size[UVM_PMR_MEMTYPE_DIRTY])uvm_pmr_size_RBT_ROOT(&pmr->size[0])) != NULL((void *)0)) { | |||
| 379 | pg--; /* Size tree always has second page. */ | |||
| 380 | uvm_pmr_remove(pmr, pg); | |||
| 381 | for (i = 0; i < pg->fpgsz; i++) { | |||
| 382 | uvm_pagezero(&pg[i]); | |||
| 383 | atomic_setbits_intx86_atomic_setbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | |||
| 384 | uvmexp.zeropages++; | |||
| 385 | } | |||
| 386 | uvm_pmr_insert(pmr, pg, 0); | |||
| 387 | } | |||
| 388 | } | |||
| 389 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | |||
| 390 | } | |||
| 391 | ||||
| 392 | /* | |||
| 393 | * Mark all memory as dirty. | |||
| 394 | * | |||
| 395 | * Used to inform the system that the clean memory isn't clean for some | |||
| 396 | * reason, for example because we just came back from hibernate. | |||
| 397 | */ | |||
| 398 | void | |||
| 399 | uvm_pmr_dirty_everything(void) | |||
| 400 | { | |||
| 401 | struct uvm_pmemrange *pmr; | |||
| 402 | struct vm_page *pg; | |||
| 403 | int i; | |||
| 404 | ||||
| 405 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | |||
| 406 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | |||
| 407 | /* Dirty single pages. */ | |||
| 408 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO])((&pmr->single[1])->tqh_first)) | |||
| 409 | != NULL((void *)0)) { | |||
| 410 | uvm_pmr_remove(pmr, pg); | |||
| 411 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | |||
| 412 | uvm_pmr_insert(pmr, pg, 0); | |||
| 413 | } | |||
| 414 | ||||
| 415 | /* Dirty multi page ranges. */ | |||
| 416 | while ((pg = RBT_ROOT(uvm_pmr_size,uvm_pmr_size_RBT_ROOT(&pmr->size[1]) | |||
| 417 | &pmr->size[UVM_PMR_MEMTYPE_ZERO])uvm_pmr_size_RBT_ROOT(&pmr->size[1])) != NULL((void *)0)) { | |||
| 418 | pg--; /* Size tree always has second page. */ | |||
| 419 | uvm_pmr_remove(pmr, pg); | |||
| 420 | for (i = 0; i < pg->fpgsz; i++) | |||
| 421 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | |||
| 422 | uvm_pmr_insert(pmr, pg, 0); | |||
| 423 | } | |||
| 424 | } | |||
| 425 | ||||
| 426 | uvmexp.zeropages = 0; | |||
| 427 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | |||
| 428 | } | |||
| 429 | ||||
| 430 | /* | |||
| 431 | * Allocate an area that can hold sz bytes and doesn't overlap with | |||
| 432 | * the piglet at piglet_pa. | |||
| 433 | */ | |||
| 434 | int | |||
| 435 | uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) | |||
| 436 | { | |||
| 437 | struct uvm_constraint_range pig_constraint; | |||
| 438 | struct kmem_pa_mode kp_pig = { | |||
| 439 | .kp_constraint = &pig_constraint, | |||
| 440 | .kp_maxseg = 1 | |||
| 441 | }; | |||
| 442 | vaddr_t va; | |||
| 443 | ||||
| 444 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
| 445 | ||||
| 446 | pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE0x400000; | |||
| 447 | pig_constraint.ucr_high = -1; | |||
| 448 | ||||
| 449 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | |||
| 450 | if (va == 0) { | |||
| 451 | pig_constraint.ucr_low = 0; | |||
| 452 | pig_constraint.ucr_high = piglet_pa - 1; | |||
| 453 | ||||
| 454 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | |||
| 455 | if (va == 0) | |||
| 456 | return ENOMEM12; | |||
| 457 | } | |||
| 458 | ||||
| 459 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, pa); | |||
| 460 | return 0; | |||
| 461 | } | |||
| 462 | ||||
| 463 | /* | |||
| 464 | * Allocate a piglet area. | |||
| 465 | * | |||
| 466 | * This needs to be in DMA-safe memory. | |||
| 467 | * Piglets are aligned. | |||
| 468 | * | |||
| 469 | * sz and align in bytes. | |||
| 470 | * | |||
| 471 | * The call will sleep for the pagedaemon to attempt to free memory. | |||
| 472 | * The pagedaemon may decide its not possible to free enough memory, causing | |||
| 473 | * the allocation to fail. | |||
| 474 | */ | |||
| 475 | int | |||
| 476 | uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) | |||
| 477 | { | |||
| 478 | struct kmem_pa_mode kp_piglet = { | |||
| 479 | .kp_constraint = &dma_constraint, | |||
| 480 | .kp_align = align, | |||
| 481 | .kp_maxseg = 1 | |||
| 482 | }; | |||
| 483 | ||||
| 484 | /* Ensure align is a power of 2 */ | |||
| 485 | KASSERT((align & (align - 1)) == 0)(((align & (align - 1)) == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/kern/subr_hibernate.c", 485, "(align & (align - 1)) == 0" )); | |||
| 486 | ||||
| 487 | /* | |||
| 488 | * Fixup arguments: align must be at least PAGE_SIZE, | |||
| 489 | * sz will be converted to pagecount, since that is what | |||
| 490 | * pmemrange uses internally. | |||
| 491 | */ | |||
| 492 | if (align < PAGE_SIZE(1 << 12)) | |||
| 493 | kp_piglet.kp_align = PAGE_SIZE(1 << 12); | |||
| 494 | ||||
| 495 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
| 496 | ||||
| 497 | *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); | |||
| 498 | if (*va == 0) | |||
| 499 | return ENOMEM12; | |||
| 500 | ||||
| 501 | pmap_extract(pmap_kernel()(&kernel_pmap_store), *va, pa); | |||
| 502 | return 0; | |||
| 503 | } | |||
| 504 | ||||
| 505 | /* | |||
| 506 | * Free a piglet area. | |||
| 507 | */ | |||
| 508 | void | |||
| 509 | uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) | |||
| 510 | { | |||
| 511 | /* | |||
| 512 | * Fix parameters. | |||
| 513 | */ | |||
| 514 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
| 515 | ||||
| 516 | /* | |||
| 517 | * Free the physical and virtual memory. | |||
| 518 | */ | |||
| 519 | km_free((void *)va, sz, &kv_any, &kp_dma_contig); | |||
| 520 | } | |||
| 521 | ||||
| 522 | /* | |||
| 523 | * Physmem RLE compression support. | |||
| 524 | * | |||
| 525 | * Given a physical page address, return the number of pages starting at the | |||
| 526 | * address that are free. Clamps to the number of pages in | |||
| 527 | * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. | |||
| 528 | */ | |||
| 529 | int | |||
| 530 | uvm_page_rle(paddr_t addr) | |||
| 531 | { | |||
| 532 | struct vm_page *pg, *pg_end; | |||
| 533 | struct vm_physseg *vmp; | |||
| 534 | int pseg_idx, off_idx; | |||
| 535 | ||||
| 536 | pseg_idx = vm_physseg_find(atop(addr)((addr) >> 12), &off_idx); | |||
| 537 | if (pseg_idx == -1) | |||
| 538 | return 0; | |||
| 539 | ||||
| 540 | vmp = &vm_physmem[pseg_idx]; | |||
| 541 | pg = &vmp->pgs[off_idx]; | |||
| 542 | if (!(pg->pg_flags & PQ_FREE0x00010000)) | |||
| 543 | return 0; | |||
| 544 | ||||
| 545 | /* | |||
| 546 | * Search for the first non-free page after pg. | |||
| 547 | * Note that the page may not be the first page in a free pmemrange, | |||
| 548 | * therefore pg->fpgsz cannot be used. | |||
| 549 | */ | |||
| 550 | for (pg_end = pg; pg_end <= vmp->lastpg && | |||
| 551 | (pg_end->pg_flags & PQ_FREE0x00010000) == PQ_FREE0x00010000 && | |||
| 552 | (pg_end - pg) < HIBERNATE_CHUNK_SIZE0x400000/PAGE_SIZE(1 << 12); pg_end++) | |||
| 553 | ; | |||
| 554 | return pg_end - pg; | |||
| 555 | } | |||
| 556 | ||||
| 557 | /* | |||
| 558 | * Fills out the hibernate_info union pointed to by hib | |||
| 559 | * with information about this machine (swap signature block | |||
| 560 | * offsets, number of memory ranges, kernel in use, etc) | |||
| 561 | */ | |||
| 562 | int | |||
| 563 | get_hibernate_info(union hibernate_info *hib, int suspend) | |||
| 564 | { | |||
| 565 | struct disklabel dl; | |||
| 566 | char err_string[128], *dl_ret; | |||
| 567 | int part; | |||
| 568 | SHA2_CTX ctx; | |||
| 569 | void *fn; | |||
| 570 | ||||
| 571 | #ifndef NO_PROPOLICE | |||
| 572 | /* Save propolice guard */ | |||
| 573 | hib->guard = __guard_local; | |||
| 574 | #endif /* ! NO_PROPOLICE */ | |||
| 575 | ||||
| 576 | /* Determine I/O function to use */ | |||
| 577 | hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev); | |||
| 578 | if (hib->io_func == NULL((void *)0)) | |||
| 579 | return (1); | |||
| 580 | ||||
| 581 | /* Calculate hibernate device */ | |||
| 582 | hib->dev = swdevt[0].sw_dev; | |||
| 583 | ||||
| 584 | /* Read disklabel (used to calculate signature and image offsets) */ | |||
| 585 | dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string)); | |||
| 586 | ||||
| 587 | if (dl_ret) { | |||
| 588 | printf("Hibernate error reading disklabel: %s\n", dl_ret); | |||
| 589 | return (1); | |||
| 590 | } | |||
| 591 | ||||
| 592 | /* Make sure we have a swap partition. */ | |||
| 593 | part = DISKPART(hib->dev)(((unsigned)((hib->dev) & 0xff) | (((hib->dev) & 0xffff0000) >> 8)) % 16); | |||
| 594 | if (dl.d_npartitions <= part || | |||
| 595 | dl.d_partitions[part].p_fstype != FS_SWAP1 || | |||
| 596 | DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size) == 0) | |||
| 597 | return (1); | |||
| 598 | ||||
| 599 | /* Magic number */ | |||
| 600 | hib->magic = HIBERNATE_MAGIC0x0B5D0B5D; | |||
| 601 | ||||
| 602 | /* Calculate signature block location */ | |||
| 603 | hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size) - | |||
| 604 | sizeof(union hibernate_info)/DEV_BSIZE(1 << 9); | |||
| 605 | ||||
| 606 | SHA256Init(&ctx); | |||
| 607 | SHA256Update(&ctx, version, strlen(version)); | |||
| 608 | fn = printf; | |||
| 609 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
| 610 | fn = malloc; | |||
| 611 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
| 612 | fn = km_alloc; | |||
| 613 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
| 614 | fn = strlen; | |||
| 615 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
| 616 | SHA256Final((u_int8_t *)&hib->kern_hash, &ctx); | |||
| 617 | ||||
| 618 | if (suspend) { | |||
| 619 | /* Grab the previously-allocated piglet addresses */ | |||
| 620 | hib->piglet_va = global_piglet_va; | |||
| 621 | hib->piglet_pa = global_piglet_pa; | |||
| 622 | hib->io_page = (void *)hib->piglet_va; | |||
| 623 | ||||
| 624 | /* | |||
| 625 | * Initialization of the hibernate IO function for drivers | |||
| 626 | * that need to do prep work (such as allocating memory or | |||
| 627 | * setting up data structures that cannot safely be done | |||
| 628 | * during suspend without causing side effects). There is | |||
| 629 | * a matching HIB_DONE call performed after the write is | |||
| 630 | * completed. | |||
| 631 | */ | |||
| 632 | if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_offseth << 32) + (&dl.d_partitions[part])->p_offset), | |||
| 633 | (vaddr_t)NULL((void *)0), DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size), | |||
| 634 | HIB_INIT-1, hib->io_page)) | |||
| 635 | goto fail; | |||
| 636 | ||||
| 637 | } else { | |||
| 638 | /* | |||
| 639 | * Resuming kernels use a regular private page for the driver | |||
| 640 | * No need to free this I/O page as it will vanish as part of | |||
| 641 | * the resume. | |||
| 642 | */ | |||
| 643 | hib->io_page = malloc(PAGE_SIZE(1 << 12), M_DEVBUF2, M_NOWAIT0x0002); | |||
| 644 | if (!hib->io_page) | |||
| 645 | goto fail; | |||
| 646 | } | |||
| 647 | ||||
| 648 | if (get_hibernate_info_md(hib)) | |||
| 649 | goto fail; | |||
| 650 | ||||
| 651 | return (0); | |||
| 652 | ||||
| 653 | fail: | |||
| 654 | return (1); | |||
| 655 | } | |||
| 656 | ||||
| 657 | /* | |||
| 658 | * Allocate nitems*size bytes from the hiballoc area presently in use | |||
| 659 | */ | |||
| 660 | void * | |||
| 661 | hibernate_zlib_alloc(void *unused, int nitems, int size) | |||
| 662 | { | |||
| 663 | struct hibernate_zlib_state *hibernate_state; | |||
| 664 | ||||
| 665 | hibernate_state = | |||
| 666 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 667 | ||||
| 668 | return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); | |||
| 669 | } | |||
| 670 | ||||
| 671 | /* | |||
| 672 | * Free the memory pointed to by addr in the hiballoc area presently in | |||
| 673 | * use | |||
| 674 | */ | |||
| 675 | void | |||
| 676 | hibernate_zlib_free(void *unused, void *addr) | |||
| 677 | { | |||
| 678 | struct hibernate_zlib_state *hibernate_state; | |||
| 679 | ||||
| 680 | hibernate_state = | |||
| 681 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 682 | ||||
| 683 | hib_free(&hibernate_state->hiballoc_arena, addr); | |||
| 684 | } | |||
| 685 | ||||
| 686 | /* | |||
| 687 | * Inflate next page of data from the image stream. | |||
| 688 | * The rle parameter is modified on exit to contain the number of pages to | |||
| 689 | * skip in the output stream (or 0 if this page was inflated into). | |||
| 690 | * | |||
| 691 | * Returns 0 if the stream contains additional data, or 1 if the stream is | |||
| 692 | * finished. | |||
| 693 | */ | |||
| 694 | int | |||
| 695 | hibernate_inflate_page(int *rle) | |||
| 696 | { | |||
| 697 | struct hibernate_zlib_state *hibernate_state; | |||
| 698 | int i; | |||
| 699 | ||||
| 700 | hibernate_state = | |||
| 701 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 702 | ||||
| 703 | /* Set up the stream for RLE code inflate */ | |||
| 704 | hibernate_state->hib_stream.next_out = (unsigned char *)rle; | |||
| 705 | hibernate_state->hib_stream.avail_out = sizeof(*rle); | |||
| 706 | ||||
| 707 | /* Inflate RLE code */ | |||
| 708 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2); | |||
| 709 | if (i != Z_OK0 && i != Z_STREAM_END1) { | |||
| 710 | /* | |||
| 711 | * XXX - this will likely reboot/hang most machines | |||
| 712 | * since the console output buffer will be unmapped, | |||
| 713 | * but there's not much else we can do here. | |||
| 714 | */ | |||
| 715 | panic("rle inflate stream error"); | |||
| 716 | } | |||
| 717 | ||||
| 718 | if (hibernate_state->hib_stream.avail_out != 0) { | |||
| 719 | /* | |||
| 720 | * XXX - this will likely reboot/hang most machines | |||
| 721 | * since the console output buffer will be unmapped, | |||
| 722 | * but there's not much else we can do here. | |||
| 723 | */ | |||
| 724 | panic("rle short inflate error"); | |||
| 725 | } | |||
| 726 | ||||
| 727 | if (*rle < 0 || *rle > 1024) { | |||
| 728 | /* | |||
| 729 | * XXX - this will likely reboot/hang most machines | |||
| 730 | * since the console output buffer will be unmapped, | |||
| 731 | * but there's not much else we can do here. | |||
| 732 | */ | |||
| 733 | panic("invalid rle count"); | |||
| 734 | } | |||
| 735 | ||||
| 736 | if (i == Z_STREAM_END1) | |||
| 737 | return (1); | |||
| 738 | ||||
| 739 | if (*rle != 0) | |||
| 740 | return (0); | |||
| 741 | ||||
| 742 | /* Set up the stream for page inflate */ | |||
| 743 | hibernate_state->hib_stream.next_out = | |||
| 744 | (unsigned char *)HIBERNATE_INFLATE_PAGE((1 << 12) * 33); | |||
| 745 | hibernate_state->hib_stream.avail_out = PAGE_SIZE(1 << 12); | |||
| 746 | ||||
| 747 | /* Process next block of data */ | |||
| 748 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2); | |||
| 749 | if (i != Z_OK0 && i != Z_STREAM_END1) { | |||
| 750 | /* | |||
| 751 | * XXX - this will likely reboot/hang most machines | |||
| 752 | * since the console output buffer will be unmapped, | |||
| 753 | * but there's not much else we can do here. | |||
| 754 | */ | |||
| 755 | panic("inflate error"); | |||
| 756 | } | |||
| 757 | ||||
| 758 | /* We should always have extracted a full page ... */ | |||
| 759 | if (hibernate_state->hib_stream.avail_out != 0) { | |||
| 760 | /* | |||
| 761 | * XXX - this will likely reboot/hang most machines | |||
| 762 | * since the console output buffer will be unmapped, | |||
| 763 | * but there's not much else we can do here. | |||
| 764 | */ | |||
| 765 | panic("incomplete page"); | |||
| 766 | } | |||
| 767 | ||||
| 768 | return (i == Z_STREAM_END1); | |||
| 769 | } | |||
| 770 | ||||
| 771 | /* | |||
| 772 | * Inflate size bytes from src into dest, skipping any pages in | |||
| 773 | * [src..dest] that are special (see hibernate_inflate_skip) | |||
| 774 | * | |||
| 775 | * This function executes while using the resume-time stack | |||
| 776 | * and pmap, and therefore cannot use ddb/printf/etc. Doing so | |||
| 777 | * will likely hang or reset the machine since the console output buffer | |||
| 778 | * will be unmapped. | |||
| 779 | */ | |||
| 780 | void | |||
| 781 | hibernate_inflate_region(union hibernate_info *hib, paddr_t dest, | |||
| 782 | paddr_t src, size_t size) | |||
| 783 | { | |||
| 784 | int end_stream = 0, rle, skip; | |||
| 785 | struct hibernate_zlib_state *hibernate_state; | |||
| 786 | ||||
| 787 | hibernate_state = | |||
| 788 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 789 | ||||
| 790 | hibernate_state->hib_stream.next_in = (unsigned char *)src; | |||
| 791 | hibernate_state->hib_stream.avail_in = size; | |||
| 792 | ||||
| 793 | do { | |||
| 794 | /* | |||
| 795 | * Is this a special page? If yes, redirect the | |||
| 796 | * inflate output to a scratch page (eg, discard it) | |||
| 797 | */ | |||
| 798 | skip = hibernate_inflate_skip(hib, dest); | |||
| 799 | if (skip == HIB_SKIP1) { | |||
| 800 | hibernate_enter_resume_mapping( | |||
| 801 | HIBERNATE_INFLATE_PAGE((1 << 12) * 33), | |||
| 802 | HIBERNATE_INFLATE_PAGE((1 << 12) * 33), 0); | |||
| 803 | } else if (skip == HIB_MOVE2) { | |||
| 804 | /* | |||
| 805 | * Special case : retguard region. This gets moved | |||
| 806 | * temporarily into the piglet region and copied into | |||
| 807 | * place immediately before resume | |||
| 808 | */ | |||
| 809 | hibernate_enter_resume_mapping( | |||
| 810 | HIBERNATE_INFLATE_PAGE((1 << 12) * 33), | |||
| 811 | hib->piglet_pa + (110 * PAGE_SIZE(1 << 12)) + | |||
| 812 | hib->retguard_ofs, 0); | |||
| 813 | hib->retguard_ofs += PAGE_SIZE(1 << 12); | |||
| 814 | if (hib->retguard_ofs > 255 * PAGE_SIZE(1 << 12)) { | |||
| 815 | /* | |||
| 816 | * XXX - this will likely reboot/hang most | |||
| 817 | * machines since the console output | |||
| 818 | * buffer will be unmapped, but there's | |||
| 819 | * not much else we can do here. | |||
| 820 | */ | |||
| 821 | panic("retguard move error, out of space"); | |||
| 822 | } | |||
| 823 | } else { | |||
| 824 | hibernate_enter_resume_mapping( | |||
| 825 | HIBERNATE_INFLATE_PAGE((1 << 12) * 33), dest, 0); | |||
| 826 | } | |||
| 827 | ||||
| 828 | hibernate_flush(); | |||
| 829 | end_stream = hibernate_inflate_page(&rle); | |||
| 830 | ||||
| 831 | if (rle == 0) | |||
| 832 | dest += PAGE_SIZE(1 << 12); | |||
| 833 | else | |||
| 834 | dest += (rle * PAGE_SIZE(1 << 12)); | |||
| 835 | } while (!end_stream); | |||
| 836 | } | |||
| 837 | ||||
| 838 | /* | |||
| 839 | * deflate from src into the I/O page, up to 'remaining' bytes | |||
| 840 | * | |||
| 841 | * Returns number of input bytes consumed, and may reset | |||
| 842 | * the 'remaining' parameter if not all the output space was consumed | |||
| 843 | * (this information is needed to know how much to write to disk) | |||
| 844 | */ | |||
| 845 | size_t | |||
| 846 | hibernate_deflate(union hibernate_info *hib, paddr_t src, | |||
| 847 | size_t *remaining) | |||
| 848 | { | |||
| 849 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); | |||
| 850 | struct hibernate_zlib_state *hibernate_state; | |||
| 851 | ||||
| 852 | hibernate_state = | |||
| 853 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 854 | ||||
| 855 | /* Set up the stream for deflate */ | |||
| 856 | hibernate_state->hib_stream.next_in = (unsigned char *)src; | |||
| 857 | hibernate_state->hib_stream.avail_in = PAGE_SIZE(1 << 12) - (src & PAGE_MASK((1 << 12) - 1)); | |||
| 858 | hibernate_state->hib_stream.next_out = | |||
| 859 | (unsigned char *)hibernate_io_page + (PAGE_SIZE(1 << 12) - *remaining); | |||
| 860 | hibernate_state->hib_stream.avail_out = *remaining; | |||
| 861 | ||||
| 862 | /* Process next block of data */ | |||
| 863 | if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2) != Z_OK0) | |||
| 864 | panic("hibernate zlib deflate error"); | |||
| 865 | ||||
| 866 | /* Update pointers and return number of bytes consumed */ | |||
| 867 | *remaining = hibernate_state->hib_stream.avail_out; | |||
| 868 | return (PAGE_SIZE(1 << 12) - (src & PAGE_MASK((1 << 12) - 1))) - | |||
| 869 | hibernate_state->hib_stream.avail_in; | |||
| 870 | } | |||
| 871 | ||||
| 872 | /* | |||
| 873 | * Write the hibernation information specified in hiber_info | |||
| 874 | * to the location in swap previously calculated (last block of | |||
| 875 | * swap), called the "signature block". | |||
| 876 | */ | |||
| 877 | int | |||
| 878 | hibernate_write_signature(union hibernate_info *hib) | |||
| 879 | { | |||
| 880 | /* Write hibernate info to disk */ | |||
| 881 | return (hib->io_func(hib->dev, hib->sig_offset, | |||
| 882 | (vaddr_t)hib, DEV_BSIZE(1 << 9), HIB_W1, | |||
| 883 | hib->io_page)); | |||
| 884 | } | |||
| 885 | ||||
| 886 | /* | |||
| 887 | * Write the memory chunk table to the area in swap immediately | |||
| 888 | * preceding the signature block. The chunk table is stored | |||
| 889 | * in the piglet when this function is called. Returns errno. | |||
| 890 | */ | |||
| 891 | int | |||
| 892 | hibernate_write_chunktable(union hibernate_info *hib) | |||
| 893 | { | |||
| 894 | vaddr_t hibernate_chunk_table_start; | |||
| 895 | size_t hibernate_chunk_table_size; | |||
| 896 | int i, err; | |||
| 897 | ||||
| 898 | hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE0x100000; | |||
| 899 | ||||
| 900 | hibernate_chunk_table_start = hib->piglet_va + | |||
| 901 | HIBERNATE_CHUNK_SIZE0x400000; | |||
| 902 | ||||
| 903 | /* Write chunk table */ | |||
| 904 | for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS(64 * 1024)) { | |||
| 905 | if ((err = hib->io_func(hib->dev, | |||
| 906 | hib->chunktable_offset + (i/DEV_BSIZE(1 << 9)), | |||
| 907 | (vaddr_t)(hibernate_chunk_table_start + i), | |||
| 908 | MAXPHYS(64 * 1024), HIB_W1, hib->io_page))) { | |||
| 909 | DPRINTF("chunktable write error: %d\n", err); | |||
| 910 | return (err); | |||
| 911 | } | |||
| 912 | } | |||
| 913 | ||||
| 914 | return (0); | |||
| 915 | } | |||
| 916 | ||||
| 917 | /* | |||
| 918 | * Write an empty hiber_info to the swap signature block, which is | |||
| 919 | * guaranteed to not match any valid hib. | |||
| 920 | */ | |||
| 921 | int | |||
| 922 | hibernate_clear_signature(union hibernate_info *hib) | |||
| 923 | { | |||
| 924 | union hibernate_info blank_hiber_info; | |||
| 925 | ||||
| 926 | /* Zero out a blank hiber_info */ | |||
| 927 | memset(&blank_hiber_info, 0, sizeof(union hibernate_info))__builtin_memset((&blank_hiber_info), (0), (sizeof(union hibernate_info ))); | |||
| 928 | ||||
| 929 | /* Write (zeroed) hibernate info to disk */ | |||
| 930 | DPRINTF("clearing hibernate signature block location: %lld\n", | |||
| 931 | hib->sig_offset); | |||
| 932 | if (hibernate_block_io(hib, | |||
| 933 | hib->sig_offset, | |||
| 934 | DEV_BSIZE(1 << 9), (vaddr_t)&blank_hiber_info, 1)) | |||
| 935 | printf("Warning: could not clear hibernate signature\n"); | |||
| 936 | ||||
| 937 | return (0); | |||
| 938 | } | |||
| 939 | ||||
| 940 | /* | |||
| 941 | * Compare two hibernate_infos to determine if they are the same (eg, | |||
| 942 | * we should be performing a hibernate resume on this machine. | |||
| 943 | * Not all fields are checked - just enough to verify that the machine | |||
| 944 | * has the same memory configuration and kernel as the one that | |||
| 945 | * wrote the signature previously. | |||
| 946 | */ | |||
| 947 | int | |||
| 948 | hibernate_compare_signature(union hibernate_info *mine, | |||
| 949 | union hibernate_info *disk) | |||
| 950 | { | |||
| 951 | u_int i; | |||
| 952 | ||||
| 953 | if (mine->nranges != disk->nranges) { | |||
| 954 | printf("unhibernate failed: memory layout changed\n"); | |||
| 955 | return (1); | |||
| 956 | } | |||
| 957 | ||||
| 958 | if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH32) != 0) { | |||
| 959 | printf("unhibernate failed: original kernel changed\n"); | |||
| 960 | return (1); | |||
| 961 | } | |||
| 962 | ||||
| 963 | for (i = 0; i < mine->nranges; i++) { | |||
| 964 | if ((mine->ranges[i].base != disk->ranges[i].base) || | |||
| 965 | (mine->ranges[i].end != disk->ranges[i].end) ) { | |||
| 966 | DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", | |||
| 967 | i, | |||
| 968 | (void *)mine->ranges[i].base, | |||
| 969 | (void *)mine->ranges[i].end, | |||
| 970 | (void *)disk->ranges[i].base, | |||
| 971 | (void *)disk->ranges[i].end); | |||
| 972 | printf("unhibernate failed: memory size changed\n"); | |||
| 973 | return (1); | |||
| 974 | } | |||
| 975 | } | |||
| 976 | ||||
| 977 | return (0); | |||
| 978 | } | |||
| 979 | ||||
| 980 | /* | |||
| 981 | * Transfers xfer_size bytes between the hibernate device specified in | |||
| 982 | * hib_info at offset blkctr and the vaddr specified at dest. | |||
| 983 | * | |||
| 984 | * Separate offsets and pages are used to handle misaligned reads (reads | |||
| 985 | * that span a page boundary). | |||
| 986 | * | |||
| 987 | * blkctr specifies a relative offset (relative to the start of swap), | |||
| 988 | * not an absolute disk offset | |||
| 989 | * | |||
| 990 | */ | |||
| 991 | int | |||
| 992 | hibernate_block_io(union hibernate_info *hib, daddr_t blkctr, | |||
| 993 | size_t xfer_size, vaddr_t dest, int iswrite) | |||
| 994 | { | |||
| 995 | struct buf *bp; | |||
| 996 | struct bdevsw *bdsw; | |||
| 997 | int error; | |||
| 998 | ||||
| 999 | bp = geteblk(xfer_size); | |||
| 1000 | bdsw = &bdevsw[major(hib->dev)(((unsigned)(hib->dev) >> 8) & 0xff)]; | |||
| 1001 | ||||
| 1002 | error = (*bdsw->d_open)(hib->dev, FREAD0x0001, S_IFCHR0020000, curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1003 | if (error) { | |||
| 1004 | printf("hibernate_block_io open failed\n"); | |||
| 1005 | return (1); | |||
| 1006 | } | |||
| 1007 | ||||
| 1008 | if (iswrite) | |||
| 1009 | bcopy((caddr_t)dest, bp->b_data, xfer_size); | |||
| 1010 | ||||
| 1011 | bp->b_bcount = xfer_size; | |||
| 1012 | bp->b_blkno = blkctr; | |||
| 1013 | CLR(bp->b_flags, B_READ | B_WRITE | B_DONE)((bp->b_flags) &= ~(0x00008000 | 0x00000000 | 0x00000100 )); | |||
| 1014 | SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW)((bp->b_flags) |= (0x00000010 | (iswrite ? 0x00000000 : 0x00008000 ) | 0x00004000)); | |||
| 1015 | bp->b_dev = hib->dev; | |||
| 1016 | (*bdsw->d_strategy)(bp); | |||
| 1017 | ||||
| 1018 | error = biowait(bp); | |||
| 1019 | if (error) { | |||
| 1020 | printf("hib block_io biowait error %d blk %lld size %zu\n", | |||
| 1021 | error, (long long)blkctr, xfer_size); | |||
| 1022 | error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR0020000, | |||
| 1023 | curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1024 | if (error) | |||
| 1025 | printf("hibernate_block_io error close failed\n"); | |||
| 1026 | return (1); | |||
| 1027 | } | |||
| 1028 | ||||
| 1029 | error = (*bdsw->d_close)(hib->dev, FREAD0x0001, S_IFCHR0020000, curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1030 | if (error) { | |||
| 1031 | printf("hibernate_block_io close failed\n"); | |||
| 1032 | return (1); | |||
| 1033 | } | |||
| 1034 | ||||
| 1035 | if (!iswrite) | |||
| 1036 | bcopy(bp->b_data, (caddr_t)dest, xfer_size); | |||
| 1037 | ||||
| 1038 | bp->b_flags |= B_INVAL0x00000800; | |||
| 1039 | brelse(bp); | |||
| 1040 | ||||
| 1041 | return (0); | |||
| 1042 | } | |||
| 1043 | ||||
| 1044 | /* | |||
| 1045 | * Preserve one page worth of random data, generated from the resuming | |||
| 1046 | * kernel's arc4random. After resume, this preserved entropy can be used | |||
| 1047 | * to further improve the un-hibernated machine's entropy pool. This | |||
| 1048 | * random data is stored in the piglet, which is preserved across the | |||
| 1049 | * unpack operation, and is restored later in the resume process (see | |||
| 1050 | * hib_getentropy) | |||
| 1051 | */ | |||
| 1052 | void | |||
| 1053 | hibernate_preserve_entropy(union hibernate_info *hib) | |||
| 1054 | { | |||
| 1055 | void *entropy; | |||
| 1056 | ||||
| 1057 | entropy = km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, &kd_nowait); | |||
| 1058 | ||||
| 1059 | if (!entropy) | |||
| 1060 | return; | |||
| 1061 | ||||
| 1062 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1063 | pmap_kenter_pa((vaddr_t)entropy, | |||
| 1064 | (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE(1 << 12))), | |||
| 1065 | PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1066 | ||||
| 1067 | arc4random_buf((void *)entropy, PAGE_SIZE(1 << 12)); | |||
| 1068 | pmap_kremove((vaddr_t)entropy, PAGE_SIZE(1 << 12)); | |||
| 1069 | km_free(entropy, PAGE_SIZE(1 << 12), &kv_any, &kp_none); | |||
| 1070 | } | |||
| 1071 | ||||
| 1072 | #ifndef NO_PROPOLICE | |||
| 1073 | vaddr_t | |||
| 1074 | hibernate_unprotect_ssp(void) | |||
| 1075 | { | |||
| 1076 | struct kmem_dyn_mode kd_avoidalias; | |||
| 1077 | vaddr_t va = trunc_page((vaddr_t)&__guard_local)(((vaddr_t)&__guard_local) & ~((1 << 12) - 1)); | |||
| 1078 | paddr_t pa; | |||
| 1079 | ||||
| 1080 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | |||
| 1081 | ||||
| 1082 | memset(&kd_avoidalias, 0, sizeof kd_avoidalias)__builtin_memset((&kd_avoidalias), (0), (sizeof kd_avoidalias )); | |||
| 1083 | kd_avoidalias.kd_prefer = pa; | |||
| 1084 | kd_avoidalias.kd_waitok = 1; | |||
| 1085 | va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, &kd_avoidalias); | |||
| 1086 | if (!va) | |||
| 1087 | panic("hibernate_unprotect_ssp"); | |||
| 1088 | ||||
| 1089 | pmap_kenter_pa(va, pa, PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1090 | pmap_update(pmap_kernel()); | |||
| 1091 | ||||
| 1092 | return va; | |||
| 1093 | } | |||
| 1094 | ||||
| 1095 | void | |||
| 1096 | hibernate_reprotect_ssp(vaddr_t va) | |||
| 1097 | { | |||
| 1098 | pmap_kremove(va, PAGE_SIZE(1 << 12)); | |||
| 1099 | km_free((void *)va, PAGE_SIZE(1 << 12), &kv_any, &kp_none); | |||
| 1100 | } | |||
| 1101 | #endif /* NO_PROPOLICE */ | |||
| 1102 | ||||
| 1103 | /* | |||
| 1104 | * Reads the signature block from swap, checks against the current machine's | |||
| 1105 | * information. If the information matches, perform a resume by reading the | |||
| 1106 | * saved image into the pig area, and unpacking. | |||
| 1107 | * | |||
| 1108 | * Must be called with interrupts enabled. | |||
| 1109 | */ | |||
| 1110 | void | |||
| 1111 | hibernate_resume(void) | |||
| 1112 | { | |||
| 1113 | union hibernate_info hib; | |||
| 1114 | int s; | |||
| 1115 | #ifndef NO_PROPOLICE | |||
| 1116 | vsize_t off = (vaddr_t)&__guard_local - | |||
| 1117 | trunc_page((vaddr_t)&__guard_local)(((vaddr_t)&__guard_local) & ~((1 << 12) - 1)); | |||
| 1118 | vaddr_t guard_va; | |||
| 1119 | #endif | |||
| 1120 | ||||
| 1121 | /* Get current running machine's hibernate info */ | |||
| 1122 | memset(&hib, 0, sizeof(hib))__builtin_memset((&hib), (0), (sizeof(hib))); | |||
| 1123 | if (get_hibernate_info(&hib, 0)) { | |||
| 1124 | DPRINTF("couldn't retrieve machine's hibernate info\n"); | |||
| 1125 | return; | |||
| 1126 | } | |||
| 1127 | ||||
| 1128 | /* Read hibernate info from disk */ | |||
| 1129 | s = splbio()splraise(0x3); | |||
| 1130 | ||||
| 1131 | DPRINTF("reading hibernate signature block location: %lld\n", | |||
| 1132 | hib.sig_offset); | |||
| 1133 | ||||
| 1134 | if (hibernate_block_io(&hib, | |||
| 1135 | hib.sig_offset, | |||
| 1136 | DEV_BSIZE(1 << 9), (vaddr_t)&disk_hib, 0)) { | |||
| 1137 | DPRINTF("error in hibernate read"); | |||
| 1138 | splx(s)spllower(s); | |||
| 1139 | return; | |||
| 1140 | } | |||
| 1141 | ||||
| 1142 | /* Check magic number */ | |||
| 1143 | if (disk_hib.magic != HIBERNATE_MAGIC0x0B5D0B5D) { | |||
| 1144 | DPRINTF("wrong magic number in hibernate signature: %x\n", | |||
| 1145 | disk_hib.magic); | |||
| 1146 | splx(s)spllower(s); | |||
| 1147 | return; | |||
| 1148 | } | |||
| 1149 | ||||
| 1150 | /* | |||
| 1151 | * We (possibly) found a hibernate signature. Clear signature first, | |||
| 1152 | * to prevent accidental resume or endless resume cycles later. | |||
| 1153 | */ | |||
| 1154 | if (hibernate_clear_signature(&hib)) { | |||
| 1155 | DPRINTF("error clearing hibernate signature block\n"); | |||
| 1156 | splx(s)spllower(s); | |||
| 1157 | return; | |||
| 1158 | } | |||
| 1159 | ||||
| 1160 | /* | |||
| 1161 | * If on-disk and in-memory hibernate signatures match, | |||
| 1162 | * this means we should do a resume from hibernate. | |||
| 1163 | */ | |||
| 1164 | if (hibernate_compare_signature(&hib, &disk_hib)) { | |||
| 1165 | DPRINTF("mismatched hibernate signature block\n"); | |||
| 1166 | splx(s)spllower(s); | |||
| 1167 | return; | |||
| 1168 | } | |||
| 1169 | disk_hib.dev = hib.dev; | |||
| 1170 | ||||
| 1171 | #ifdef MULTIPROCESSOR1 | |||
| 1172 | /* XXX - if we fail later, we may need to rehatch APs on some archs */ | |||
| 1173 | DPRINTF("hibernate: quiescing APs\n"); | |||
| 1174 | hibernate_quiesce_cpus(); | |||
| 1175 | #endif /* MULTIPROCESSOR */ | |||
| 1176 | ||||
| 1177 | /* Read the image from disk into the image (pig) area */ | |||
| 1178 | if (hibernate_read_image(&disk_hib)) | |||
| 1179 | goto fail; | |||
| 1180 | ||||
| 1181 | DPRINTF("hibernate: quiescing devices\n"); | |||
| 1182 | if (config_suspend_all(DVACT_QUIESCE2) != 0) | |||
| 1183 | goto fail; | |||
| 1184 | ||||
| 1185 | #ifndef NO_PROPOLICE | |||
| 1186 | guard_va = hibernate_unprotect_ssp(); | |||
| 1187 | #endif /* NO_PROPOLICE */ | |||
| 1188 | ||||
| 1189 | (void) splhigh()splraise(0xd); | |||
| 1190 | hibernate_disable_intr_machdep(); | |||
| 1191 | cold = 2; | |||
| 1192 | ||||
| 1193 | DPRINTF("hibernate: suspending devices\n"); | |||
| 1194 | if (config_suspend_all(DVACT_SUSPEND3) != 0) { | |||
| 1195 | cold = 0; | |||
| 1196 | hibernate_enable_intr_machdep(); | |||
| 1197 | #ifndef NO_PROPOLICE | |||
| 1198 | hibernate_reprotect_ssp(guard_va); | |||
| 1199 | #endif /* ! NO_PROPOLICE */ | |||
| 1200 | goto fail; | |||
| 1201 | } | |||
| 1202 | ||||
| 1203 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_start, | |||
| 1204 | &retguard_start_phys); | |||
| 1205 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_end, | |||
| 1206 | &retguard_end_phys); | |||
| 1207 | ||||
| 1208 | hibernate_preserve_entropy(&disk_hib); | |||
| 1209 | ||||
| 1210 | printf("Unpacking image...\n"); | |||
| 1211 | ||||
| 1212 | /* Switch stacks */ | |||
| 1213 | DPRINTF("hibernate: switching stacks\n"); | |||
| 1214 | hibernate_switch_stack_machdep(); | |||
| 1215 | ||||
| 1216 | #ifndef NO_PROPOLICE | |||
| 1217 | /* Start using suspended kernel's propolice guard */ | |||
| 1218 | *(long *)(guard_va + off) = disk_hib.guard; | |||
| 1219 | hibernate_reprotect_ssp(guard_va); | |||
| 1220 | #endif /* ! NO_PROPOLICE */ | |||
| 1221 | ||||
| 1222 | /* Unpack and resume */ | |||
| 1223 | hibernate_unpack_image(&disk_hib); | |||
| 1224 | ||||
| 1225 | fail: | |||
| 1226 | splx(s)spllower(s); | |||
| 1227 | printf("\nUnable to resume hibernated image\n"); | |||
| 1228 | } | |||
| 1229 | ||||
| 1230 | /* | |||
| 1231 | * Unpack image from pig area to original location by looping through the | |||
| 1232 | * list of output chunks in the order they should be restored (fchunks). | |||
| 1233 | * | |||
| 1234 | * Note that due to the stack smash protector and the fact that we have | |||
| 1235 | * switched stacks, it is not permitted to return from this function. | |||
| 1236 | */ | |||
| 1237 | void | |||
| 1238 | hibernate_unpack_image(union hibernate_info *hib) | |||
| 1239 | { | |||
| 1240 | struct hibernate_disk_chunk *chunks; | |||
| 1241 | union hibernate_info local_hib; | |||
| 1242 | paddr_t image_cur = global_pig_start; | |||
| 1243 | short i, *fchunks; | |||
| 1244 | char *pva; | |||
| 1245 | ||||
| 1246 | /* Piglet will be identity mapped (VA == PA) */ | |||
| 1247 | pva = (char *)hib->piglet_pa; | |||
| 1248 | ||||
| 1249 | fchunks = (short *)(pva + (4 * PAGE_SIZE(1 << 12))); | |||
| 1250 | ||||
| 1251 | chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE0x400000); | |||
| 1252 | ||||
| 1253 | /* Can't use hiber_info that's passed in after this point */ | |||
| 1254 | bcopy(hib, &local_hib, sizeof(union hibernate_info)); | |||
| 1255 | local_hib.retguard_ofs = 0; | |||
| 1256 | ||||
| 1257 | /* VA == PA */ | |||
| 1258 | local_hib.piglet_va = local_hib.piglet_pa; | |||
| 1259 | ||||
| 1260 | /* | |||
| 1261 | * Point of no return. Once we pass this point, only kernel code can | |||
| 1262 | * be accessed. No global variables or other kernel data structures | |||
| 1263 | * are guaranteed to be coherent after unpack starts. | |||
| 1264 | * | |||
| 1265 | * The image is now in high memory (pig area), we unpack from the pig | |||
| 1266 | * to the correct location in memory. We'll eventually end up copying | |||
| 1267 | * on top of ourself, but we are assured the kernel code here is the | |||
| 1268 | * same between the hibernated and resuming kernel, and we are running | |||
| 1269 | * on our own stack, so the overwrite is ok. | |||
| 1270 | */ | |||
| 1271 | DPRINTF("hibernate: activating alt. pagetable and starting unpack\n"); | |||
| 1272 | hibernate_activate_resume_pt_machdep(); | |||
| 1273 | ||||
| 1274 | for (i = 0; i < local_hib.chunk_ctr; i++) { | |||
| 1275 | /* Reset zlib for inflate */ | |||
| 1276 | if (hibernate_zlib_reset(&local_hib, 0) != Z_OK0) | |||
| 1277 | panic("hibernate failed to reset zlib for inflate"); | |||
| 1278 | ||||
| 1279 | hibernate_process_chunk(&local_hib, &chunks[fchunks[i]], | |||
| 1280 | image_cur); | |||
| 1281 | ||||
| 1282 | image_cur += chunks[fchunks[i]].compressed_size; | |||
| 1283 | } | |||
| 1284 | ||||
| 1285 | /* | |||
| 1286 | * Resume the loaded kernel by jumping to the MD resume vector. | |||
| 1287 | * We won't be returning from this call. We pass the location of | |||
| 1288 | * the retguard save area so the MD code can replace it before | |||
| 1289 | * resuming. See the piglet layout at the top of this file for | |||
| 1290 | * more information on the layout of the piglet area. | |||
| 1291 | * | |||
| 1292 | * We use 'global_piglet_va' here since by the time we are at | |||
| 1293 | * this point, we have already unpacked the image, and we want | |||
| 1294 | * the suspended kernel's view of what the piglet was, before | |||
| 1295 | * suspend occurred (since we will need to use that in the retguard | |||
| 1296 | * copy code in hibernate_resume_machdep.) | |||
| 1297 | */ | |||
| 1298 | hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE(1 << 12))); | |||
| 1299 | } | |||
| 1300 | ||||
| 1301 | /* | |||
| 1302 | * Bounce a compressed image chunk to the piglet, entering mappings for the | |||
| 1303 | * copied pages as needed | |||
| 1304 | */ | |||
| 1305 | void | |||
| 1306 | hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size) | |||
| 1307 | { | |||
| 1308 | size_t ct, ofs; | |||
| 1309 | paddr_t src = img_cur; | |||
| 1310 | vaddr_t dest = piglet; | |||
| 1311 | ||||
| 1312 | /* Copy first partial page */ | |||
| 1313 | ct = (PAGE_SIZE(1 << 12)) - (src & PAGE_MASK((1 << 12) - 1)); | |||
| 1314 | ofs = (src & PAGE_MASK((1 << 12) - 1)); | |||
| 1315 | ||||
| 1316 | if (ct < PAGE_SIZE(1 << 12)) { | |||
| 1317 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE((1 << 12) * 33), | |||
| 1318 | (src - ofs), 0); | |||
| 1319 | hibernate_flush(); | |||
| 1320 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE((1 << 12) * 33) + ofs), (caddr_t)dest, ct); | |||
| 1321 | src += ct; | |||
| 1322 | dest += ct; | |||
| 1323 | } | |||
| 1324 | ||||
| 1325 | /* Copy remaining pages */ | |||
| 1326 | while (src < size + img_cur) { | |||
| 1327 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE((1 << 12) * 33), src, 0); | |||
| 1328 | hibernate_flush(); | |||
| 1329 | ct = PAGE_SIZE(1 << 12); | |||
| 1330 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE((1 << 12) * 33)), (caddr_t)dest, ct); | |||
| 1331 | hibernate_flush(); | |||
| 1332 | src += ct; | |||
| 1333 | dest += ct; | |||
| 1334 | } | |||
| 1335 | } | |||
| 1336 | ||||
| 1337 | /* | |||
| 1338 | * Process a chunk by bouncing it to the piglet, followed by unpacking | |||
| 1339 | */ | |||
| 1340 | void | |||
| 1341 | hibernate_process_chunk(union hibernate_info *hib, | |||
| 1342 | struct hibernate_disk_chunk *chunk, paddr_t img_cur) | |||
| 1343 | { | |||
| 1344 | char *pva = (char *)hib->piglet_va; | |||
| 1345 | ||||
| 1346 | hibernate_copy_chunk_to_piglet(img_cur, | |||
| 1347 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE0x400000 * 2)), chunk->compressed_size); | |||
| 1348 | hibernate_inflate_region(hib, chunk->base, | |||
| 1349 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE0x400000 * 2)), | |||
| 1350 | chunk->compressed_size); | |||
| 1351 | } | |||
| 1352 | ||||
| 1353 | /* | |||
| 1354 | * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between | |||
| 1355 | * inaddr and range_end. | |||
| 1356 | */ | |||
| 1357 | int | |||
| 1358 | hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) | |||
| 1359 | { | |||
| 1360 | int rle; | |||
| 1361 | ||||
| 1362 | rle = uvm_page_rle(inaddr); | |||
| 1363 | KASSERT(rle >= 0 && rle <= MAX_RLE)((rle >= 0 && rle <= (0x400000 / (1 << 12 ))) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1363, "rle >= 0 && rle <= MAX_RLE")); | |||
| 1364 | ||||
| 1365 | /* Clamp RLE to range end */ | |||
| 1366 | if (rle > 0 && inaddr + (rle * PAGE_SIZE(1 << 12)) > range_end) | |||
| 1367 | rle = (range_end - inaddr) / PAGE_SIZE(1 << 12); | |||
| 1368 | ||||
| 1369 | return (rle); | |||
| 1370 | } | |||
| 1371 | ||||
| 1372 | /* | |||
| 1373 | * Write the RLE byte for page at 'inaddr' to the output stream. | |||
| 1374 | * Returns the number of pages to be skipped at 'inaddr'. | |||
| 1375 | */ | |||
| 1376 | int | |||
| 1377 | hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr, | |||
| 1378 | paddr_t range_end, daddr_t *blkctr, | |||
| 1379 | size_t *out_remaining) | |||
| 1380 | { | |||
| 1381 | int rle, err, *rleloc; | |||
| 1382 | struct hibernate_zlib_state *hibernate_state; | |||
| 1383 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); | |||
| 1384 | ||||
| 1385 | hibernate_state = | |||
| 1386 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 1387 | ||||
| 1388 | rle = hibernate_calc_rle(inaddr, range_end); | |||
| 1389 | ||||
| 1390 | rleloc = (int *)hibernate_rle_page + MAX_RLE(0x400000 / (1 << 12)) - 1; | |||
| 1391 | *rleloc = rle; | |||
| 1392 | ||||
| 1393 | /* Deflate the RLE byte into the stream */ | |||
| 1394 | hibernate_deflate(hib, (paddr_t)rleloc, out_remaining); | |||
| 1395 | ||||
| 1396 | /* Did we fill the output page? If so, flush to disk */ | |||
| 1397 | if (*out_remaining == 0) { | |||
| 1398 | if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset, | |||
| 1399 | (vaddr_t)hibernate_io_page, PAGE_SIZE(1 << 12), HIB_W1, | |||
| 1400 | hib->io_page))) { | |||
| 1401 | DPRINTF("hib write error %d\n", err); | |||
| 1402 | return (err); | |||
| 1403 | } | |||
| 1404 | ||||
| 1405 | *blkctr += PAGE_SIZE(1 << 12) / DEV_BSIZE(1 << 9); | |||
| 1406 | *out_remaining = PAGE_SIZE(1 << 12); | |||
| 1407 | ||||
| 1408 | /* If we didn't deflate the entire RLE byte, finish it now */ | |||
| 1409 | if (hibernate_state->hib_stream.avail_in != 0) | |||
| 1410 | hibernate_deflate(hib, | |||
| 1411 | (vaddr_t)hibernate_state->hib_stream.next_in, | |||
| 1412 | out_remaining); | |||
| 1413 | } | |||
| 1414 | ||||
| 1415 | return (rle); | |||
| 1416 | } | |||
| 1417 | ||||
| 1418 | /* | |||
| 1419 | * Write a compressed version of this machine's memory to disk, at the | |||
| 1420 | * precalculated swap offset: | |||
| 1421 | * | |||
| 1422 | * end of swap - signature block size - chunk table size - memory size | |||
| 1423 | * | |||
| 1424 | * The function begins by looping through each phys mem range, cutting each | |||
| 1425 | * one into MD sized chunks. These chunks are then compressed individually | |||
| 1426 | * and written out to disk, in phys mem order. Some chunks might compress | |||
| 1427 | * more than others, and for this reason, each chunk's size is recorded | |||
| 1428 | * in the chunk table, which is written to disk after the image has | |||
| 1429 | * properly been compressed and written (in hibernate_write_chunktable). | |||
| 1430 | * | |||
| 1431 | * When this function is called, the machine is nearly suspended - most | |||
| 1432 | * devices are quiesced/suspended, interrupts are off, and cold has | |||
| 1433 | * been set. This means that there can be no side effects once the | |||
| 1434 | * write has started, and the write function itself can also have no | |||
| 1435 | * side effects. This also means no printfs are permitted (since printf | |||
| 1436 | * has side effects.) | |||
| 1437 | * | |||
| 1438 | * Return values : | |||
| 1439 | * | |||
| 1440 | * 0 - success | |||
| 1441 | * EIO - I/O error occurred writing the chunks | |||
| 1442 | * EINVAL - Failed to write a complete range | |||
| 1443 | * ENOMEM - Memory allocation failure during preparation of the zlib arena | |||
| 1444 | */ | |||
| 1445 | int | |||
| 1446 | hibernate_write_chunks(union hibernate_info *hib) | |||
| 1447 | { | |||
| 1448 | paddr_t range_base, range_end, inaddr, temp_inaddr; | |||
| 1449 | size_t nblocks, out_remaining, used; | |||
| 1450 | struct hibernate_disk_chunk *chunks; | |||
| 1451 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); | |||
| 1452 | daddr_t blkctr = 0; | |||
| 1453 | int i, rle, err; | |||
| 1454 | struct hibernate_zlib_state *hibernate_state; | |||
| 1455 | ||||
| 1456 | hibernate_state = | |||
| 1457 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 1458 | ||||
| 1459 | hib->chunk_ctr = 0; | |||
| 1460 | ||||
| 1461 | /* | |||
| 1462 | * Map the utility VAs to the piglet. See the piglet map at the | |||
| 1463 | * top of this file for piglet layout information. | |||
| 1464 | */ | |||
| 1465 | hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE(1 << 12); | |||
| 1466 | hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE(1 << 12); | |||
| 1467 | ||||
| 1468 | chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + | |||
| 1469 | HIBERNATE_CHUNK_SIZE0x400000); | |||
| 1470 | ||||
| 1471 | /* Calculate the chunk regions */ | |||
| 1472 | for (i = 0; i < hib->nranges; i++) { | |||
| 1473 | range_base = hib->ranges[i].base; | |||
| 1474 | range_end = hib->ranges[i].end; | |||
| 1475 | ||||
| 1476 | inaddr = range_base; | |||
| 1477 | ||||
| 1478 | while (inaddr < range_end) { | |||
| 1479 | chunks[hib->chunk_ctr].base = inaddr; | |||
| 1480 | if (inaddr + HIBERNATE_CHUNK_SIZE0x400000 < range_end) | |||
| 1481 | chunks[hib->chunk_ctr].end = inaddr + | |||
| 1482 | HIBERNATE_CHUNK_SIZE0x400000; | |||
| 1483 | else | |||
| 1484 | chunks[hib->chunk_ctr].end = range_end; | |||
| 1485 | ||||
| 1486 | inaddr += HIBERNATE_CHUNK_SIZE0x400000; | |||
| 1487 | hib->chunk_ctr ++; | |||
| 1488 | } | |||
| 1489 | } | |||
| 1490 | ||||
| 1491 | uvm_pmr_dirty_everything(); | |||
| 1492 | uvm_pmr_zero_everything(); | |||
| 1493 | ||||
| 1494 | /* Compress and write the chunks in the chunktable */ | |||
| 1495 | for (i = 0; i < hib->chunk_ctr; i++) { | |||
| 1496 | range_base = chunks[i].base; | |||
| 1497 | range_end = chunks[i].end; | |||
| 1498 | ||||
| 1499 | chunks[i].offset = blkctr + hib->image_offset; | |||
| 1500 | ||||
| 1501 | /* Reset zlib for deflate */ | |||
| 1502 | if (hibernate_zlib_reset(hib, 1) != Z_OK0) { | |||
| 1503 | DPRINTF("hibernate_zlib_reset failed for deflate\n"); | |||
| 1504 | return (ENOMEM12); | |||
| 1505 | } | |||
| 1506 | ||||
| 1507 | inaddr = range_base; | |||
| 1508 | ||||
| 1509 | /* | |||
| 1510 | * For each range, loop through its phys mem region | |||
| 1511 | * and write out the chunks (the last chunk might be | |||
| 1512 | * smaller than the chunk size). | |||
| 1513 | */ | |||
| 1514 | while (inaddr < range_end) { | |||
| 1515 | out_remaining = PAGE_SIZE(1 << 12); | |||
| 1516 | while (out_remaining > 0 && inaddr < range_end) { | |||
| 1517 | /* | |||
| 1518 | * Adjust for regions that are not evenly | |||
| 1519 | * divisible by PAGE_SIZE or overflowed | |||
| 1520 | * pages from the previous iteration. | |||
| 1521 | */ | |||
| 1522 | temp_inaddr = (inaddr & PAGE_MASK((1 << 12) - 1)) + | |||
| 1523 | hibernate_copy_page; | |||
| 1524 | ||||
| 1525 | /* Deflate from temp_inaddr to IO page */ | |||
| 1526 | if (inaddr != range_end) { | |||
| 1527 | if (inaddr % PAGE_SIZE(1 << 12) == 0) { | |||
| 1528 | rle = hibernate_write_rle(hib, | |||
| 1529 | inaddr, | |||
| 1530 | range_end, | |||
| 1531 | &blkctr, | |||
| 1532 | &out_remaining); | |||
| 1533 | } | |||
| 1534 | ||||
| 1535 | if (rle == 0) { | |||
| 1536 | pmap_kenter_pa(hibernate_temp_page, | |||
| 1537 | inaddr & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1)), | |||
| 1538 | PROT_READ0x01); | |||
| 1539 | ||||
| 1540 | bcopy((caddr_t)hibernate_temp_page, | |||
| 1541 | (caddr_t)hibernate_copy_page, | |||
| 1542 | PAGE_SIZE(1 << 12)); | |||
| 1543 | inaddr += hibernate_deflate(hib, | |||
| 1544 | temp_inaddr, | |||
| 1545 | &out_remaining); | |||
| 1546 | } else { | |||
| 1547 | inaddr += rle * PAGE_SIZE(1 << 12); | |||
| 1548 | if (inaddr > range_end) | |||
| 1549 | inaddr = range_end; | |||
| 1550 | } | |||
| 1551 | ||||
| 1552 | } | |||
| 1553 | ||||
| 1554 | if (out_remaining == 0) { | |||
| 1555 | /* Filled up the page */ | |||
| 1556 | nblocks = PAGE_SIZE(1 << 12) / DEV_BSIZE(1 << 9); | |||
| 1557 | ||||
| 1558 | if ((err = hib->io_func(hib->dev, | |||
| 1559 | blkctr + hib->image_offset, | |||
| 1560 | (vaddr_t)hibernate_io_page, | |||
| 1561 | PAGE_SIZE(1 << 12), HIB_W1, hib->io_page))) { | |||
| 1562 | DPRINTF("hib write error %d\n", | |||
| 1563 | err); | |||
| 1564 | return (err); | |||
| 1565 | } | |||
| 1566 | ||||
| 1567 | blkctr += nblocks; | |||
| 1568 | } | |||
| 1569 | } | |||
| 1570 | } | |||
| 1571 | ||||
| 1572 | if (inaddr != range_end) { | |||
| 1573 | DPRINTF("deflate range ended prematurely\n"); | |||
| 1574 | return (EINVAL22); | |||
| 1575 | } | |||
| 1576 | ||||
| 1577 | /* | |||
| 1578 | * End of range. Round up to next secsize bytes | |||
| 1579 | * after finishing compress | |||
| 1580 | */ | |||
| 1581 | if (out_remaining == 0) | |||
| 
 | ||||
| 1582 | out_remaining = PAGE_SIZE(1 << 12); | |||
| 1583 | ||||
| 1584 | /* Finish compress */ | |||
| 1585 | hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; | |||
| 1586 | hibernate_state->hib_stream.avail_in = 0; | |||
| 1587 | hibernate_state->hib_stream.next_out = | |||
| 1588 | (unsigned char *)hibernate_io_page + | |||
| 1589 | (PAGE_SIZE(1 << 12) - out_remaining); | |||
| 1590 | ||||
| 1591 | /* We have an extra output page available for finalize */ | |||
| 1592 | hibernate_state->hib_stream.avail_out = | |||
| 1593 | out_remaining + PAGE_SIZE(1 << 12); | |||
| 1594 | ||||
| 1595 | if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH4)) != | |||
| 1596 | Z_STREAM_END1) { | |||
| 1597 | DPRINTF("deflate error in output stream: %d\n", err); | |||
| 1598 | return (err); | |||
| 1599 | } | |||
| 1600 | ||||
| 1601 | out_remaining = hibernate_state->hib_stream.avail_out; | |||
| 1602 | ||||
| 1603 | used = 2 * PAGE_SIZE(1 << 12) - out_remaining; | |||
| 1604 | nblocks = used / DEV_BSIZE(1 << 9); | |||
| 1605 | ||||
| 1606 | /* Round up to next block if needed */ | |||
| 1607 | if (used % DEV_BSIZE(1 << 9) != 0) | |||
| 1608 | nblocks ++; | |||
| 1609 | ||||
| 1610 | /* Write final block(s) for this chunk */ | |||
| 1611 | if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, | |||
| 1612 | (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE(1 << 9), | |||
| 1613 | HIB_W1, hib->io_page))) { | |||
| 1614 | DPRINTF("hib final write error %d\n", err); | |||
| 1615 | return (err); | |||
| 1616 | } | |||
| 1617 | ||||
| 1618 | blkctr += nblocks; | |||
| 1619 | ||||
| 1620 | chunks[i].compressed_size = (blkctr + hib->image_offset - | |||
| 1621 | chunks[i].offset) * DEV_BSIZE(1 << 9); | |||
| 1622 | } | |||
| 1623 | ||||
| 1624 | hib->chunktable_offset = hib->image_offset + blkctr; | |||
| 1625 | return (0); | |||
| 1626 | } | |||
| 1627 | ||||
| 1628 | /* | |||
| 1629 | * Reset the zlib stream state and allocate a new hiballoc area for either | |||
| 1630 | * inflate or deflate. This function is called once for each hibernate chunk. | |||
| 1631 | * Calling hiballoc_init multiple times is acceptable since the memory it is | |||
| 1632 | * provided is unmanaged memory (stolen). We use the memory provided to us | |||
| 1633 | * by the piglet allocated via the supplied hib. | |||
| 1634 | */ | |||
| 1635 | int | |||
| 1636 | hibernate_zlib_reset(union hibernate_info *hib, int deflate) | |||
| 1637 | { | |||
| 1638 | vaddr_t hibernate_zlib_start; | |||
| 1639 | size_t hibernate_zlib_size; | |||
| 1640 | char *pva = (char *)hib->piglet_va; | |||
| 1641 | struct hibernate_zlib_state *hibernate_state; | |||
| 1642 | ||||
| 1643 | hibernate_state = | |||
| 1644 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | |||
| 1645 | ||||
| 1646 | if (!deflate) | |||
| 1647 | pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK(((0x0000ff8000000000UL|0x0000007fc0000000UL)|0x000000003fe00000UL )))); | |||
| 1648 | ||||
| 1649 | /* | |||
| 1650 | * See piglet layout information at the start of this file for | |||
| 1651 | * information on the zlib page assignments. | |||
| 1652 | */ | |||
| 1653 | hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE(1 << 12))); | |||
| 1654 | hibernate_zlib_size = 80 * PAGE_SIZE(1 << 12); | |||
| 1655 | ||||
| 1656 | memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size)__builtin_memset(((void *)hibernate_zlib_start), (0), (hibernate_zlib_size )); | |||
| 1657 | memset(hibernate_state, 0, PAGE_SIZE)__builtin_memset((hibernate_state), (0), ((1 << 12))); | |||
| 1658 | ||||
| 1659 | /* Set up stream structure */ | |||
| 1660 | hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; | |||
| 1661 | hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; | |||
| 1662 | ||||
| 1663 | /* Initialize the hiballoc arena for zlib allocs/frees */ | |||
| 1664 | hiballoc_init(&hibernate_state->hiballoc_arena, | |||
| 1665 | (caddr_t)hibernate_zlib_start, hibernate_zlib_size); | |||
| 1666 | ||||
| 1667 | if (deflate) { | |||
| 1668 | return deflateInit(&hibernate_state->hib_stream,deflateInit_((&hibernate_state->hib_stream), (1), "1.3.0.1-motley" , (int)sizeof(z_stream)) | |||
| 1669 | Z_BEST_SPEED)deflateInit_((&hibernate_state->hib_stream), (1), "1.3.0.1-motley" , (int)sizeof(z_stream)); | |||
| 1670 | } else | |||
| 1671 | return inflateInit(&hibernate_state->hib_stream)inflateInit_((&hibernate_state->hib_stream), "1.3.0.1-motley" , (int)sizeof(z_stream)); | |||
| 1672 | } | |||
| 1673 | ||||
| 1674 | /* | |||
| 1675 | * Reads the hibernated memory image from disk, whose location and | |||
| 1676 | * size are recorded in hib. Begin by reading the persisted | |||
| 1677 | * chunk table, which records the original chunk placement location | |||
| 1678 | * and compressed size for each. Next, allocate a pig region of | |||
| 1679 | * sufficient size to hold the compressed image. Next, read the | |||
| 1680 | * chunks into the pig area (calling hibernate_read_chunks to do this), | |||
| 1681 | * and finally, if all of the above succeeds, clear the hibernate signature. | |||
| 1682 | * The function will then return to hibernate_resume, which will proceed | |||
| 1683 | * to unpack the pig image to the correct place in memory. | |||
| 1684 | */ | |||
| 1685 | int | |||
| 1686 | hibernate_read_image(union hibernate_info *hib) | |||
| 1687 | { | |||
| 1688 | size_t compressed_size, disk_size, chunktable_size, pig_sz; | |||
| 1689 | paddr_t image_start, image_end, pig_start, pig_end; | |||
| 1690 | struct hibernate_disk_chunk *chunks; | |||
| 1691 | daddr_t blkctr; | |||
| 1692 | vaddr_t chunktable = (vaddr_t)NULL((void *)0); | |||
| 1693 | paddr_t piglet_chunktable = hib->piglet_pa + | |||
| 1694 | HIBERNATE_CHUNK_SIZE0x400000; | |||
| 1695 | int i, status; | |||
| 1696 | ||||
| 1697 | status = 0; | |||
| 1698 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1699 | ||||
| 1700 | /* Calculate total chunk table size in disk blocks */ | |||
| 1701 | chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE0x100000 / DEV_BSIZE(1 << 9); | |||
| 1702 | ||||
| 1703 | blkctr = hib->chunktable_offset; | |||
| 1704 | ||||
| 1705 | chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE0x100000, &kv_any, | |||
| 1706 | &kp_none, &kd_nowait); | |||
| 1707 | ||||
| 1708 | if (!chunktable) | |||
| 1709 | return (1); | |||
| 1710 | ||||
| 1711 | /* Map chunktable pages */ | |||
| 1712 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE0x100000; i += PAGE_SIZE(1 << 12)) | |||
| 1713 | pmap_kenter_pa(chunktable + i, piglet_chunktable + i, | |||
| 1714 | PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1715 | pmap_update(pmap_kernel()); | |||
| 1716 | ||||
| 1717 | /* Read the chunktable from disk into the piglet chunktable */ | |||
| 1718 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE0x100000; | |||
| 1719 | i += MAXPHYS(64 * 1024), blkctr += MAXPHYS(64 * 1024)/DEV_BSIZE(1 << 9)) | |||
| 1720 | hibernate_block_io(hib, blkctr, MAXPHYS(64 * 1024), | |||
| 1721 | chunktable + i, 0); | |||
| 1722 | ||||
| 1723 | blkctr = hib->image_offset; | |||
| 1724 | compressed_size = 0; | |||
| 1725 | ||||
| 1726 | chunks = (struct hibernate_disk_chunk *)chunktable; | |||
| 1727 | ||||
| 1728 | for (i = 0; i < hib->chunk_ctr; i++) | |||
| 1729 | compressed_size += chunks[i].compressed_size; | |||
| 1730 | ||||
| 1731 | disk_size = compressed_size; | |||
| 1732 | ||||
| 1733 | printf("unhibernating @ block %lld length %luMB\n", | |||
| 1734 | hib->sig_offset - chunktable_size, | |||
| 1735 | compressed_size / (1024 * 1024)); | |||
| 1736 | ||||
| 1737 | /* Allocate the pig area */ | |||
| 1738 | pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE0x400000; | |||
| 1739 | if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM12) { | |||
| 1740 | status = 1; | |||
| 1741 | goto unmap; | |||
| 1742 | } | |||
| 1743 | ||||
| 1744 | pig_end = pig_start + pig_sz; | |||
| 1745 | ||||
| 1746 | /* Calculate image extents. Pig image must end on a chunk boundary. */ | |||
| 1747 | image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE0x400000 - 1); | |||
| 1748 | image_start = image_end - disk_size; | |||
| 1749 | ||||
| 1750 | hibernate_read_chunks(hib, image_start, image_end, disk_size, | |||
| 1751 | chunks); | |||
| 1752 | ||||
| 1753 | /* Prepare the resume time pmap/page table */ | |||
| 1754 | hibernate_populate_resume_pt(hib, image_start, image_end); | |||
| 1755 | ||||
| 1756 | unmap: | |||
| 1757 | /* Unmap chunktable pages */ | |||
| 1758 | pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE0x100000); | |||
| 1759 | pmap_update(pmap_kernel()); | |||
| 1760 | ||||
| 1761 | return (status); | |||
| 1762 | } | |||
| 1763 | ||||
| 1764 | /* | |||
| 1765 | * Read the hibernated memory chunks from disk (chunk information at this | |||
| 1766 | * point is stored in the piglet) into the pig area specified by | |||
| 1767 | * [pig_start .. pig_end]. Order the chunks so that the final chunk is the | |||
| 1768 | * only chunk with overlap possibilities. | |||
| 1769 | */ | |||
| 1770 | int | |||
| 1771 | hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, | |||
| 1772 | paddr_t pig_end, size_t image_compr_size, | |||
| 1773 | struct hibernate_disk_chunk *chunks) | |||
| 1774 | { | |||
| 1775 | paddr_t img_cur, piglet_base; | |||
| 1776 | daddr_t blkctr; | |||
| 1777 | size_t processed, compressed_size, read_size; | |||
| 1778 | int nchunks, nfchunks, num_io_pages; | |||
| 1779 | vaddr_t tempva, hibernate_fchunk_area; | |||
| 1780 | short *fchunks, i, j; | |||
| 1781 | ||||
| 1782 | tempva = (vaddr_t)NULL((void *)0); | |||
| 1783 | hibernate_fchunk_area = (vaddr_t)NULL((void *)0); | |||
| 1784 | nfchunks = 0; | |||
| 1785 | piglet_base = hib->piglet_pa; | |||
| 1786 | global_pig_start = pig_start; | |||
| 1787 | ||||
| 1788 | /* | |||
| 1789 | * These mappings go into the resuming kernel's page table, and are | |||
| 1790 | * used only during image read. They disappear from existence | |||
| 1791 | * when the suspended kernel is unpacked on top of us. | |||
| 1792 | */ | |||
| 1793 | tempva = (vaddr_t)km_alloc(MAXPHYS(64 * 1024) + PAGE_SIZE(1 << 12), &kv_any, &kp_none, | |||
| 1794 | &kd_nowait); | |||
| 1795 | if (!tempva) | |||
| 1796 | return (1); | |||
| 1797 | hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE(1 << 12), &kv_any, | |||
| 1798 | &kp_none, &kd_nowait); | |||
| 1799 | if (!hibernate_fchunk_area) | |||
| 1800 | return (1); | |||
| 1801 | ||||
| 1802 | /* Final output chunk ordering VA */ | |||
| 1803 | fchunks = (short *)hibernate_fchunk_area; | |||
| 1804 | ||||
| 1805 | /* Map the chunk ordering region */ | |||
| 1806 | for(i = 0; i < 24 ; i++) | |||
| 1807 | pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE(1 << 12)), | |||
| 1808 | piglet_base + ((4 + i) * PAGE_SIZE(1 << 12)), | |||
| 1809 | PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1810 | pmap_update(pmap_kernel()); | |||
| 1811 | ||||
| 1812 | nchunks = hib->chunk_ctr; | |||
| 1813 | ||||
| 1814 | /* Initially start all chunks as unplaced */ | |||
| 1815 | for (i = 0; i < nchunks; i++) | |||
| 1816 | chunks[i].flags = 0; | |||
| 1817 | ||||
| 1818 | /* | |||
| 1819 | * Search the list for chunks that are outside the pig area. These | |||
| 1820 | * can be placed first in the final output list. | |||
| 1821 | */ | |||
| 1822 | for (i = 0; i < nchunks; i++) { | |||
| 1823 | if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) { | |||
| 1824 | fchunks[nfchunks] = i; | |||
| 1825 | nfchunks++; | |||
| 1826 | chunks[i].flags |= HIBERNATE_CHUNK_PLACED4; | |||
| 1827 | } | |||
| 1828 | } | |||
| 1829 | ||||
| 1830 | /* | |||
| 1831 | * Walk the ordering, place the chunks in ascending memory order. | |||
| 1832 | */ | |||
| 1833 | for (i = 0; i < nchunks; i++) { | |||
| 1834 | if (chunks[i].flags != HIBERNATE_CHUNK_PLACED4) { | |||
| 1835 | fchunks[nfchunks] = i; | |||
| 1836 | nfchunks++; | |||
| 1837 | chunks[i].flags = HIBERNATE_CHUNK_PLACED4; | |||
| 1838 | } | |||
| 1839 | } | |||
| 1840 | ||||
| 1841 | img_cur = pig_start; | |||
| 1842 | ||||
| 1843 | for (i = 0; i < nfchunks; i++) { | |||
| 1844 | blkctr = chunks[fchunks[i]].offset; | |||
| 1845 | processed = 0; | |||
| 1846 | compressed_size = chunks[fchunks[i]].compressed_size; | |||
| 1847 | ||||
| 1848 | while (processed < compressed_size) { | |||
| 1849 | if (compressed_size - processed >= MAXPHYS(64 * 1024)) | |||
| 1850 | read_size = MAXPHYS(64 * 1024); | |||
| 1851 | else | |||
| 1852 | read_size = compressed_size - processed; | |||
| 1853 | ||||
| 1854 | /* | |||
| 1855 | * We're reading read_size bytes, offset from the | |||
| 1856 | * start of a page by img_cur % PAGE_SIZE, so the | |||
| 1857 | * end will be read_size + (img_cur % PAGE_SIZE) | |||
| 1858 | * from the start of the first page. Round that | |||
| 1859 | * up to the next page size. | |||
| 1860 | */ | |||
| 1861 | num_io_pages = (read_size + (img_cur % PAGE_SIZE(1 << 12)) | |||
| 1862 | + PAGE_SIZE(1 << 12) - 1) / PAGE_SIZE(1 << 12); | |||
| 1863 | ||||
| 1864 | KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1)((num_io_pages <= (64 * 1024)/(1 << 12) + 1) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1864, "num_io_pages <= MAXPHYS/PAGE_SIZE + 1")); | |||
| 1865 | ||||
| 1866 | /* Map pages for this read */ | |||
| 1867 | for (j = 0; j < num_io_pages; j ++) | |||
| 1868 | pmap_kenter_pa(tempva + j * PAGE_SIZE(1 << 12), | |||
| 1869 | img_cur + j * PAGE_SIZE(1 << 12), | |||
| 1870 | PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1871 | ||||
| 1872 | pmap_update(pmap_kernel()); | |||
| 1873 | ||||
| 1874 | hibernate_block_io(hib, blkctr, read_size, | |||
| 1875 | tempva + (img_cur & PAGE_MASK((1 << 12) - 1)), 0); | |||
| 1876 | ||||
| 1877 | blkctr += (read_size / DEV_BSIZE(1 << 9)); | |||
| 1878 | ||||
| 1879 | pmap_kremove(tempva, num_io_pages * PAGE_SIZE(1 << 12)); | |||
| 1880 | pmap_update(pmap_kernel()); | |||
| 1881 | ||||
| 1882 | processed += read_size; | |||
| 1883 | img_cur += read_size; | |||
| 1884 | } | |||
| 1885 | } | |||
| 1886 | ||||
| 1887 | pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE(1 << 12)); | |||
| 1888 | pmap_update(pmap_kernel()); | |||
| 1889 | ||||
| 1890 | return (0); | |||
| 1891 | } | |||
| 1892 | ||||
| 1893 | /* | |||
| 1894 | * Hibernating a machine comprises the following operations: | |||
| 1895 | * 1. Calculating this machine's hibernate_info information | |||
| 1896 | * 2. Allocating a piglet and saving the piglet's physaddr | |||
| 1897 | * 3. Calculating the memory chunks | |||
| 1898 | * 4. Writing the compressed chunks to disk | |||
| 1899 | * 5. Writing the chunk table | |||
| 1900 | * 6. Writing the signature block (hibernate_info) | |||
| 1901 | * | |||
| 1902 | * On most architectures, the function calling hibernate_suspend would | |||
| 1903 | * then power off the machine using some MD-specific implementation. | |||
| 1904 | */ | |||
| 1905 | int | |||
| 1906 | hibernate_suspend(void) | |||
| 1907 | { | |||
| 1908 | union hibernate_info hib; | |||
| 1909 | u_long start, end; | |||
| 1910 | ||||
| 1911 | /* | |||
| 1912 | * Calculate memory ranges, swap offsets, etc. | |||
| 1913 | * This also allocates a piglet whose physaddr is stored in | |||
| 1914 | * hib->piglet_pa and vaddr stored in hib->piglet_va | |||
| 1915 | */ | |||
| 1916 | if (get_hibernate_info(&hib, 1)) { | |||
| 
 | ||||
| 1917 | DPRINTF("failed to obtain hibernate info\n"); | |||
| 1918 | return (1); | |||
| 1919 | } | |||
| 1920 | ||||
| 1921 | /* Find a page-addressed region in swap [start,end] */ | |||
| 1922 | if (uvm_hibswap(hib.dev, &start, &end)) { | |||
| 1923 | printf("hibernate: cannot find any swap\n"); | |||
| 1924 | return (1); | |||
| 1925 | } | |||
| 1926 | ||||
| 1927 | if (end - start < 1000) { | |||
| 1928 | printf("hibernate: insufficient swap (%lu is too small)\n", | |||
| 1929 | end - start + 1); | |||
| 1930 | return (1); | |||
| 1931 | } | |||
| 1932 | ||||
| 1933 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_start, | |||
| 1934 | &retguard_start_phys); | |||
| 1935 | pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)&__retguard_end, | |||
| 1936 | &retguard_end_phys); | |||
| 1937 | ||||
| 1938 | /* Calculate block offsets in swap */ | |||
| 1939 | hib.image_offset = ctod(start)((start) << (12 - 9)); | |||
| 1940 | ||||
| 1941 | DPRINTF("hibernate @ block %lld max-length %lu blocks\n", | |||
| 1942 | hib.image_offset, ctod(end) - ctod(start) + 1); | |||
| 1943 | ||||
| 1944 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1945 | DPRINTF("hibernate: writing chunks\n"); | |||
| 1946 | if (hibernate_write_chunks(&hib)) { | |||
| 1947 | DPRINTF("hibernate_write_chunks failed\n"); | |||
| 1948 | return (1); | |||
| 1949 | } | |||
| 1950 | ||||
| 1951 | DPRINTF("hibernate: writing chunktable\n"); | |||
| 1952 | if (hibernate_write_chunktable(&hib)) { | |||
| 1953 | DPRINTF("hibernate_write_chunktable failed\n"); | |||
| 1954 | return (1); | |||
| 1955 | } | |||
| 1956 | ||||
| 1957 | DPRINTF("hibernate: writing signature\n"); | |||
| 1958 | if (hibernate_write_signature(&hib)) { | |||
| 1959 | DPRINTF("hibernate_write_signature failed\n"); | |||
| 1960 | return (1); | |||
| 1961 | } | |||
| 1962 | ||||
| 1963 | /* Allow the disk to settle */ | |||
| 1964 | delay(500000)(*delay_func)(500000); | |||
| 1965 | ||||
| 1966 | /* | |||
| 1967 | * Give the device-specific I/O function a notification that we're | |||
| 1968 | * done, and that it can clean up or shutdown as needed. | |||
| 1969 | */ | |||
| 1970 | hib.io_func(hib.dev, 0, (vaddr_t)NULL((void *)0), 0, HIB_DONE-2, hib.io_page); | |||
| 1971 | return (0); | |||
| 1972 | } | |||
| 1973 | ||||
| 1974 | int | |||
| 1975 | hibernate_alloc(void) | |||
| 1976 | { | |||
| 1977 | KASSERT(global_piglet_va == 0)((global_piglet_va == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1977, "global_piglet_va == 0")); | |||
| 1978 | KASSERT(hibernate_temp_page == 0)((hibernate_temp_page == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/kern/subr_hibernate.c", 1978, "hibernate_temp_page == 0" )); | |||
| 1979 | ||||
| 1980 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 1981 | pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), | |||
| 1982 | PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1983 | ||||
| 1984 | /* Allocate a piglet, store its addresses in the supplied globals */ | |||
| 1985 | if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa, | |||
| 1986 | HIBERNATE_CHUNK_SIZE0x400000 * 4, HIBERNATE_CHUNK_SIZE0x400000)) | |||
| 1987 | goto unmap; | |||
| 1988 | ||||
| 1989 | /* | |||
| 1990 | * Allocate VA for the temp page. | |||
| 1991 | * | |||
| 1992 | * This will become part of the suspended kernel and will | |||
| 1993 | * be freed in hibernate_free, upon resume (or hibernate | |||
| 1994 | * failure) | |||
| 1995 | */ | |||
| 1996 | hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, | |||
| 1997 | &kp_none, &kd_nowait); | |||
| 1998 | if (!hibernate_temp_page) { | |||
| 1999 | uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE0x400000); | |||
| 2000 | global_piglet_va = 0; | |||
| 2001 | goto unmap; | |||
| 2002 | } | |||
| 2003 | return (0); | |||
| 2004 | unmap: | |||
| 2005 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), PAGE_SIZE(1 << 12)); | |||
| 2006 | pmap_update(pmap_kernel()); | |||
| 2007 | return (ENOMEM12); | |||
| 2008 | } | |||
| 2009 | ||||
| 2010 | /* | |||
| 2011 | * Free items allocated by hibernate_alloc() | |||
| 2012 | */ | |||
| 2013 | void | |||
| 2014 | hibernate_free(void) | |||
| 2015 | { | |||
| 2016 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
| 2017 | ||||
| 2018 | if (global_piglet_va) | |||
| 2019 | uvm_pmr_free_piglet(global_piglet_va, | |||
| 2020 | 4 * HIBERNATE_CHUNK_SIZE0x400000); | |||
| 2021 | ||||
| 2022 | if (hibernate_temp_page) { | |||
| 2023 | pmap_kremove(hibernate_temp_page, PAGE_SIZE(1 << 12)); | |||
| 2024 | km_free((void *)hibernate_temp_page, PAGE_SIZE(1 << 12), | |||
| 2025 | &kv_any, &kp_none); | |||
| 2026 | } | |||
| 2027 | ||||
| 2028 | global_piglet_va = 0; | |||
| 2029 | hibernate_temp_page = 0; | |||
| 2030 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), PAGE_SIZE(1 << 12)); | |||
| 2031 | pmap_update(pmap_kernel()); | |||
| 2032 | } |