/*
 * kern/subr_hibernate.c
 *
 * Static-analyzer note carried over from the report this file was
 * extracted from: line 1535, column 14 — "The left operand of '=='
 * is a garbage value".
 */
1 | /* $OpenBSD: subr_hibernate.c,v 1.132 2022/01/07 02:47:07 guenther Exp $ */ | ||||
2 | |||||
3 | /* | ||||
4 | * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> | ||||
5 | * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> | ||||
6 | * | ||||
7 | * Permission to use, copy, modify, and distribute this software for any | ||||
8 | * purpose with or without fee is hereby granted, provided that the above | ||||
9 | * copyright notice and this permission notice appear in all copies. | ||||
10 | * | ||||
11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||||
12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||||
13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||||
14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||
15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||||
16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||||
17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
18 | */ | ||||
19 | |||||
20 | #include <sys/hibernate.h> | ||||
21 | #include <sys/malloc.h> | ||||
22 | #include <sys/param.h> | ||||
23 | #include <sys/tree.h> | ||||
24 | #include <sys/systm.h> | ||||
25 | #include <sys/disklabel.h> | ||||
26 | #include <sys/disk.h> | ||||
27 | #include <sys/conf.h> | ||||
28 | #include <sys/buf.h> | ||||
29 | #include <sys/fcntl.h> | ||||
30 | #include <sys/stat.h> | ||||
31 | #include <sys/atomic.h> | ||||
32 | |||||
33 | #include <uvm/uvm.h> | ||||
34 | #include <uvm/uvm_swap.h> | ||||
35 | |||||
36 | #include <machine/hibernate.h> | ||||
37 | |||||
38 | /* Make sure the signature can fit in one block */ | ||||
39 | CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE)extern char _ctassert[(sizeof(union hibernate_info) <= (1 << 9)) ? 1 : -1 ] __attribute__((__unused__)); | ||||
40 | |||||
41 | /* | ||||
42 | * Hibernate piglet layout information | ||||
43 | * | ||||
44 | * The piglet is a scratch area of memory allocated by the suspending kernel. | ||||
45 | * Its phys and virt addrs are recorded in the signature block. The piglet is | ||||
46 | * used to guarantee an unused area of memory that can be used by the resuming | ||||
47 | * kernel for various things. The piglet is excluded during unpack operations. | ||||
48 | * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). | ||||
49 | * | ||||
50 | * Offset from piglet_base Purpose | ||||
51 | * ---------------------------------------------------------------------------- | ||||
52 | * 0 Private page for suspend I/O write functions | ||||
53 | * 1*PAGE_SIZE I/O page used during hibernate suspend | ||||
54 | * 2*PAGE_SIZE I/O page used during hibernate suspend | ||||
55 | * 3*PAGE_SIZE copy page used during hibernate suspend | ||||
56 | * 4*PAGE_SIZE final chunk ordering list (24 pages) | ||||
57 | * 28*PAGE_SIZE RLE utility page | ||||
58 | * 29*PAGE_SIZE start of hiballoc area | ||||
59 | * 30*PAGE_SIZE preserved entropy | ||||
60 | * 110*PAGE_SIZE end of hiballoc area (80 pages) | ||||
61 | * 366*PAGE_SIZE end of retguard preservation region (256 pages) | ||||
62 | * ... unused | ||||
63 | * HIBERNATE_CHUNK_SIZE start of hibernate chunk table | ||||
64 | * 2*HIBERNATE_CHUNK_SIZE bounce area for chunks being unpacked | ||||
65 | * 4*HIBERNATE_CHUNK_SIZE end of piglet | ||||
66 | */ | ||||
67 | |||||
68 | /* Temporary vaddr ranges used during hibernate */ | ||||
69 | vaddr_t hibernate_temp_page; | ||||
70 | vaddr_t hibernate_copy_page; | ||||
71 | vaddr_t hibernate_rle_page; | ||||
72 | |||||
73 | /* Hibernate info as read from disk during resume */ | ||||
74 | union hibernate_info disk_hib; | ||||
75 | |||||
76 | /* | ||||
77 | * Global copy of the pig start address. This needs to be a global as we | ||||
78 | * switch stacks after computing it - it can't be stored on the stack. | ||||
79 | */ | ||||
80 | paddr_t global_pig_start; | ||||
81 | |||||
82 | /* | ||||
83 | * Global copies of the piglet start addresses (PA/VA). We store these | ||||
84 | * as globals to avoid having to carry them around as parameters, as the | ||||
85 | * piglet is allocated early and freed late - its lifecycle extends beyond | ||||
86 | * that of the hibernate info union which is calculated on suspend/resume. | ||||
87 | */ | ||||
88 | vaddr_t global_piglet_va; | ||||
89 | paddr_t global_piglet_pa; | ||||
90 | |||||
91 | /* #define HIB_DEBUG */ | ||||
92 | #ifdef HIB_DEBUG | ||||
93 | int hib_debug = 99; | ||||
94 | #define DPRINTF(x...) do { if (hib_debug) printf(x); } while (0) | ||||
95 | #define DNPRINTF(n,x...) do { if (hib_debug > (n)) printf(x); } while (0) | ||||
96 | #else | ||||
97 | #define DPRINTF(x...) | ||||
98 | #define DNPRINTF(n,x...) | ||||
99 | #endif | ||||
100 | |||||
101 | #ifndef NO_PROPOLICE | ||||
102 | extern long __guard_local; | ||||
103 | #endif /* ! NO_PROPOLICE */ | ||||
104 | |||||
105 | void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); | ||||
106 | int hibernate_calc_rle(paddr_t, paddr_t); | ||||
107 | int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, | ||||
108 | size_t *); | ||||
109 | |||||
/* Upper bound on RLE counts: pages per hibernate chunk. */
#define MAX_RLE	(HIBERNATE_CHUNK_SIZE / PAGE_SIZE)

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN	8	/* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
121 | |||||
122 | struct hiballoc_entry { | ||||
123 | size_t hibe_use; | ||||
124 | size_t hibe_space; | ||||
125 | RBT_ENTRY(hiballoc_entry)struct rb_entry hibe_entry; | ||||
126 | }; | ||||
127 | |||||
128 | /* | ||||
129 | * Sort hibernate memory ranges by ascending PA | ||||
130 | */ | ||||
131 | void | ||||
132 | hibernate_sort_ranges(union hibernate_info *hib_info) | ||||
133 | { | ||||
134 | int i, j; | ||||
135 | struct hibernate_memory_range *ranges; | ||||
136 | paddr_t base, end; | ||||
137 | |||||
138 | ranges = hib_info->ranges; | ||||
139 | |||||
140 | for (i = 1; i < hib_info->nranges; i++) { | ||||
141 | j = i; | ||||
142 | while (j > 0 && ranges[j - 1].base > ranges[j].base) { | ||||
143 | base = ranges[j].base; | ||||
144 | end = ranges[j].end; | ||||
145 | ranges[j].base = ranges[j - 1].base; | ||||
146 | ranges[j].end = ranges[j - 1].end; | ||||
147 | ranges[j - 1].base = base; | ||||
148 | ranges[j - 1].end = end; | ||||
149 | j--; | ||||
150 | } | ||||
151 | } | ||||
152 | } | ||||
153 | |||||
154 | /* | ||||
155 | * Compare hiballoc entries based on the address they manage. | ||||
156 | * | ||||
157 | * Since the address is fixed, relative to struct hiballoc_entry, | ||||
158 | * we just compare the hiballoc_entry pointers. | ||||
159 | */ | ||||
160 | static __inline int | ||||
161 | hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) | ||||
162 | { | ||||
163 | vaddr_t vl = (vaddr_t)l; | ||||
164 | vaddr_t vr = (vaddr_t)r; | ||||
165 | |||||
166 | return vl < vr ? -1 : (vl > vr); | ||||
167 | } | ||||
168 | |||||
RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
170 | |||||
171 | /* | ||||
172 | * Given a hiballoc entry, return the address it manages. | ||||
173 | */ | ||||
174 | static __inline void * | ||||
175 | hib_entry_to_addr(struct hiballoc_entry *entry) | ||||
176 | { | ||||
177 | caddr_t addr; | ||||
178 | |||||
179 | addr = (caddr_t)entry; | ||||
180 | addr += HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | ||||
181 | return addr; | ||||
182 | } | ||||
183 | |||||
184 | /* | ||||
185 | * Given an address, find the hiballoc that corresponds. | ||||
186 | */ | ||||
187 | static __inline struct hiballoc_entry* | ||||
188 | hib_addr_to_entry(void *addr_param) | ||||
189 | { | ||||
190 | caddr_t addr; | ||||
191 | |||||
192 | addr = (caddr_t)addr_param; | ||||
193 | addr -= HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | ||||
194 | return (struct hiballoc_entry*)addr; | ||||
195 | } | ||||
196 | |||||
RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
198 | |||||
199 | /* | ||||
200 | * Allocate memory from the arena. | ||||
201 | * | ||||
202 | * Returns NULL if no memory is available. | ||||
203 | */ | ||||
204 | void * | ||||
205 | hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) | ||||
206 | { | ||||
207 | struct hiballoc_entry *entry, *new_entry; | ||||
208 | size_t find_sz; | ||||
209 | |||||
210 | /* | ||||
211 | * Enforce alignment of HIB_ALIGN bytes. | ||||
212 | * | ||||
213 | * Note that, because the entry is put in front of the allocation, | ||||
214 | * 0-byte allocations are guaranteed a unique address. | ||||
215 | */ | ||||
216 | alloc_sz = roundup(alloc_sz, HIB_ALIGN)((((alloc_sz)+((8)-1))/(8))*(8)); | ||||
217 | |||||
218 | /* | ||||
219 | * Find an entry with hibe_space >= find_sz. | ||||
220 | * | ||||
221 | * If the root node is not large enough, we switch to tree traversal. | ||||
222 | * Because all entries are made at the bottom of the free space, | ||||
223 | * traversal from the end has a slightly better chance of yielding | ||||
224 | * a sufficiently large space. | ||||
225 | */ | ||||
226 | find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | ||||
227 | entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs)hiballoc_addr_RBT_ROOT(&arena->hib_addrs); | ||||
228 | if (entry != NULL((void *)0) && entry->hibe_space < find_sz) { | ||||
229 | RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs)for ((entry) = hiballoc_addr_RBT_MAX((&arena->hib_addrs )); (entry) != ((void *)0); (entry) = hiballoc_addr_RBT_PREV( (entry))) { | ||||
230 | if (entry->hibe_space >= find_sz) | ||||
231 | break; | ||||
232 | } | ||||
233 | } | ||||
234 | |||||
235 | /* | ||||
236 | * Insufficient or too fragmented memory. | ||||
237 | */ | ||||
238 | if (entry == NULL((void *)0)) | ||||
239 | return NULL((void *)0); | ||||
240 | |||||
241 | /* | ||||
242 | * Create new entry in allocated space. | ||||
243 | */ | ||||
244 | new_entry = (struct hiballoc_entry*)( | ||||
245 | (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); | ||||
246 | new_entry->hibe_space = entry->hibe_space - find_sz; | ||||
247 | new_entry->hibe_use = alloc_sz; | ||||
248 | |||||
249 | /* | ||||
250 | * Insert entry. | ||||
251 | */ | ||||
252 | if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry)hiballoc_addr_RBT_INSERT(&arena->hib_addrs, new_entry) != NULL((void *)0)) | ||||
253 | panic("hib_alloc: insert failure"); | ||||
254 | entry->hibe_space = 0; | ||||
255 | |||||
256 | /* Return address managed by entry. */ | ||||
257 | return hib_entry_to_addr(new_entry); | ||||
258 | } | ||||
259 | |||||
260 | void | ||||
261 | hib_getentropy(char **bufp, size_t *bufplen) | ||||
262 | { | ||||
263 | if (!bufp || !bufplen) | ||||
264 | return; | ||||
265 | |||||
266 | *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE(1 << 12))); | ||||
267 | *bufplen = PAGE_SIZE(1 << 12); | ||||
268 | } | ||||
269 | |||||
270 | /* | ||||
271 | * Free a pointer previously allocated from this arena. | ||||
272 | * | ||||
273 | * If addr is NULL, this will be silently accepted. | ||||
274 | */ | ||||
275 | void | ||||
276 | hib_free(struct hiballoc_arena *arena, void *addr) | ||||
277 | { | ||||
278 | struct hiballoc_entry *entry, *prev; | ||||
279 | |||||
280 | if (addr == NULL((void *)0)) | ||||
281 | return; | ||||
282 | |||||
283 | /* | ||||
284 | * Derive entry from addr and check it is really in this arena. | ||||
285 | */ | ||||
286 | entry = hib_addr_to_entry(addr); | ||||
287 | if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_FIND(&arena->hib_addrs, entry) != entry) | ||||
288 | panic("hib_free: freed item %p not in hib arena", addr); | ||||
289 | |||||
290 | /* | ||||
291 | * Give the space in entry to its predecessor. | ||||
292 | * | ||||
293 | * If entry has no predecessor, change its used space into free space | ||||
294 | * instead. | ||||
295 | */ | ||||
296 | prev = RBT_PREV(hiballoc_addr, entry)hiballoc_addr_RBT_PREV(entry); | ||||
297 | if (prev != NULL((void *)0) && | ||||
298 | (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)) + | ||||
299 | prev->hibe_use + prev->hibe_space) == entry) { | ||||
300 | /* Merge entry. */ | ||||
301 | RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_REMOVE(&arena->hib_addrs, entry); | ||||
302 | prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)) + | ||||
303 | entry->hibe_use + entry->hibe_space; | ||||
304 | } else { | ||||
305 | /* Flip used memory to free space. */ | ||||
306 | entry->hibe_space += entry->hibe_use; | ||||
307 | entry->hibe_use = 0; | ||||
308 | } | ||||
309 | } | ||||
310 | |||||
311 | /* | ||||
312 | * Initialize hiballoc. | ||||
313 | * | ||||
314 | * The allocator will manage memory at ptr, which is len bytes. | ||||
315 | */ | ||||
316 | int | ||||
317 | hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) | ||||
318 | { | ||||
319 | struct hiballoc_entry *entry; | ||||
320 | caddr_t ptr; | ||||
321 | size_t len; | ||||
322 | |||||
323 | RBT_INIT(hiballoc_addr, &arena->hib_addrs)hiballoc_addr_RBT_INIT(&arena->hib_addrs); | ||||
324 | |||||
325 | /* | ||||
326 | * Hib allocator enforces HIB_ALIGN alignment. | ||||
327 | * Fixup ptr and len. | ||||
328 | */ | ||||
329 | ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN)(((((vaddr_t)p_ptr)+((8)-1))/(8))*(8)); | ||||
330 | len = p_len - ((size_t)ptr - (size_t)p_ptr); | ||||
331 | len &= ~((size_t)HIB_ALIGN8 - 1); | ||||
332 | |||||
333 | /* | ||||
334 | * Insufficient memory to be able to allocate and also do bookkeeping. | ||||
335 | */ | ||||
336 | if (len <= HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8))) | ||||
337 | return ENOMEM12; | ||||
338 | |||||
339 | /* | ||||
340 | * Create entry describing space. | ||||
341 | */ | ||||
342 | entry = (struct hiballoc_entry*)ptr; | ||||
343 | entry->hibe_use = 0; | ||||
344 | entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry)((((sizeof(struct hiballoc_entry))+((8)-1))/(8))*(8)); | ||||
345 | RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry)hiballoc_addr_RBT_INSERT(&arena->hib_addrs, entry); | ||||
346 | |||||
347 | return 0; | ||||
348 | } | ||||
349 | |||||
350 | /* | ||||
351 | * Zero all free memory. | ||||
352 | */ | ||||
353 | void | ||||
354 | uvm_pmr_zero_everything(void) | ||||
355 | { | ||||
356 | struct uvm_pmemrange *pmr; | ||||
357 | struct vm_page *pg; | ||||
358 | int i; | ||||
359 | |||||
360 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
361 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||
362 | /* Zero single pages. */ | ||||
363 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY])((&pmr->single[0])->tqh_first)) | ||||
364 | != NULL((void *)0)) { | ||||
365 | uvm_pmr_remove(pmr, pg); | ||||
366 | uvm_pagezero(pg); | ||||
367 | atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | ||||
368 | uvmexp.zeropages++; | ||||
369 | uvm_pmr_insert(pmr, pg, 0); | ||||
370 | } | ||||
371 | |||||
372 | /* Zero multi page ranges. */ | ||||
373 | while ((pg = RBT_ROOT(uvm_pmr_size,uvm_pmr_size_RBT_ROOT(&pmr->size[0]) | ||||
374 | &pmr->size[UVM_PMR_MEMTYPE_DIRTY])uvm_pmr_size_RBT_ROOT(&pmr->size[0])) != NULL((void *)0)) { | ||||
375 | pg--; /* Size tree always has second page. */ | ||||
376 | uvm_pmr_remove(pmr, pg); | ||||
377 | for (i = 0; i < pg->fpgsz; i++) { | ||||
378 | uvm_pagezero(&pg[i]); | ||||
379 | atomic_setbits_intx86_atomic_setbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | ||||
380 | uvmexp.zeropages++; | ||||
381 | } | ||||
382 | uvm_pmr_insert(pmr, pg, 0); | ||||
383 | } | ||||
384 | } | ||||
385 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
386 | } | ||||
387 | |||||
388 | /* | ||||
389 | * Mark all memory as dirty. | ||||
390 | * | ||||
391 | * Used to inform the system that the clean memory isn't clean for some | ||||
392 | * reason, for example because we just came back from hibernate. | ||||
393 | */ | ||||
394 | void | ||||
395 | uvm_pmr_dirty_everything(void) | ||||
396 | { | ||||
397 | struct uvm_pmemrange *pmr; | ||||
398 | struct vm_page *pg; | ||||
399 | int i; | ||||
400 | |||||
401 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
402 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||
403 | /* Dirty single pages. */ | ||||
404 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO])((&pmr->single[1])->tqh_first)) | ||||
405 | != NULL((void *)0)) { | ||||
406 | uvm_pmr_remove(pmr, pg); | ||||
407 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | ||||
408 | uvm_pmr_insert(pmr, pg, 0); | ||||
409 | } | ||||
410 | |||||
411 | /* Dirty multi page ranges. */ | ||||
412 | while ((pg = RBT_ROOT(uvm_pmr_size,uvm_pmr_size_RBT_ROOT(&pmr->size[1]) | ||||
413 | &pmr->size[UVM_PMR_MEMTYPE_ZERO])uvm_pmr_size_RBT_ROOT(&pmr->size[1])) != NULL((void *)0)) { | ||||
414 | pg--; /* Size tree always has second page. */ | ||||
415 | uvm_pmr_remove(pmr, pg); | ||||
416 | for (i = 0; i < pg->fpgsz; i++) | ||||
417 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | ||||
418 | uvm_pmr_insert(pmr, pg, 0); | ||||
419 | } | ||||
420 | } | ||||
421 | |||||
422 | uvmexp.zeropages = 0; | ||||
423 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
424 | } | ||||
425 | |||||
426 | /* | ||||
427 | * Allocate an area that can hold sz bytes and doesn't overlap with | ||||
428 | * the piglet at piglet_pa. | ||||
429 | */ | ||||
430 | int | ||||
431 | uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) | ||||
432 | { | ||||
433 | struct uvm_constraint_range pig_constraint; | ||||
434 | struct kmem_pa_mode kp_pig = { | ||||
435 | .kp_constraint = &pig_constraint, | ||||
436 | .kp_maxseg = 1 | ||||
437 | }; | ||||
438 | vaddr_t va; | ||||
439 | |||||
440 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | ||||
441 | |||||
442 | pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE0x400000; | ||||
443 | pig_constraint.ucr_high = -1; | ||||
444 | |||||
445 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | ||||
446 | if (va == 0) { | ||||
447 | pig_constraint.ucr_low = 0; | ||||
448 | pig_constraint.ucr_high = piglet_pa - 1; | ||||
449 | |||||
450 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | ||||
451 | if (va == 0) | ||||
452 | return ENOMEM12; | ||||
453 | } | ||||
454 | |||||
455 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, pa); | ||||
456 | return 0; | ||||
457 | } | ||||
458 | |||||
459 | /* | ||||
460 | * Allocate a piglet area. | ||||
461 | * | ||||
462 | * This needs to be in DMA-safe memory. | ||||
463 | * Piglets are aligned. | ||||
464 | * | ||||
465 | * sz and align in bytes. | ||||
466 | * | ||||
467 | * The call will sleep for the pagedaemon to attempt to free memory. | ||||
468 | * The pagedaemon may decide its not possible to free enough memory, causing | ||||
469 | * the allocation to fail. | ||||
470 | */ | ||||
471 | int | ||||
472 | uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) | ||||
473 | { | ||||
474 | struct kmem_pa_mode kp_piglet = { | ||||
475 | .kp_constraint = &dma_constraint, | ||||
476 | .kp_align = align, | ||||
477 | .kp_maxseg = 1 | ||||
478 | }; | ||||
479 | |||||
480 | /* Ensure align is a power of 2 */ | ||||
481 | KASSERT((align & (align - 1)) == 0)(((align & (align - 1)) == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/kern/subr_hibernate.c", 481, "(align & (align - 1)) == 0" )); | ||||
482 | |||||
483 | /* | ||||
484 | * Fixup arguments: align must be at least PAGE_SIZE, | ||||
485 | * sz will be converted to pagecount, since that is what | ||||
486 | * pmemrange uses internally. | ||||
487 | */ | ||||
488 | if (align < PAGE_SIZE(1 << 12)) | ||||
489 | kp_piglet.kp_align = PAGE_SIZE(1 << 12); | ||||
490 | |||||
491 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | ||||
492 | |||||
493 | *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); | ||||
494 | if (*va == 0) | ||||
495 | return ENOMEM12; | ||||
496 | |||||
497 | pmap_extract(pmap_kernel()(&kernel_pmap_store), *va, pa); | ||||
498 | return 0; | ||||
499 | } | ||||
500 | |||||
501 | /* | ||||
502 | * Free a piglet area. | ||||
503 | */ | ||||
504 | void | ||||
505 | uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) | ||||
506 | { | ||||
507 | /* | ||||
508 | * Fix parameters. | ||||
509 | */ | ||||
510 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | ||||
511 | |||||
512 | /* | ||||
513 | * Free the physical and virtual memory. | ||||
514 | */ | ||||
515 | km_free((void *)va, sz, &kv_any, &kp_dma_contig); | ||||
516 | } | ||||
517 | |||||
518 | /* | ||||
519 | * Physmem RLE compression support. | ||||
520 | * | ||||
521 | * Given a physical page address, return the number of pages starting at the | ||||
522 | * address that are free. Clamps to the number of pages in | ||||
523 | * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. | ||||
524 | */ | ||||
525 | int | ||||
526 | uvm_page_rle(paddr_t addr) | ||||
527 | { | ||||
528 | struct vm_page *pg, *pg_end; | ||||
529 | struct vm_physseg *vmp; | ||||
530 | int pseg_idx, off_idx; | ||||
531 | |||||
532 | pseg_idx = vm_physseg_find(atop(addr)((addr) >> 12), &off_idx); | ||||
533 | if (pseg_idx == -1) | ||||
534 | return 0; | ||||
535 | |||||
536 | vmp = &vm_physmem[pseg_idx]; | ||||
537 | pg = &vmp->pgs[off_idx]; | ||||
538 | if (!(pg->pg_flags & PQ_FREE0x00010000)) | ||||
539 | return 0; | ||||
540 | |||||
541 | /* | ||||
542 | * Search for the first non-free page after pg. | ||||
543 | * Note that the page may not be the first page in a free pmemrange, | ||||
544 | * therefore pg->fpgsz cannot be used. | ||||
545 | */ | ||||
546 | for (pg_end = pg; pg_end <= vmp->lastpg && | ||||
547 | (pg_end->pg_flags & PQ_FREE0x00010000) == PQ_FREE0x00010000 && | ||||
548 | (pg_end - pg) < HIBERNATE_CHUNK_SIZE0x400000/PAGE_SIZE(1 << 12); pg_end++) | ||||
549 | ; | ||||
550 | return pg_end - pg; | ||||
551 | } | ||||
552 | |||||
553 | /* | ||||
554 | * Calculate a hopefully unique version # for this kernel, based upon | ||||
555 | * how it was linked. | ||||
556 | */ | ||||
557 | u_int32_t | ||||
558 | hibsum(void) | ||||
559 | { | ||||
560 | return ((long)malloc ^ (long)km_alloc ^ (long)printf ^ (long)strlen); | ||||
561 | } | ||||
562 | |||||
563 | |||||
564 | /* | ||||
565 | * Fills out the hibernate_info union pointed to by hib | ||||
566 | * with information about this machine (swap signature block | ||||
567 | * offsets, number of memory ranges, kernel in use, etc) | ||||
568 | */ | ||||
569 | int | ||||
570 | get_hibernate_info(union hibernate_info *hib, int suspend) | ||||
571 | { | ||||
572 | struct disklabel dl; | ||||
573 | char err_string[128], *dl_ret; | ||||
574 | int part; | ||||
575 | |||||
576 | #ifndef NO_PROPOLICE | ||||
577 | /* Save propolice guard */ | ||||
578 | hib->guard = __guard_local; | ||||
579 | #endif /* ! NO_PROPOLICE */ | ||||
580 | |||||
581 | /* Determine I/O function to use */ | ||||
582 | hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev); | ||||
583 | if (hib->io_func == NULL((void *)0)) | ||||
584 | return (1); | ||||
585 | |||||
586 | /* Calculate hibernate device */ | ||||
587 | hib->dev = swdevt[0].sw_dev; | ||||
588 | |||||
589 | /* Read disklabel (used to calculate signature and image offsets) */ | ||||
590 | dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string)); | ||||
591 | |||||
592 | if (dl_ret) { | ||||
593 | printf("Hibernate error reading disklabel: %s\n", dl_ret); | ||||
594 | return (1); | ||||
595 | } | ||||
596 | |||||
597 | /* Make sure we have a swap partition. */ | ||||
598 | part = DISKPART(hib->dev)(((unsigned)((hib->dev) & 0xff) | (((hib->dev) & 0xffff0000) >> 8)) % 16); | ||||
599 | if (dl.d_npartitions <= part || | ||||
600 | dl.d_partitions[part].p_fstype != FS_SWAP1 || | ||||
601 | DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size) == 0) | ||||
602 | return (1); | ||||
603 | |||||
604 | /* Magic number */ | ||||
605 | hib->magic = HIBERNATE_MAGIC0x0B5D0B5D; | ||||
606 | |||||
607 | /* Calculate signature block location */ | ||||
608 | hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size) - | ||||
609 | sizeof(union hibernate_info)/DEV_BSIZE(1 << 9); | ||||
610 | |||||
611 | /* Stash kernel version information */ | ||||
612 | memset(&hib->kernel_version, 0, 128)__builtin_memset((&hib->kernel_version), (0), (128)); | ||||
613 | bcopy(version, &hib->kernel_version, | ||||
614 | min(strlen(version), sizeof(hib->kernel_version)-1)); | ||||
615 | hib->kernel_sum = hibsum(); | ||||
616 | |||||
617 | if (suspend) { | ||||
618 | /* Grab the previously-allocated piglet addresses */ | ||||
619 | hib->piglet_va = global_piglet_va; | ||||
620 | hib->piglet_pa = global_piglet_pa; | ||||
621 | hib->io_page = (void *)hib->piglet_va; | ||||
622 | |||||
623 | /* | ||||
624 | * Initialization of the hibernate IO function for drivers | ||||
625 | * that need to do prep work (such as allocating memory or | ||||
626 | * setting up data structures that cannot safely be done | ||||
627 | * during suspend without causing side effects). There is | ||||
628 | * a matching HIB_DONE call performed after the write is | ||||
629 | * completed. | ||||
630 | */ | ||||
631 | if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_offseth << 32) + (&dl.d_partitions[part])->p_offset), | ||||
632 | (vaddr_t)NULL((void *)0), DL_GETPSIZE(&dl.d_partitions[part])(((u_int64_t)(&dl.d_partitions[part])->p_sizeh << 32) + (&dl.d_partitions[part])->p_size), | ||||
633 | HIB_INIT-1, hib->io_page)) | ||||
634 | goto fail; | ||||
635 | |||||
636 | } else { | ||||
637 | /* | ||||
638 | * Resuming kernels use a regular private page for the driver | ||||
639 | * No need to free this I/O page as it will vanish as part of | ||||
640 | * the resume. | ||||
641 | */ | ||||
642 | hib->io_page = malloc(PAGE_SIZE(1 << 12), M_DEVBUF2, M_NOWAIT0x0002); | ||||
643 | if (!hib->io_page) | ||||
644 | goto fail; | ||||
645 | } | ||||
646 | |||||
647 | if (get_hibernate_info_md(hib)) | ||||
648 | goto fail; | ||||
649 | |||||
650 | return (0); | ||||
651 | |||||
652 | fail: | ||||
653 | return (1); | ||||
654 | } | ||||
655 | |||||
656 | /* | ||||
657 | * Allocate nitems*size bytes from the hiballoc area presently in use | ||||
658 | */ | ||||
659 | void * | ||||
660 | hibernate_zlib_alloc(void *unused, int nitems, int size) | ||||
661 | { | ||||
662 | struct hibernate_zlib_state *hibernate_state; | ||||
663 | |||||
664 | hibernate_state = | ||||
665 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
666 | |||||
667 | return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); | ||||
668 | } | ||||
669 | |||||
670 | /* | ||||
671 | * Free the memory pointed to by addr in the hiballoc area presently in | ||||
672 | * use | ||||
673 | */ | ||||
674 | void | ||||
675 | hibernate_zlib_free(void *unused, void *addr) | ||||
676 | { | ||||
677 | struct hibernate_zlib_state *hibernate_state; | ||||
678 | |||||
679 | hibernate_state = | ||||
680 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
681 | |||||
682 | hib_free(&hibernate_state->hiballoc_arena, addr); | ||||
683 | } | ||||
684 | |||||
685 | /* | ||||
686 | * Inflate next page of data from the image stream. | ||||
687 | * The rle parameter is modified on exit to contain the number of pages to | ||||
688 | * skip in the output stream (or 0 if this page was inflated into). | ||||
689 | * | ||||
690 | * Returns 0 if the stream contains additional data, or 1 if the stream is | ||||
691 | * finished. | ||||
692 | */ | ||||
693 | int | ||||
694 | hibernate_inflate_page(int *rle) | ||||
695 | { | ||||
696 | struct hibernate_zlib_state *hibernate_state; | ||||
697 | int i; | ||||
698 | |||||
699 | hibernate_state = | ||||
700 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
701 | |||||
702 | /* Set up the stream for RLE code inflate */ | ||||
703 | hibernate_state->hib_stream.next_out = (unsigned char *)rle; | ||||
704 | hibernate_state->hib_stream.avail_out = sizeof(*rle); | ||||
705 | |||||
706 | /* Inflate RLE code */ | ||||
707 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2); | ||||
708 | if (i != Z_OK0 && i != Z_STREAM_END1) { | ||||
709 | /* | ||||
710 | * XXX - this will likely reboot/hang most machines | ||||
711 | * since the console output buffer will be unmapped, | ||||
712 | * but there's not much else we can do here. | ||||
713 | */ | ||||
714 | panic("rle inflate stream error"); | ||||
715 | } | ||||
716 | |||||
717 | if (hibernate_state->hib_stream.avail_out != 0) { | ||||
718 | /* | ||||
719 | * XXX - this will likely reboot/hang most machines | ||||
720 | * since the console output buffer will be unmapped, | ||||
721 | * but there's not much else we can do here. | ||||
722 | */ | ||||
723 | panic("rle short inflate error"); | ||||
724 | } | ||||
725 | |||||
726 | if (*rle < 0 || *rle > 1024) { | ||||
727 | /* | ||||
728 | * XXX - this will likely reboot/hang most machines | ||||
729 | * since the console output buffer will be unmapped, | ||||
730 | * but there's not much else we can do here. | ||||
731 | */ | ||||
732 | panic("invalid rle count"); | ||||
733 | } | ||||
734 | |||||
735 | if (i == Z_STREAM_END1) | ||||
736 | return (1); | ||||
737 | |||||
738 | if (*rle != 0) | ||||
739 | return (0); | ||||
740 | |||||
741 | /* Set up the stream for page inflate */ | ||||
742 | hibernate_state->hib_stream.next_out = | ||||
743 | (unsigned char *)HIBERNATE_INFLATE_PAGE((1 << 12) * 33); | ||||
744 | hibernate_state->hib_stream.avail_out = PAGE_SIZE(1 << 12); | ||||
745 | |||||
746 | /* Process next block of data */ | ||||
747 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2); | ||||
748 | if (i != Z_OK0 && i != Z_STREAM_END1) { | ||||
749 | /* | ||||
750 | * XXX - this will likely reboot/hang most machines | ||||
751 | * since the console output buffer will be unmapped, | ||||
752 | * but there's not much else we can do here. | ||||
753 | */ | ||||
754 | panic("inflate error"); | ||||
755 | } | ||||
756 | |||||
757 | /* We should always have extracted a full page ... */ | ||||
758 | if (hibernate_state->hib_stream.avail_out != 0) { | ||||
759 | /* | ||||
760 | * XXX - this will likely reboot/hang most machines | ||||
761 | * since the console output buffer will be unmapped, | ||||
762 | * but there's not much else we can do here. | ||||
763 | */ | ||||
764 | panic("incomplete page"); | ||||
765 | } | ||||
766 | |||||
767 | return (i == Z_STREAM_END1); | ||||
768 | } | ||||
769 | |||||
/*
 * Inflate size bytes from src into dest, skipping any pages in
 * [src..dest] that are special (see hibernate_inflate_skip)
 *
 * This function executes while using the resume-time stack
 * and pmap, and therefore cannot use ddb/printf/etc. Doing so
 * will likely hang or reset the machine since the console output buffer
 * will be unmapped.
 */
void
hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
    paddr_t src, size_t size)
{
	int end_stream = 0, rle, skip;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* The compressed input is consumed linearly starting at src. */
	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = size;

	do {
		/*
		 * Is this a special page? If yes, redirect the
		 * inflate output to a scratch page (eg, discard it)
		 */
		skip = hibernate_inflate_skip(hib, dest);
		if (skip == HIB_SKIP) {
			/* Map the inflate page onto itself: output discarded. */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    HIBERNATE_INFLATE_PAGE, 0);
		} else if (skip == HIB_MOVE) {
			/*
			 * Special case : retguard region. This gets moved
			 * temporarily into the piglet region and copied into
			 * place immediately before resume
			 */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    hib->piglet_pa + (110 * PAGE_SIZE) +
			    hib->retguard_ofs, 0);
			hib->retguard_ofs += PAGE_SIZE;
			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
				/*
				 * XXX - this will likely reboot/hang most
				 * machines since the console output
				 * buffer will be unmapped, but there's
				 * not much else we can do here.
				 */
				panic("retguard move error, out of space");
			}
		} else {
			/* Normal page: inflate directly into place at dest. */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE, dest, 0);
		}

		/* Flush the new mapping before inflating through it. */
		hibernate_flush();
		end_stream = hibernate_inflate_page(&rle);

		/* rle == 0: one page was inflated; else skip rle pages. */
		if (rle == 0)
			dest += PAGE_SIZE;
		else
			dest += (rle * PAGE_SIZE);
	} while (!end_stream);
}
836 | |||||
837 | /* | ||||
838 | * deflate from src into the I/O page, up to 'remaining' bytes | ||||
839 | * | ||||
840 | * Returns number of input bytes consumed, and may reset | ||||
841 | * the 'remaining' parameter if not all the output space was consumed | ||||
842 | * (this information is needed to know how much to write to disk) | ||||
843 | */ | ||||
844 | size_t | ||||
845 | hibernate_deflate(union hibernate_info *hib, paddr_t src, | ||||
846 | size_t *remaining) | ||||
847 | { | ||||
848 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); | ||||
849 | struct hibernate_zlib_state *hibernate_state; | ||||
850 | |||||
851 | hibernate_state = | ||||
852 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
853 | |||||
854 | /* Set up the stream for deflate */ | ||||
855 | hibernate_state->hib_stream.next_in = (unsigned char *)src; | ||||
856 | hibernate_state->hib_stream.avail_in = PAGE_SIZE(1 << 12) - (src & PAGE_MASK((1 << 12) - 1)); | ||||
857 | hibernate_state->hib_stream.next_out = | ||||
858 | (unsigned char *)hibernate_io_page + (PAGE_SIZE(1 << 12) - *remaining); | ||||
859 | hibernate_state->hib_stream.avail_out = *remaining; | ||||
860 | |||||
861 | /* Process next block of data */ | ||||
862 | if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH2) != Z_OK0) | ||||
863 | panic("hibernate zlib deflate error"); | ||||
864 | |||||
865 | /* Update pointers and return number of bytes consumed */ | ||||
866 | *remaining = hibernate_state->hib_stream.avail_out; | ||||
867 | return (PAGE_SIZE(1 << 12) - (src & PAGE_MASK((1 << 12) - 1))) - | ||||
868 | hibernate_state->hib_stream.avail_in; | ||||
869 | } | ||||
870 | |||||
871 | /* | ||||
872 | * Write the hibernation information specified in hiber_info | ||||
873 | * to the location in swap previously calculated (last block of | ||||
874 | * swap), called the "signature block". | ||||
875 | */ | ||||
876 | int | ||||
877 | hibernate_write_signature(union hibernate_info *hib) | ||||
878 | { | ||||
879 | /* Write hibernate info to disk */ | ||||
880 | return (hib->io_func(hib->dev, hib->sig_offset, | ||||
881 | (vaddr_t)hib, DEV_BSIZE(1 << 9), HIB_W1, | ||||
882 | hib->io_page)); | ||||
883 | } | ||||
884 | |||||
885 | /* | ||||
886 | * Write the memory chunk table to the area in swap immediately | ||||
887 | * preceding the signature block. The chunk table is stored | ||||
888 | * in the piglet when this function is called. Returns errno. | ||||
889 | */ | ||||
890 | int | ||||
891 | hibernate_write_chunktable(union hibernate_info *hib) | ||||
892 | { | ||||
893 | vaddr_t hibernate_chunk_table_start; | ||||
894 | size_t hibernate_chunk_table_size; | ||||
895 | int i, err; | ||||
896 | |||||
897 | hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE0x100000; | ||||
898 | |||||
899 | hibernate_chunk_table_start = hib->piglet_va + | ||||
900 | HIBERNATE_CHUNK_SIZE0x400000; | ||||
901 | |||||
902 | /* Write chunk table */ | ||||
903 | for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS(64 * 1024)) { | ||||
904 | if ((err = hib->io_func(hib->dev, | ||||
905 | hib->chunktable_offset + (i/DEV_BSIZE(1 << 9)), | ||||
906 | (vaddr_t)(hibernate_chunk_table_start + i), | ||||
907 | MAXPHYS(64 * 1024), HIB_W1, hib->io_page))) { | ||||
908 | DPRINTF("chunktable write error: %d\n", err); | ||||
909 | return (err); | ||||
910 | } | ||||
911 | } | ||||
912 | |||||
913 | return (0); | ||||
914 | } | ||||
915 | |||||
916 | /* | ||||
917 | * Write an empty hiber_info to the swap signature block, which is | ||||
918 | * guaranteed to not match any valid hib. | ||||
919 | */ | ||||
920 | int | ||||
921 | hibernate_clear_signature(union hibernate_info *hib) | ||||
922 | { | ||||
923 | union hibernate_info blank_hiber_info; | ||||
924 | |||||
925 | /* Zero out a blank hiber_info */ | ||||
926 | memset(&blank_hiber_info, 0, sizeof(union hibernate_info))__builtin_memset((&blank_hiber_info), (0), (sizeof(union hibernate_info ))); | ||||
927 | |||||
928 | /* Write (zeroed) hibernate info to disk */ | ||||
929 | DPRINTF("clearing hibernate signature block location: %lld\n", | ||||
930 | hib->sig_offset); | ||||
931 | if (hibernate_block_io(hib, | ||||
932 | hib->sig_offset, | ||||
933 | DEV_BSIZE(1 << 9), (vaddr_t)&blank_hiber_info, 1)) | ||||
934 | printf("Warning: could not clear hibernate signature\n"); | ||||
935 | |||||
936 | return (0); | ||||
937 | } | ||||
938 | |||||
939 | /* | ||||
940 | * Compare two hibernate_infos to determine if they are the same (eg, | ||||
941 | * we should be performing a hibernate resume on this machine. | ||||
942 | * Not all fields are checked - just enough to verify that the machine | ||||
943 | * has the same memory configuration and kernel as the one that | ||||
944 | * wrote the signature previously. | ||||
945 | */ | ||||
946 | int | ||||
947 | hibernate_compare_signature(union hibernate_info *mine, | ||||
948 | union hibernate_info *disk) | ||||
949 | { | ||||
950 | u_int i; | ||||
951 | |||||
952 | if (mine->nranges != disk->nranges) { | ||||
953 | printf("unhibernate failed: memory layout changed\n"); | ||||
954 | return (1); | ||||
955 | } | ||||
956 | |||||
957 | if (strcmp(mine->kernel_version, disk->kernel_version) != 0) { | ||||
958 | printf("unhibernate failed: original kernel changed\n"); | ||||
959 | return (1); | ||||
960 | } | ||||
961 | |||||
962 | if (hibsum() != disk->kernel_sum) { | ||||
963 | printf("unhibernate failed: original kernel changed\n"); | ||||
964 | return (1); | ||||
965 | } | ||||
966 | |||||
967 | for (i = 0; i < mine->nranges; i++) { | ||||
968 | if ((mine->ranges[i].base != disk->ranges[i].base) || | ||||
969 | (mine->ranges[i].end != disk->ranges[i].end) ) { | ||||
970 | DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", | ||||
971 | i, | ||||
972 | (void *)mine->ranges[i].base, | ||||
973 | (void *)mine->ranges[i].end, | ||||
974 | (void *)disk->ranges[i].base, | ||||
975 | (void *)disk->ranges[i].end); | ||||
976 | printf("unhibernate failed: memory size changed\n"); | ||||
977 | return (1); | ||||
978 | } | ||||
979 | } | ||||
980 | |||||
981 | return (0); | ||||
982 | } | ||||
983 | |||||
984 | /* | ||||
985 | * Transfers xfer_size bytes between the hibernate device specified in | ||||
986 | * hib_info at offset blkctr and the vaddr specified at dest. | ||||
987 | * | ||||
988 | * Separate offsets and pages are used to handle misaligned reads (reads | ||||
989 | * that span a page boundary). | ||||
990 | * | ||||
991 | * blkctr specifies a relative offset (relative to the start of swap), | ||||
992 | * not an absolute disk offset | ||||
993 | * | ||||
994 | */ | ||||
995 | int | ||||
996 | hibernate_block_io(union hibernate_info *hib, daddr_t blkctr, | ||||
997 | size_t xfer_size, vaddr_t dest, int iswrite) | ||||
998 | { | ||||
999 | struct buf *bp; | ||||
1000 | struct bdevsw *bdsw; | ||||
1001 | int error; | ||||
1002 | |||||
1003 | bp = geteblk(xfer_size); | ||||
1004 | bdsw = &bdevsw[major(hib->dev)(((unsigned)(hib->dev) >> 8) & 0xff)]; | ||||
1005 | |||||
1006 | error = (*bdsw->d_open)(hib->dev, FREAD0x0001, S_IFCHR0020000, curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | ||||
1007 | if (error) { | ||||
1008 | printf("hibernate_block_io open failed\n"); | ||||
1009 | return (1); | ||||
1010 | } | ||||
1011 | |||||
1012 | if (iswrite) | ||||
1013 | bcopy((caddr_t)dest, bp->b_data, xfer_size); | ||||
1014 | |||||
1015 | bp->b_bcount = xfer_size; | ||||
1016 | bp->b_blkno = blkctr; | ||||
1017 | CLR(bp->b_flags, B_READ | B_WRITE | B_DONE)((bp->b_flags) &= ~(0x00008000 | 0x00000000 | 0x00000100 )); | ||||
1018 | SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW)((bp->b_flags) |= (0x00000010 | (iswrite ? 0x00000000 : 0x00008000 ) | 0x00004000)); | ||||
1019 | bp->b_dev = hib->dev; | ||||
1020 | (*bdsw->d_strategy)(bp); | ||||
1021 | |||||
1022 | error = biowait(bp); | ||||
1023 | if (error) { | ||||
1024 | printf("hib block_io biowait error %d blk %lld size %zu\n", | ||||
1025 | error, (long long)blkctr, xfer_size); | ||||
1026 | error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR0020000, | ||||
1027 | curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | ||||
1028 | if (error) | ||||
1029 | printf("hibernate_block_io error close failed\n"); | ||||
1030 | return (1); | ||||
1031 | } | ||||
1032 | |||||
1033 | error = (*bdsw->d_close)(hib->dev, FREAD0x0001, S_IFCHR0020000, curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | ||||
1034 | if (error) { | ||||
1035 | printf("hibernate_block_io close failed\n"); | ||||
1036 | return (1); | ||||
1037 | } | ||||
1038 | |||||
1039 | if (!iswrite) | ||||
1040 | bcopy(bp->b_data, (caddr_t)dest, xfer_size); | ||||
1041 | |||||
1042 | bp->b_flags |= B_INVAL0x00000800; | ||||
1043 | brelse(bp); | ||||
1044 | |||||
1045 | return (0); | ||||
1046 | } | ||||
1047 | |||||
1048 | /* | ||||
1049 | * Preserve one page worth of random data, generated from the resuming | ||||
1050 | * kernel's arc4random. After resume, this preserved entropy can be used | ||||
1051 | * to further improve the un-hibernated machine's entropy pool. This | ||||
1052 | * random data is stored in the piglet, which is preserved across the | ||||
1053 | * unpack operation, and is restored later in the resume process (see | ||||
1054 | * hib_getentropy) | ||||
1055 | */ | ||||
1056 | void | ||||
1057 | hibernate_preserve_entropy(union hibernate_info *hib) | ||||
1058 | { | ||||
1059 | void *entropy; | ||||
1060 | |||||
1061 | entropy = km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, &kd_nowait); | ||||
1062 | |||||
1063 | if (!entropy) | ||||
1064 | return; | ||||
1065 | |||||
1066 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | ||||
1067 | pmap_kenter_pa((vaddr_t)entropy, | ||||
1068 | (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE(1 << 12))), | ||||
1069 | PROT_READ0x01 | PROT_WRITE0x02); | ||||
1070 | |||||
1071 | arc4random_buf((void *)entropy, PAGE_SIZE(1 << 12)); | ||||
1072 | pmap_kremove((vaddr_t)entropy, PAGE_SIZE(1 << 12)); | ||||
1073 | km_free(entropy, PAGE_SIZE(1 << 12), &kv_any, &kp_none); | ||||
1074 | } | ||||
1075 | |||||
1076 | #ifndef NO_PROPOLICE | ||||
1077 | vaddr_t | ||||
1078 | hibernate_unprotect_ssp(void) | ||||
1079 | { | ||||
1080 | struct kmem_dyn_mode kd_avoidalias; | ||||
1081 | vaddr_t va = trunc_page((vaddr_t)&__guard_local)(((vaddr_t)&__guard_local) & ~((1 << 12) - 1)); | ||||
1082 | paddr_t pa; | ||||
1083 | |||||
1084 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | ||||
1085 | |||||
1086 | memset(&kd_avoidalias, 0, sizeof kd_avoidalias)__builtin_memset((&kd_avoidalias), (0), (sizeof kd_avoidalias )); | ||||
1087 | kd_avoidalias.kd_prefer = pa; | ||||
1088 | kd_avoidalias.kd_waitok = 1; | ||||
1089 | va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, &kd_avoidalias); | ||||
1090 | if (!va) | ||||
1091 | panic("hibernate_unprotect_ssp"); | ||||
1092 | |||||
1093 | pmap_kenter_pa(va, pa, PROT_READ0x01 | PROT_WRITE0x02); | ||||
1094 | pmap_update(pmap_kernel()); | ||||
1095 | |||||
1096 | return va; | ||||
1097 | } | ||||
1098 | |||||
/*
 * Undo hibernate_unprotect_ssp(): remove the temporary writable
 * mapping of the __guard_local page and free its VA.
 */
void
hibernate_reprotect_ssp(vaddr_t va)
{
	pmap_kremove(va, PAGE_SIZE);
	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
}
1105 | #endif /* NO_PROPOLICE */ | ||||
1106 | |||||
/*
 * Reads the signature block from swap, checks against the current machine's
 * information. If the information matches, perform a resume by reading the
 * saved image into the pig area, and unpacking.
 *
 * Must be called with interrupts enabled.
 *
 * On any failure before the point of no return this function simply
 * returns (or falls through to 'fail') and the machine boots normally.
 * On success it never returns: hibernate_unpack_image() jumps into the
 * resumed kernel.
 */
void
hibernate_resume(void)
{
	union hibernate_info hib;
	int s;
#ifndef NO_PROPOLICE
	/* Offset of __guard_local within its page. */
	vsize_t off = (vaddr_t)&__guard_local -
	    trunc_page((vaddr_t)&__guard_local);
	vaddr_t guard_va;
#endif

	/* Get current running machine's hibernate info */
	memset(&hib, 0, sizeof(hib));
	if (get_hibernate_info(&hib, 0)) {
		DPRINTF("couldn't retrieve machine's hibernate info\n");
		return;
	}

	/* Read hibernate info from disk */
	s = splbio();

	DPRINTF("reading hibernate signature block location: %lld\n",
		hib.sig_offset);

	if (hibernate_block_io(&hib,
	    hib.sig_offset,
	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
		DPRINTF("error in hibernate read");
		splx(s);
		return;
	}

	/* Check magic number */
	if (disk_hib.magic != HIBERNATE_MAGIC) {
		DPRINTF("wrong magic number in hibernate signature: %x\n",
			disk_hib.magic);
		splx(s);
		return;
	}

	/*
	 * We (possibly) found a hibernate signature. Clear signature first,
	 * to prevent accidental resume or endless resume cycles later.
	 */
	if (hibernate_clear_signature(&hib)) {
		DPRINTF("error clearing hibernate signature block\n");
		splx(s);
		return;
	}

	/*
	 * If on-disk and in-memory hibernate signatures match,
	 * this means we should do a resume from hibernate.
	 */
	if (hibernate_compare_signature(&hib, &disk_hib)) {
		DPRINTF("mismatched hibernate signature block\n");
		splx(s);
		return;
	}
	/* Use the current boot's device for all further image I/O. */
	disk_hib.dev = hib.dev;

#ifdef MULTIPROCESSOR
	/* XXX - if we fail later, we may need to rehatch APs on some archs */
	DPRINTF("hibernate: quiescing APs\n");
	hibernate_quiesce_cpus();
#endif /* MULTIPROCESSOR */

	/* Read the image from disk into the image (pig) area */
	if (hibernate_read_image(&disk_hib))
		goto fail;

	DPRINTF("hibernate: quiescing devices\n");
	if (config_suspend_all(DVACT_QUIESCE) != 0)
		goto fail;

#ifndef NO_PROPOLICE
	/* Make the stack-protector guard page writable for later. */
	guard_va = hibernate_unprotect_ssp();
#endif /* NO_PROPOLICE */

	(void) splhigh();
	hibernate_disable_intr_machdep();
	cold = 1;

	DPRINTF("hibernate: suspending devices\n");
	if (config_suspend_all(DVACT_SUSPEND) != 0) {
		/* Suspend failed: undo cold/intr state before bailing. */
		cold = 0;
		hibernate_enable_intr_machdep();
#ifndef NO_PROPOLICE
		hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */
		goto fail;
	}

	/* Stash fresh entropy in the piglet for the resumed kernel. */
	hibernate_preserve_entropy(&disk_hib);

	printf("Unpacking image...\n");

	/* Switch stacks */
	DPRINTF("hibernate: switching stacks\n");
	hibernate_switch_stack_machdep();

#ifndef NO_PROPOLICE
	/* Start using suspended kernel's propolice guard */
	*(long *)(guard_va + off) = disk_hib.guard;
	hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */

	/* Unpack and resume */
	hibernate_unpack_image(&disk_hib);

fail:
	splx(s);
	printf("\nUnable to resume hibernated image\n");
}
1228 | |||||
/*
 * Unpack image from pig area to original location by looping through the
 * list of output chunks in the order they should be restored (fchunks).
 *
 * Note that due to the stack smash protector and the fact that we have
 * switched stacks, it is not permitted to return from this function.
 */
void
hibernate_unpack_image(union hibernate_info *hib)
{
	struct hibernate_disk_chunk *chunks;
	union hibernate_info local_hib;
	paddr_t image_cur = global_pig_start;
	short i, *fchunks;
	char *pva;

	/* Piglet will be identity mapped (VA == PA) */
	pva = (char *)hib->piglet_pa;

	/* Restore-order chunk index list lives at piglet page 4. */
	fchunks = (short *)(pva + (4 * PAGE_SIZE));

	/* Chunk table lives one chunk into the piglet. */
	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);

	/* Can't use hiber_info that's passed in after this point */
	bcopy(hib, &local_hib, sizeof(union hibernate_info));
	local_hib.retguard_ofs = 0;

	/* VA == PA */
	local_hib.piglet_va = local_hib.piglet_pa;

	/*
	 * Point of no return. Once we pass this point, only kernel code can
	 * be accessed. No global variables or other kernel data structures
	 * are guaranteed to be coherent after unpack starts.
	 *
	 * The image is now in high memory (pig area), we unpack from the pig
	 * to the correct location in memory. We'll eventually end up copying
	 * on top of ourself, but we are assured the kernel code here is the
	 * same between the hibernated and resuming kernel, and we are running
	 * on our own stack, so the overwrite is ok.
	 */
	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
	hibernate_activate_resume_pt_machdep();

	for (i = 0; i < local_hib.chunk_ctr; i++) {
		/* Reset zlib for inflate */
		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
			panic("hibernate failed to reset zlib for inflate");

		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
		    image_cur);

		image_cur += chunks[fchunks[i]].compressed_size;

	}

	/*
	 * Resume the loaded kernel by jumping to the MD resume vector.
	 * We won't be returning from this call. We pass the location of
	 * the retguard save area so the MD code can replace it before
	 * resuming. See the piglet layout at the top of this file for
	 * more information on the layout of the piglet area.
	 *
	 * We use 'global_piglet_va' here since by the time we are at
	 * this point, we have already unpacked the image, and we want
	 * the suspended kernel's view of what the piglet was, before
	 * suspend occurred (since we will need to use that in the retguard
	 * copy code in hibernate_resume_machdep.)
	 */
	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
}
1300 | |||||
/*
 * Bounce a compressed image chunk to the piglet, entering mappings for the
 * copied pages as needed.
 *
 * Copies 'size' bytes starting at physical address img_cur to the
 * piglet VA 'piglet', mapping each source page at
 * HIBERNATE_INFLATE_PAGE before copying.  Runs on the resume-time
 * stack/pmap.
 *
 * NOTE(review): the tail iteration always copies a full PAGE_SIZE, so
 * up to a page of slack past 'size' may be copied — presumably the
 * bounce area is sized to allow this; confirm against callers.
 */
void
hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
{
	size_t ct, ofs;
	paddr_t src = img_cur;
	vaddr_t dest = piglet;

	/* Copy first partial page */
	ct = (PAGE_SIZE) - (src & PAGE_MASK);
	ofs = (src & PAGE_MASK);

	if (ct < PAGE_SIZE) {
		/* Map the page containing src, copy its tail only. */
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
			(src - ofs), 0);
		hibernate_flush();
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs),
		    (caddr_t)dest, ct);
		src += ct;
		dest += ct;
	}

	/* Copy remaining pages */
	while (src < size + img_cur) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
		hibernate_flush();
		ct = PAGE_SIZE;
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
		hibernate_flush();
		src += ct;
		dest += ct;
	}
}
1336 | |||||
1337 | /* | ||||
1338 | * Process a chunk by bouncing it to the piglet, followed by unpacking | ||||
1339 | */ | ||||
1340 | void | ||||
1341 | hibernate_process_chunk(union hibernate_info *hib, | ||||
1342 | struct hibernate_disk_chunk *chunk, paddr_t img_cur) | ||||
1343 | { | ||||
1344 | char *pva = (char *)hib->piglet_va; | ||||
1345 | |||||
1346 | hibernate_copy_chunk_to_piglet(img_cur, | ||||
1347 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE0x400000 * 2)), chunk->compressed_size); | ||||
1348 | hibernate_inflate_region(hib, chunk->base, | ||||
1349 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE0x400000 * 2)), | ||||
1350 | chunk->compressed_size); | ||||
1351 | } | ||||
1352 | |||||
1353 | /* | ||||
1354 | * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between | ||||
1355 | * inaddr and range_end. | ||||
1356 | */ | ||||
1357 | int | ||||
1358 | hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) | ||||
1359 | { | ||||
1360 | int rle; | ||||
1361 | |||||
1362 | rle = uvm_page_rle(inaddr); | ||||
1363 | KASSERT(rle >= 0 && rle <= MAX_RLE)((rle >= 0 && rle <= (0x400000 / (1 << 12 ))) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/kern/subr_hibernate.c" , 1363, "rle >= 0 && rle <= MAX_RLE")); | ||||
1364 | |||||
1365 | /* Clamp RLE to range end */ | ||||
1366 | if (rle > 0 && inaddr + (rle * PAGE_SIZE(1 << 12)) > range_end) | ||||
1367 | rle = (range_end - inaddr) / PAGE_SIZE(1 << 12); | ||||
1368 | |||||
1369 | return (rle); | ||||
1370 | } | ||||
1371 | |||||
/*
 * Write the RLE byte for page at 'inaddr' to the output stream.
 * Returns the number of pages to be skipped at 'inaddr'.
 *
 * The RLE value is deflated into the compressed stream; if that fills
 * the piglet I/O page, the page is flushed to disk at
 * hib->image_offset + *blkctr, and *blkctr / *out_remaining are
 * updated.
 *
 * NOTE(review): on an io_func failure the (positive) error is returned
 * where callers otherwise receive an RLE count — confirm callers
 * distinguish the two cases.
 */
int
hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
	paddr_t range_end, daddr_t *blkctr,
	size_t *out_remaining)
{
	int rle, err, *rleloc;
	struct hibernate_zlib_state *hibernate_state;
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	rle = hibernate_calc_rle(inaddr, range_end);

	/* Stage the RLE value in the last slot of the RLE page. */
	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
	*rleloc = rle;

	/* Deflate the RLE byte into the stream */
	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);

	/* Did we fill the output page? If so, flush to disk */
	if (*out_remaining == 0) {
		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
			hib->io_page))) {
			DPRINTF("hib write error %d\n", err);
			return (err);
		}

		*blkctr += PAGE_SIZE / DEV_BSIZE;
		*out_remaining = PAGE_SIZE;

		/* If we didn't deflate the entire RLE byte, finish it now */
		if (hibernate_state->hib_stream.avail_in != 0)
			hibernate_deflate(hib,
			    (vaddr_t)hibernate_state->hib_stream.next_in,
			    out_remaining);
	}

	return (rle);
}
1417 | |||||
1418 | /* | ||||
1419 | * Write a compressed version of this machine's memory to disk, at the | ||||
1420 | * precalculated swap offset: | ||||
1421 | * | ||||
1422 | * end of swap - signature block size - chunk table size - memory size | ||||
1423 | * | ||||
1424 | * The function begins by looping through each phys mem range, cutting each | ||||
1425 | * one into MD sized chunks. These chunks are then compressed individually | ||||
1426 | * and written out to disk, in phys mem order. Some chunks might compress | ||||
1427 | * more than others, and for this reason, each chunk's size is recorded | ||||
1428 | * in the chunk table, which is written to disk after the image has | ||||
1429 | * properly been compressed and written (in hibernate_write_chunktable). | ||||
1430 | * | ||||
1431 | * When this function is called, the machine is nearly suspended - most | ||||
1432 | * devices are quiesced/suspended, interrupts are off, and cold has | ||||
1433 | * been set. This means that there can be no side effects once the | ||||
1434 | * write has started, and the write function itself can also have no | ||||
1435 | * side effects. This also means no printfs are permitted (since printf | ||||
1436 | * has side effects.) | ||||
1437 | * | ||||
1438 | * Return values : | ||||
1439 | * | ||||
1440 | * 0 - success | ||||
1441 | * EIO - I/O error occurred writing the chunks | ||||
1442 | * EINVAL - Failed to write a complete range | ||||
1443 | * ENOMEM - Memory allocation failure during preparation of the zlib arena | ||||
1444 | */ | ||||
1445 | int | ||||
1446 | hibernate_write_chunks(union hibernate_info *hib) | ||||
1447 | { | ||||
1448 | paddr_t range_base, range_end, inaddr, temp_inaddr; | ||||
1449 | size_t nblocks, out_remaining, used; | ||||
1450 | struct hibernate_disk_chunk *chunks; | ||||
1451 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE(1 << 12); | ||||
1452 | daddr_t blkctr = 0; | ||||
1453 | int i, rle, err; | ||||
1454 | struct hibernate_zlib_state *hibernate_state; | ||||
1455 | |||||
1456 | hibernate_state = | ||||
1457 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
1458 | |||||
1459 | hib->chunk_ctr = 0; | ||||
1460 | |||||
1461 | /* | ||||
1462 | * Map the utility VAs to the piglet. See the piglet map at the | ||||
1463 | * top of this file for piglet layout information. | ||||
1464 | */ | ||||
1465 | hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE(1 << 12); | ||||
1466 | hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE(1 << 12); | ||||
1467 | |||||
1468 | chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + | ||||
1469 | HIBERNATE_CHUNK_SIZE0x400000); | ||||
1470 | |||||
1471 | /* Calculate the chunk regions */ | ||||
1472 | for (i = 0; i < hib->nranges; i++) { | ||||
1473 | range_base = hib->ranges[i].base; | ||||
1474 | range_end = hib->ranges[i].end; | ||||
1475 | |||||
1476 | inaddr = range_base; | ||||
1477 | |||||
1478 | while (inaddr
| ||||
1479 | chunks[hib->chunk_ctr].base = inaddr; | ||||
1480 | if (inaddr + HIBERNATE_CHUNK_SIZE0x400000 < range_end) | ||||
1481 | chunks[hib->chunk_ctr].end = inaddr + | ||||
1482 | HIBERNATE_CHUNK_SIZE0x400000; | ||||
1483 | else | ||||
1484 | chunks[hib->chunk_ctr].end = range_end; | ||||
1485 | |||||
1486 | inaddr += HIBERNATE_CHUNK_SIZE0x400000; | ||||
1487 | hib->chunk_ctr ++; | ||||
1488 | } | ||||
1489 | } | ||||
1490 | |||||
1491 | uvm_pmr_dirty_everything(); | ||||
1492 | uvm_pmr_zero_everything(); | ||||
1493 | |||||
1494 | /* Compress and write the chunks in the chunktable */ | ||||
1495 | for (i = 0; i < hib->chunk_ctr; i++) { | ||||
1496 | range_base = chunks[i].base; | ||||
1497 | range_end = chunks[i].end; | ||||
1498 | |||||
1499 | chunks[i].offset = blkctr + hib->image_offset; | ||||
1500 | |||||
1501 | /* Reset zlib for deflate */ | ||||
1502 | if (hibernate_zlib_reset(hib, 1) != Z_OK0) { | ||||
1503 | DPRINTF("hibernate_zlib_reset failed for deflate\n"); | ||||
1504 | return (ENOMEM12); | ||||
1505 | } | ||||
1506 | |||||
1507 | inaddr = range_base; | ||||
1508 | |||||
1509 | /* | ||||
1510 | * For each range, loop through its phys mem region | ||||
1511 | * and write out the chunks (the last chunk might be | ||||
1512 | * smaller than the chunk size). | ||||
1513 | */ | ||||
1514 | while (inaddr
| ||||
1515 | out_remaining = PAGE_SIZE(1 << 12); | ||||
1516 | while (out_remaining
| ||||
1517 | /* | ||||
1518 | * Adjust for regions that are not evenly | ||||
1519 | * divisible by PAGE_SIZE or overflowed | ||||
1520 | * pages from the previous iteration. | ||||
1521 | */ | ||||
1522 | temp_inaddr = (inaddr & PAGE_MASK((1 << 12) - 1)) + | ||||
1523 | hibernate_copy_page; | ||||
1524 | |||||
1525 | /* Deflate from temp_inaddr to IO page */ | ||||
1526 | if (inaddr
| ||||
1527 | if (inaddr % PAGE_SIZE(1 << 12) == 0) { | ||||
1528 | rle = hibernate_write_rle(hib, | ||||
1529 | inaddr, | ||||
1530 | range_end, | ||||
1531 | &blkctr, | ||||
1532 | &out_remaining); | ||||
1533 | } | ||||
1534 | |||||
1535 | if (rle == 0) { | ||||
| |||||
1536 | pmap_kenter_pa(hibernate_temp_page, | ||||
1537 | inaddr & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1)), | ||||
1538 | PROT_READ0x01); | ||||
1539 | |||||
1540 | bcopy((caddr_t)hibernate_temp_page, | ||||
1541 | (caddr_t)hibernate_copy_page, | ||||
1542 | PAGE_SIZE(1 << 12)); | ||||
1543 | inaddr += hibernate_deflate(hib, | ||||
1544 | temp_inaddr, | ||||
1545 | &out_remaining); | ||||
1546 | } else { | ||||
1547 | inaddr += rle * PAGE_SIZE(1 << 12); | ||||
1548 | if (inaddr > range_end) | ||||
1549 | inaddr = range_end; | ||||
1550 | } | ||||
1551 | |||||
1552 | } | ||||
1553 | |||||
1554 | if (out_remaining == 0) { | ||||
1555 | /* Filled up the page */ | ||||
1556 | nblocks = PAGE_SIZE(1 << 12) / DEV_BSIZE(1 << 9); | ||||
1557 | |||||
1558 | if ((err = hib->io_func(hib->dev, | ||||
1559 | blkctr + hib->image_offset, | ||||
1560 | (vaddr_t)hibernate_io_page, | ||||
1561 | PAGE_SIZE(1 << 12), HIB_W1, hib->io_page))) { | ||||
1562 | DPRINTF("hib write error %d\n", | ||||
1563 | err); | ||||
1564 | return (err); | ||||
1565 | } | ||||
1566 | |||||
1567 | blkctr += nblocks; | ||||
1568 | } | ||||
1569 | } | ||||
1570 | } | ||||
1571 | |||||
1572 | if (inaddr != range_end) { | ||||
1573 | DPRINTF("deflate range ended prematurely\n"); | ||||
1574 | return (EINVAL22); | ||||
1575 | } | ||||
1576 | |||||
1577 | /* | ||||
1578 | * End of range. Round up to next secsize bytes | ||||
1579 | * after finishing compress | ||||
1580 | */ | ||||
1581 | if (out_remaining == 0) | ||||
1582 | out_remaining = PAGE_SIZE(1 << 12); | ||||
1583 | |||||
1584 | /* Finish compress */ | ||||
1585 | hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; | ||||
1586 | hibernate_state->hib_stream.avail_in = 0; | ||||
1587 | hibernate_state->hib_stream.next_out = | ||||
1588 | (unsigned char *)hibernate_io_page + | ||||
1589 | (PAGE_SIZE(1 << 12) - out_remaining); | ||||
1590 | |||||
1591 | /* We have an extra output page available for finalize */ | ||||
1592 | hibernate_state->hib_stream.avail_out = | ||||
1593 | out_remaining + PAGE_SIZE(1 << 12); | ||||
1594 | |||||
1595 | if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH4)) != | ||||
1596 | Z_STREAM_END1) { | ||||
1597 | DPRINTF("deflate error in output stream: %d\n", err); | ||||
1598 | return (err); | ||||
1599 | } | ||||
1600 | |||||
1601 | out_remaining = hibernate_state->hib_stream.avail_out; | ||||
1602 | |||||
1603 | used = 2 * PAGE_SIZE(1 << 12) - out_remaining; | ||||
1604 | nblocks = used / DEV_BSIZE(1 << 9); | ||||
1605 | |||||
1606 | /* Round up to next block if needed */ | ||||
1607 | if (used % DEV_BSIZE(1 << 9) != 0) | ||||
1608 | nblocks ++; | ||||
1609 | |||||
1610 | /* Write final block(s) for this chunk */ | ||||
1611 | if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, | ||||
1612 | (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE(1 << 9), | ||||
1613 | HIB_W1, hib->io_page))) { | ||||
1614 | DPRINTF("hib final write error %d\n", err); | ||||
1615 | return (err); | ||||
1616 | } | ||||
1617 | |||||
1618 | blkctr += nblocks; | ||||
1619 | |||||
1620 | chunks[i].compressed_size = (blkctr + hib->image_offset - | ||||
1621 | chunks[i].offset) * DEV_BSIZE(1 << 9); | ||||
1622 | } | ||||
1623 | |||||
1624 | hib->chunktable_offset = hib->image_offset + blkctr; | ||||
1625 | return (0); | ||||
1626 | } | ||||
1627 | |||||
1628 | /* | ||||
1629 | * Reset the zlib stream state and allocate a new hiballoc area for either | ||||
1630 | * inflate or deflate. This function is called once for each hibernate chunk. | ||||
1631 | * Calling hiballoc_init multiple times is acceptable since the memory it is | ||||
1632 | * provided is unmanaged memory (stolen). We use the memory provided to us | ||||
1633 | * by the piglet allocated via the supplied hib. | ||||
1634 | */ | ||||
1635 | int | ||||
1636 | hibernate_zlib_reset(union hibernate_info *hib, int deflate) | ||||
1637 | { | ||||
1638 | vaddr_t hibernate_zlib_start; | ||||
1639 | size_t hibernate_zlib_size; | ||||
1640 | char *pva = (char *)hib->piglet_va; | ||||
1641 | struct hibernate_zlib_state *hibernate_state; | ||||
1642 | |||||
1643 | hibernate_state = | ||||
1644 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34); | ||||
1645 | |||||
1646 | if (!deflate) | ||||
1647 | pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK(((0x0000ff8000000000UL|0x0000007fc0000000UL)|0x000000003fe00000UL )))); | ||||
1648 | |||||
1649 | /* | ||||
1650 | * See piglet layout information at the start of this file for | ||||
1651 | * information on the zlib page assignments. | ||||
1652 | */ | ||||
1653 | hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE(1 << 12))); | ||||
1654 | hibernate_zlib_size = 80 * PAGE_SIZE(1 << 12); | ||||
1655 | |||||
1656 | memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size)__builtin_memset(((void *)hibernate_zlib_start), (0), (hibernate_zlib_size )); | ||||
1657 | memset(hibernate_state, 0, PAGE_SIZE)__builtin_memset((hibernate_state), (0), ((1 << 12))); | ||||
1658 | |||||
1659 | /* Set up stream structure */ | ||||
1660 | hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; | ||||
1661 | hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; | ||||
1662 | |||||
1663 | /* Initialize the hiballoc arena for zlib allocs/frees */ | ||||
1664 | hiballoc_init(&hibernate_state->hiballoc_arena, | ||||
1665 | (caddr_t)hibernate_zlib_start, hibernate_zlib_size); | ||||
1666 | |||||
1667 | if (deflate) { | ||||
1668 | return deflateInit(&hibernate_state->hib_stream,deflateInit_((&hibernate_state->hib_stream), (1), "1.2.11" , (int)sizeof(z_stream)) | ||||
1669 | Z_BEST_SPEED)deflateInit_((&hibernate_state->hib_stream), (1), "1.2.11" , (int)sizeof(z_stream)); | ||||
1670 | } else | ||||
1671 | return inflateInit(&hibernate_state->hib_stream)inflateInit_((&hibernate_state->hib_stream), "1.2.11", (int)sizeof(z_stream)); | ||||
1672 | } | ||||
1673 | |||||
1674 | /* | ||||
1675 | * Reads the hibernated memory image from disk, whose location and | ||||
1676 | * size are recorded in hib. Begin by reading the persisted | ||||
1677 | * chunk table, which records the original chunk placement location | ||||
1678 | * and compressed size for each. Next, allocate a pig region of | ||||
1679 | * sufficient size to hold the compressed image. Next, read the | ||||
1680 | * chunks into the pig area (calling hibernate_read_chunks to do this), | ||||
1681 | * and finally, if all of the above succeeds, clear the hibernate signature. | ||||
1682 | * The function will then return to hibernate_resume, which will proceed | ||||
1683 | * to unpack the pig image to the correct place in memory. | ||||
1684 | */ | ||||
int
hibernate_read_image(union hibernate_info *hib)
{
	size_t compressed_size, disk_size, chunktable_size, pig_sz;
	paddr_t image_start, image_end, pig_start, pig_end;
	struct hibernate_disk_chunk *chunks;
	daddr_t blkctr;
	vaddr_t chunktable = (vaddr_t)NULL;
	paddr_t piglet_chunktable = hib->piglet_pa +
	    HIBERNATE_CHUNK_SIZE;
	int i, status;

	status = 0;
	pmap_activate(curproc);

	/* Calculate total chunk table size in disk blocks */
	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;

	blkctr = hib->chunktable_offset;

	/*
	 * VA window for the chunk table; backed below by the piglet's
	 * chunktable physical pages.
	 * NOTE(review): this VA is never km_free'd — presumably fine
	 * because the resuming kernel is replaced wholesale, but confirm.
	 */
	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);

	if (!chunktable)
		return (1);

	/* Map chunktable pages */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	/* Read the chunktable from disk into the piglet chunktable */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
		hibernate_block_io(hib, blkctr, MAXPHYS,
		    chunktable + i, 0);

	blkctr = hib->image_offset;
	compressed_size = 0;

	chunks = (struct hibernate_disk_chunk *)chunktable;

	/* Total compressed image size = sum of per-chunk compressed sizes */
	for (i = 0; i < hib->chunk_ctr; i++)
		compressed_size += chunks[i].compressed_size;

	disk_size = compressed_size;

	printf("unhibernating @ block %lld length %luMB\n",
	    hib->sig_offset - chunktable_size,
	    compressed_size / (1024 * 1024));

	/* Allocate the pig area */
	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
		status = 1;
		goto unmap;
	}

	pig_end = pig_start + pig_sz;

	/* Calculate image extents. Pig image must end on a chunk boundary. */
	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
	image_start = image_end - disk_size;

	/*
	 * NOTE(review): the return value of hibernate_read_chunks() is
	 * ignored here; a read failure would go unreported — confirm
	 * whether that is intentional.
	 */
	hibernate_read_chunks(hib, image_start, image_end, disk_size,
	    chunks);

	/* Prepare the resume time pmap/page table */
	hibernate_populate_resume_pt(hib, image_start, image_end);

unmap:
	/* Unmap chunktable pages */
	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
	pmap_update(pmap_kernel());

	return (status);
}
1763 | |||||
1764 | /* | ||||
1765 | * Read the hibernated memory chunks from disk (chunk information at this | ||||
1766 | * point is stored in the piglet) into the pig area specified by | ||||
1767 | * [pig_start .. pig_end]. Order the chunks so that the final chunk is the | ||||
1768 | * only chunk with overlap possibilities. | ||||
1769 | */ | ||||
int
hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
    paddr_t pig_end, size_t image_compr_size,
    struct hibernate_disk_chunk *chunks)
{
	paddr_t img_cur, piglet_base;
	daddr_t blkctr;
	size_t processed, compressed_size, read_size;
	int nchunks, nfchunks, num_io_pages;
	vaddr_t tempva, hibernate_fchunk_area;
	short *fchunks, i, j;

	tempva = (vaddr_t)NULL;
	hibernate_fchunk_area = (vaddr_t)NULL;
	nfchunks = 0;
	piglet_base = hib->piglet_pa;
	global_pig_start = pig_start;

	/*
	 * These mappings go into the resuming kernel's page table, and are
	 * used only during image read. They disappear from existence
	 * when the suspended kernel is unpacked on top of us.
	 */
	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
	    &kd_nowait);
	if (!tempva)
		return (1);
	/*
	 * NOTE(review): if this second allocation fails, tempva is not
	 * freed before returning — presumably harmless at resume time
	 * (see comment above), but confirm.
	 */
	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_fchunk_area)
		return (1);

	/* Final output chunk ordering VA */
	fchunks = (short *)hibernate_fchunk_area;

	/* Map the chunk ordering region (24 pages in the piglet, at page 4) */
	for(i = 0; i < 24 ; i++)
		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
		    piglet_base + ((4 + i) * PAGE_SIZE),
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	nchunks = hib->chunk_ctr;

	/* Initially start all chunks as unplaced */
	for (i = 0; i < nchunks; i++)
		chunks[i].flags = 0;

	/*
	 * Search the list for chunks that are outside the pig area. These
	 * can be placed first in the final output list.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
		}
	}

	/*
	 * Walk the ordering, place the chunks in ascending memory order.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
		}
	}

	img_cur = pig_start;

	/* Read each chunk into the pig area, in the order computed above */
	for (i = 0; i < nfchunks; i++) {
		blkctr = chunks[fchunks[i]].offset;
		processed = 0;
		compressed_size = chunks[fchunks[i]].compressed_size;

		while (processed < compressed_size) {
			/* Read in MAXPHYS-sized pieces (last may be short) */
			if (compressed_size - processed >= MAXPHYS)
				read_size = MAXPHYS;
			else
				read_size = compressed_size - processed;

			/*
			 * We're reading read_size bytes, offset from the
			 * start of a page by img_cur % PAGE_SIZE, so the
			 * end will be read_size + (img_cur % PAGE_SIZE)
			 * from the start of the first page. Round that
			 * up to the next page size.
			 */
			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
			    + PAGE_SIZE - 1) / PAGE_SIZE;

			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);

			/* Map pages for this read */
			for (j = 0; j < num_io_pages; j ++)
				pmap_kenter_pa(tempva + j * PAGE_SIZE,
				    img_cur + j * PAGE_SIZE,
				    PROT_READ | PROT_WRITE);

			pmap_update(pmap_kernel());

			hibernate_block_io(hib, blkctr, read_size,
			    tempva + (img_cur & PAGE_MASK), 0);

			blkctr += (read_size / DEV_BSIZE);

			/* Tear down the window before the next piece */
			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
			pmap_update(pmap_kernel());

			processed += read_size;
			img_cur += read_size;
		}
	}

	/* Done with the ordering region */
	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
	pmap_update(pmap_kernel());

	return (0);
}
1892 | |||||
1893 | /* | ||||
1894 | * Hibernating a machine comprises the following operations: | ||||
1895 | * 1. Calculating this machine's hibernate_info information | ||||
1896 | * 2. Allocating a piglet and saving the piglet's physaddr | ||||
1897 | * 3. Calculating the memory chunks | ||||
1898 | * 4. Writing the compressed chunks to disk | ||||
1899 | * 5. Writing the chunk table | ||||
1900 | * 6. Writing the signature block (hibernate_info) | ||||
1901 | * | ||||
1902 | * On most architectures, the function calling hibernate_suspend would | ||||
1903 | * then power off the machine using some MD-specific implementation. | ||||
1904 | */ | ||||
int
hibernate_suspend(void)
{
	union hibernate_info hib;
	u_long start, end;

	/*
	 * Calculate memory ranges, swap offsets, etc.
	 * This also allocates a piglet whose physaddr is stored in
	 * hib->piglet_pa and vaddr stored in hib->piglet_va
	 */
	if (get_hibernate_info(&hib, 1)) {
		DPRINTF("failed to obtain hibernate info\n");
		return (1);
	}

	/* Find a page-addressed region in swap [start,end] */
	if (uvm_hibswap(hib.dev, &start, &end)) {
		printf("hibernate: cannot find any swap\n");
		return (1);
	}

	/*
	 * NOTE(review): 1000 pages appears to be a heuristic minimum
	 * swap-region size — confirm the intended floor.
	 */
	if (end - start < 1000) {
		printf("hibernate: insufficient swap (%lu is too small)\n",
		    end - start);
		return (1);
	}

	/* Calculate block offsets in swap (pages -> disk blocks) */
	hib.image_offset = ctod(start);

	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
	    hib.image_offset, ctod(end) - ctod(start));

	pmap_activate(curproc);
	DPRINTF("hibernate: writing chunks\n");
	if (hibernate_write_chunks(&hib)) {
		DPRINTF("hibernate_write_chunks failed\n");
		return (1);
	}

	DPRINTF("hibernate: writing chunktable\n");
	if (hibernate_write_chunktable(&hib)) {
		DPRINTF("hibernate_write_chunktable failed\n");
		return (1);
	}

	/* Signature goes last: it is what marks the image as valid */
	DPRINTF("hibernate: writing signature\n");
	if (hibernate_write_signature(&hib)) {
		DPRINTF("hibernate_write_signature failed\n");
		return (1);
	}

	/* Allow the disk to settle */
	delay(500000);

	/*
	 * Give the device-specific I/O function a notification that we're
	 * done, and that it can clean up or shutdown as needed.
	 */
	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
	return (0);
}
1968 | |||||
int
hibernate_alloc(void)
{
	/* Must not already hold hibernate resources */
	KASSERT(global_piglet_va == 0);
	KASSERT(hibernate_temp_page == 0);

	pmap_activate(curproc);
	/* Map the hiballoc/zlib state page 1:1 (same VA and PA) */
	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
	    PROT_READ | PROT_WRITE);

	/* Allocate a piglet, store its addresses in the supplied globals */
	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
		goto unmap;

	/*
	 * Allocate VA for the temp page.
	 *
	 * This will become part of the suspended kernel and will
	 * be freed in hibernate_free, upon resume (or hibernate
	 * failure)
	 */
	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_temp_page) {
		/* Roll back the piglet allocation on failure */
		uvm_pmr_free_piglet(global_piglet_va,
		    4 * HIBERNATE_CHUNK_SIZE);
		global_piglet_va = 0;
		goto unmap;
	}
	return (0);
unmap:
	/* Undo the 1:1 hiballoc page mapping before failing */
	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
	pmap_update(pmap_kernel());
	return (ENOMEM);
}
2005 | |||||
2006 | /* | ||||
2007 | * Free items allocated by hibernate_alloc() | ||||
2008 | */ | ||||
2009 | void | ||||
2010 | hibernate_free(void) | ||||
2011 | { | ||||
2012 | pmap_activate(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | ||||
2013 | |||||
2014 | if (global_piglet_va) | ||||
2015 | uvm_pmr_free_piglet(global_piglet_va, | ||||
2016 | 4 * HIBERNATE_CHUNK_SIZE0x400000); | ||||
2017 | |||||
2018 | if (hibernate_temp_page) { | ||||
2019 | pmap_kremove(hibernate_temp_page, PAGE_SIZE(1 << 12)); | ||||
2020 | km_free((void *)hibernate_temp_page, PAGE_SIZE(1 << 12), | ||||
2021 | &kv_any, &kp_none); | ||||
2022 | } | ||||
2023 | |||||
2024 | global_piglet_va = 0; | ||||
2025 | hibernate_temp_page = 0; | ||||
2026 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34), PAGE_SIZE(1 << 12)); | ||||
2027 | pmap_update(pmap_kernel()); | ||||
2028 | } |