File: kern/subr_hibernate.c
Warning: line 727, column 11: The left operand of '<' is a garbage value
1 | /* $OpenBSD: subr_hibernate.c,v 1.138 2022/09/03 18:17:15 mlarkin Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> | |||
5 | * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> | |||
6 | * | |||
7 | * Permission to use, copy, modify, and distribute this software for any | |||
8 | * purpose with or without fee is hereby granted, provided that the above | |||
9 | * copyright notice and this permission notice appear in all copies. | |||
10 | * | |||
11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
18 | */ | |||
19 | ||||
20 | #include <sys/hibernate.h> | |||
21 | #include <sys/malloc.h> | |||
22 | #include <sys/param.h> | |||
23 | #include <sys/tree.h> | |||
24 | #include <sys/systm.h> | |||
25 | #include <sys/disklabel.h> | |||
26 | #include <sys/disk.h> | |||
27 | #include <sys/conf.h> | |||
28 | #include <sys/buf.h> | |||
29 | #include <sys/fcntl.h> | |||
30 | #include <sys/stat.h> | |||
31 | #include <sys/atomic.h> | |||
32 | ||||
33 | #include <uvm/uvm.h> | |||
34 | #include <uvm/uvm_swap.h> | |||
35 | ||||
36 | #include <machine/hibernate.h> | |||
37 | ||||
38 | /* Make sure the signature can fit in one block */ | |||
39 | CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE); | |||
40 | ||||
41 | /* | |||
42 | * Hibernate piglet layout information | |||
43 | * | |||
44 | * The piglet is a scratch area of memory allocated by the suspending kernel. | |||
45 | * Its phys and virt addrs are recorded in the signature block. The piglet is | |||
46 | * used to guarantee an unused area of memory that can be used by the resuming | |||
47 | * kernel for various things. The piglet is excluded during unpack operations. | |||
48 | * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). | |||
49 | * | |||
50 | * Offset from piglet_base Purpose | |||
51 | * ---------------------------------------------------------------------------- | |||
52 | * 0 Private page for suspend I/O write functions | |||
53 | * 1*PAGE_SIZE I/O page used during hibernate suspend | |||
54 | * 2*PAGE_SIZE I/O page used during hibernate suspend | |||
55 | * 3*PAGE_SIZE copy page used during hibernate suspend | |||
56 | * 4*PAGE_SIZE final chunk ordering list (24 pages) | |||
57 | * 28*PAGE_SIZE RLE utility page | |||
58 | * 29*PAGE_SIZE start of hiballoc area | |||
59 | * 30*PAGE_SIZE preserved entropy | |||
60 | * 110*PAGE_SIZE end of hiballoc area (80 pages) | |||
61 | * 366*PAGE_SIZE end of retguard preservation region (256 pages) | |||
62 | * ... unused | |||
63 | * HIBERNATE_CHUNK_SIZE start of hibernate chunk table | |||
64 | * 2*HIBERNATE_CHUNK_SIZE bounce area for chunks being unpacked | |||
65 | * 4*HIBERNATE_CHUNK_SIZE end of piglet | |||
66 | */ | |||
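The offsets in this table are used directly by code later in the file. Purely as an illustration (the EX_* names below are hypothetical, not kernel identifiers), the relationships the unpack and chunk-table code relies on are:

/* Illustrative only -- hypothetical names restating offsets from the table above. */
#define EX_PIGLET_FCHUNKS_OFS		(4 * PAGE_SIZE)		/* chunk ordering list, see hibernate_unpack_image() */
#define EX_PIGLET_CHUNKTABLE_OFS	HIBERNATE_CHUNK_SIZE	/* chunk table, see hibernate_write_chunktable() */
#define EX_PIGLET_BOUNCE_OFS		(2 * HIBERNATE_CHUNK_SIZE) /* unpack bounce area, see hibernate_process_chunk() */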
67 | ||||
68 | /* Temporary vaddr ranges used during hibernate */ | |||
69 | vaddr_t hibernate_temp_page; | |||
70 | vaddr_t hibernate_copy_page; | |||
71 | vaddr_t hibernate_rle_page; | |||
72 | ||||
73 | /* Hibernate info as read from disk during resume */ | |||
74 | union hibernate_info disk_hib; | |||
75 | ||||
76 | /* | |||
77 | * Global copy of the pig start address. This needs to be a global as we | |||
78 | * switch stacks after computing it - it can't be stored on the stack. | |||
79 | */ | |||
80 | paddr_t global_pig_start; | |||
81 | ||||
82 | /* | |||
83 | * Global copies of the piglet start addresses (PA/VA). We store these | |||
84 | * as globals to avoid having to carry them around as parameters, as the | |||
85 | * piglet is allocated early and freed late - its lifecycle extends beyond | |||
86 | * that of the hibernate info union which is calculated on suspend/resume. | |||
87 | */ | |||
88 | vaddr_t global_piglet_va; | |||
89 | paddr_t global_piglet_pa; | |||
90 | ||||
91 | /* #define HIB_DEBUG */ | |||
92 | #ifdef HIB_DEBUG | |||
93 | int hib_debug = 99; | |||
94 | #define DPRINTF(x...) do { if (hib_debug) printf(x); } while (0) | |||
95 | #define DNPRINTF(n,x...) do { if (hib_debug > (n)) printf(x); } while (0) | |||
96 | #else | |||
97 | #define DPRINTF(x...) | |||
98 | #define DNPRINTF(n,x...) | |||
99 | #endif | |||
100 | ||||
101 | #ifndef NO_PROPOLICE | |||
102 | extern long __guard_local; | |||
103 | #endif /* ! NO_PROPOLICE */ | |||
104 | ||||
105 | /* Retguard phys address (need to skip this region during unpack) */ | |||
106 | paddr_t retguard_start_phys, retguard_end_phys; | |||
107 | extern char __retguard_start, __retguard_end; | |||
108 | ||||
109 | void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); | |||
110 | int hibernate_calc_rle(paddr_t, paddr_t); | |||
111 | int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, | |||
112 | size_t *); | |||
113 | ||||
114 | #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE) | |||
115 | ||||
116 | /* | |||
117 | * Hib alloc enforced alignment. | |||
118 | */ | |||
119 | #define HIB_ALIGN 8 /* bytes alignment */ | |||
120 | ||||
121 | /* | |||
122 | * sizeof builtin operation, but with alignment constraint. | |||
123 | */ | |||
124 | #define HIB_SIZEOF(_type) roundup(sizeof(_type), HIB_ALIGN) | |||
125 | ||||
126 | struct hiballoc_entry { | |||
127 | size_t hibe_use; | |||
128 | size_t hibe_space; | |||
129 | RBT_ENTRY(hiballoc_entry) hibe_entry; | |||
130 | }; | |||
131 | ||||
132 | /* | |||
133 | * Sort hibernate memory ranges by ascending PA | |||
134 | */ | |||
135 | void | |||
136 | hibernate_sort_ranges(union hibernate_info *hib_info) | |||
137 | { | |||
138 | int i, j; | |||
139 | struct hibernate_memory_range *ranges; | |||
140 | paddr_t base, end; | |||
141 | ||||
142 | ranges = hib_info->ranges; | |||
143 | ||||
144 | for (i = 1; i < hib_info->nranges; i++) { | |||
145 | j = i; | |||
146 | while (j > 0 && ranges[j - 1].base > ranges[j].base) { | |||
147 | base = ranges[j].base; | |||
148 | end = ranges[j].end; | |||
149 | ranges[j].base = ranges[j - 1].base; | |||
150 | ranges[j].end = ranges[j - 1].end; | |||
151 | ranges[j - 1].base = base; | |||
152 | ranges[j - 1].end = end; | |||
153 | j--; | |||
154 | } | |||
155 | } | |||
156 | } | |||
157 | ||||
158 | /* | |||
159 | * Compare hiballoc entries based on the address they manage. | |||
160 | * | |||
161 | * Since the address is fixed, relative to struct hiballoc_entry, | |||
162 | * we just compare the hiballoc_entry pointers. | |||
163 | */ | |||
164 | static __inline int | |||
165 | hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) | |||
166 | { | |||
167 | vaddr_t vl = (vaddr_t)l; | |||
168 | vaddr_t vr = (vaddr_t)r; | |||
169 | ||||
170 | return vl < vr ? -1 : (vl > vr); | |||
171 | } | |||
172 | ||||
173 | RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); | |||
174 | ||||
175 | /* | |||
176 | * Given a hiballoc entry, return the address it manages. | |||
177 | */ | |||
178 | static __inline void * | |||
179 | hib_entry_to_addr(struct hiballoc_entry *entry) | |||
180 | { | |||
181 | caddr_t addr; | |||
182 | ||||
183 | addr = (caddr_t)entry; | |||
184 | addr += HIB_SIZEOF(struct hiballoc_entry); | |||
185 | return addr; | |||
186 | } | |||
187 | ||||
188 | /* | |||
189 | * Given an address, find the hiballoc that corresponds. | |||
190 | */ | |||
191 | static __inline struct hiballoc_entry* | |||
192 | hib_addr_to_entry(void *addr_param) | |||
193 | { | |||
194 | caddr_t addr; | |||
195 | ||||
196 | addr = (caddr_t)addr_param; | |||
197 | addr -= HIB_SIZEOF(struct hiballoc_entry); | |||
198 | return (struct hiballoc_entry*)addr; | |||
199 | } | |||
200 | ||||
201 | RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); | |||
202 | ||||
203 | /* | |||
204 | * Allocate memory from the arena. | |||
205 | * | |||
206 | * Returns NULL if no memory is available. | |||
207 | */ | |||
208 | void * | |||
209 | hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) | |||
210 | { | |||
211 | struct hiballoc_entry *entry, *new_entry; | |||
212 | size_t find_sz; | |||
213 | ||||
214 | /* | |||
215 | * Enforce alignment of HIB_ALIGN bytes. | |||
216 | * | |||
217 | * Note that, because the entry is put in front of the allocation, | |||
218 | * 0-byte allocations are guaranteed a unique address. | |||
219 | */ | |||
220 | alloc_sz = roundup(alloc_sz, HIB_ALIGN); | |||
221 | ||||
222 | /* | |||
223 | * Find an entry with hibe_space >= find_sz. | |||
224 | * | |||
225 | * If the root node is not large enough, we switch to tree traversal. | |||
226 | * Because all entries are made at the bottom of the free space, | |||
227 | * traversal from the end has a slightly better chance of yielding | |||
228 | * a sufficiently large space. | |||
229 | */ | |||
230 | find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry); | |||
231 | entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs); | |||
232 | if (entry != NULL && entry->hibe_space < find_sz) { | |||
233 | RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) { | |||
234 | if (entry->hibe_space >= find_sz) | |||
235 | break; | |||
236 | } | |||
237 | } | |||
238 | ||||
239 | /* | |||
240 | * Insufficient or too fragmented memory. | |||
241 | */ | |||
242 | if (entry == NULL) | |||
243 | return NULL; | |||
244 | ||||
245 | /* | |||
246 | * Create new entry in allocated space. | |||
247 | */ | |||
248 | new_entry = (struct hiballoc_entry*)( | |||
249 | (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); | |||
250 | new_entry->hibe_space = entry->hibe_space - find_sz; | |||
251 | new_entry->hibe_use = alloc_sz; | |||
252 | ||||
253 | /* | |||
254 | * Insert entry. | |||
255 | */ | |||
256 | if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL) | |||
257 | panic("hib_alloc: insert failure"); | |||
258 | entry->hibe_space = 0; | |||
259 | ||||
260 | /* Return address managed by entry. */ | |||
261 | return hib_entry_to_addr(new_entry); | |||
262 | } | |||
263 | ||||
264 | void | |||
265 | hib_getentropy(char **bufp, size_t *bufplen) | |||
266 | { | |||
267 | if (!bufp || !bufplen) | |||
268 | return; | |||
269 | ||||
270 | *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE)); | |||
271 | *bufplen = PAGE_SIZE; | |||
272 | } | |||
273 | ||||
274 | /* | |||
275 | * Free a pointer previously allocated from this arena. | |||
276 | * | |||
277 | * If addr is NULL, this will be silently accepted. | |||
278 | */ | |||
279 | void | |||
280 | hib_free(struct hiballoc_arena *arena, void *addr) | |||
281 | { | |||
282 | struct hiballoc_entry *entry, *prev; | |||
283 | ||||
284 | if (addr == NULL((void *)0)) | |||
285 | return; | |||
286 | ||||
287 | /* | |||
288 | * Derive entry from addr and check it is really in this arena. | |||
289 | */ | |||
290 | entry = hib_addr_to_entry(addr); | |||
291 | if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry) | |||
292 | panic("hib_free: freed item %p not in hib arena", addr); | |||
293 | ||||
294 | /* | |||
295 | * Give the space in entry to its predecessor. | |||
296 | * | |||
297 | * If entry has no predecessor, change its used space into free space | |||
298 | * instead. | |||
299 | */ | |||
300 | prev = RBT_PREV(hiballoc_addr, entry); | |||
301 | if (prev != NULL && | |||
302 | (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) + | |||
303 | prev->hibe_use + prev->hibe_space) == entry) { | |||
304 | /* Merge entry. */ | |||
305 | RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry); | |||
306 | prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) + | |||
307 | entry->hibe_use + entry->hibe_space; | |||
308 | } else { | |||
309 | /* Flip used memory to free space. */ | |||
310 | entry->hibe_space += entry->hibe_use; | |||
311 | entry->hibe_use = 0; | |||
312 | } | |||
313 | } | |||
314 | ||||
315 | /* | |||
316 | * Initialize hiballoc. | |||
317 | * | |||
318 | * The allocator will manage memory at ptr, which is len bytes. | |||
319 | */ | |||
320 | int | |||
321 | hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) | |||
322 | { | |||
323 | struct hiballoc_entry *entry; | |||
324 | caddr_t ptr; | |||
325 | size_t len; | |||
326 | ||||
327 | RBT_INIT(hiballoc_addr, &arena->hib_addrs); | |||
328 | ||||
329 | /* | |||
330 | * Hib allocator enforces HIB_ALIGN alignment. | |||
331 | * Fixup ptr and len. | |||
332 | */ | |||
333 | ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN); | |||
334 | len = p_len - ((size_t)ptr - (size_t)p_ptr); | |||
335 | len &= ~((size_t)HIB_ALIGN - 1); | |||
336 | ||||
337 | /* | |||
338 | * Insufficient memory to be able to allocate and also do bookkeeping. | |||
339 | */ | |||
340 | if (len <= HIB_SIZEOF(struct hiballoc_entry)) | |||
341 | return ENOMEM; | |||
342 | ||||
343 | /* | |||
344 | * Create entry describing space. | |||
345 | */ | |||
346 | entry = (struct hiballoc_entry*)ptr; | |||
347 | entry->hibe_use = 0; | |||
348 | entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry); | |||
349 | RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry); | |||
350 | ||||
351 | return 0; | |||
352 | } | |||
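For orientation, a minimal usage sketch of the arena API defined above (hypothetical caller, not kernel code); hibernate_zlib_alloc()/hibernate_zlib_free() further down drive the arena embedded in struct hibernate_zlib_state in the same way:

/* Hypothetical example, not part of the kernel source. */
static void
example_hiballoc_use(void *region, size_t region_len)
{
	struct hiballoc_arena arena;
	void *p;

	/* The region must be big enough for at least one bookkeeping entry. */
	if (hiballoc_init(&arena, region, region_len) != 0)
		return;

	p = hib_alloc(&arena, 128);	/* request is rounded up to HIB_ALIGN */
	if (p != NULL)
		hib_free(&arena, p);	/* hib_free(&arena, NULL) is also accepted */
}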
353 | ||||
354 | /* | |||
355 | * Zero all free memory. | |||
356 | */ | |||
357 | void | |||
358 | uvm_pmr_zero_everything(void) | |||
359 | { | |||
360 | struct uvm_pmemrange *pmr; | |||
361 | struct vm_page *pg; | |||
362 | int i; | |||
363 | ||||
364 | uvm_lock_fpageq(); | |||
365 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { | |||
366 | /* Zero single pages. */ | |||
367 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY])) | |||
368 | != NULL) { | |||
369 | uvm_pmr_remove(pmr, pg); | |||
370 | uvm_pagezero(pg); | |||
371 | atomic_setbits_int(&pg->pg_flags, PG_ZERO); | |||
372 | uvmexp.zeropages++; | |||
373 | uvm_pmr_insert(pmr, pg, 0); | |||
374 | } | |||
375 | ||||
376 | /* Zero multi page ranges. */ | |||
377 | while ((pg = RBT_ROOT(uvm_pmr_size, | |||
378 | &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) { | |||
379 | pg--; /* Size tree always has second page. */ | |||
380 | uvm_pmr_remove(pmr, pg); | |||
381 | for (i = 0; i < pg->fpgsz; i++) { | |||
382 | uvm_pagezero(&pg[i]); | |||
383 | atomic_setbits_int(&pg[i].pg_flags, PG_ZERO); | |||
384 | uvmexp.zeropages++; | |||
385 | } | |||
386 | uvm_pmr_insert(pmr, pg, 0); | |||
387 | } | |||
388 | } | |||
389 | uvm_unlock_fpageq(); | |||
390 | } | |||
391 | ||||
392 | /* | |||
393 | * Mark all memory as dirty. | |||
394 | * | |||
395 | * Used to inform the system that the clean memory isn't clean for some | |||
396 | * reason, for example because we just came back from hibernate. | |||
397 | */ | |||
398 | void | |||
399 | uvm_pmr_dirty_everything(void) | |||
400 | { | |||
401 | struct uvm_pmemrange *pmr; | |||
402 | struct vm_page *pg; | |||
403 | int i; | |||
404 | ||||
405 | uvm_lock_fpageq(); | |||
406 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { | |||
407 | /* Dirty single pages. */ | |||
408 | while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO])) | |||
409 | != NULL) { | |||
410 | uvm_pmr_remove(pmr, pg); | |||
411 | atomic_clearbits_int(&pg->pg_flags, PG_ZERO); | |||
412 | uvm_pmr_insert(pmr, pg, 0); | |||
413 | } | |||
414 | ||||
415 | /* Dirty multi page ranges. */ | |||
416 | while ((pg = RBT_ROOT(uvm_pmr_size, | |||
417 | &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) { | |||
418 | pg--; /* Size tree always has second page. */ | |||
419 | uvm_pmr_remove(pmr, pg); | |||
420 | for (i = 0; i < pg->fpgsz; i++) | |||
421 | atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO); | |||
422 | uvm_pmr_insert(pmr, pg, 0); | |||
423 | } | |||
424 | } | |||
425 | ||||
426 | uvmexp.zeropages = 0; | |||
427 | uvm_unlock_fpageq(); | |||
428 | } | |||
429 | ||||
430 | /* | |||
431 | * Allocate an area that can hold sz bytes and doesn't overlap with | |||
432 | * the piglet at piglet_pa. | |||
433 | */ | |||
434 | int | |||
435 | uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) | |||
436 | { | |||
437 | struct uvm_constraint_range pig_constraint; | |||
438 | struct kmem_pa_mode kp_pig = { | |||
439 | .kp_constraint = &pig_constraint, | |||
440 | .kp_maxseg = 1 | |||
441 | }; | |||
442 | vaddr_t va; | |||
443 | ||||
444 | sz = round_page(sz); | |||
445 | ||||
446 | pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE; | |||
447 | pig_constraint.ucr_high = -1; | |||
448 | ||||
449 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | |||
450 | if (va == 0) { | |||
451 | pig_constraint.ucr_low = 0; | |||
452 | pig_constraint.ucr_high = piglet_pa - 1; | |||
453 | ||||
454 | va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); | |||
455 | if (va == 0) | |||
456 | return ENOMEM12; | |||
457 | } | |||
458 | ||||
459 | pmap_extract(pmap_kernel(), va, pa); | |||
460 | return 0; | |||
461 | } | |||
462 | ||||
463 | /* | |||
464 | * Allocate a piglet area. | |||
465 | * | |||
466 | * This needs to be in DMA-safe memory. | |||
467 | * Piglets are aligned. | |||
468 | * | |||
469 | * sz and align in bytes. | |||
470 | * | |||
471 | * The call will sleep for the pagedaemon to attempt to free memory. | |||
472 | * The pagedaemon may decide its not possible to free enough memory, causing | |||
473 | * the allocation to fail. | |||
474 | */ | |||
475 | int | |||
476 | uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) | |||
477 | { | |||
478 | struct kmem_pa_mode kp_piglet = { | |||
479 | .kp_constraint = &dma_constraint, | |||
480 | .kp_align = align, | |||
481 | .kp_maxseg = 1 | |||
482 | }; | |||
483 | ||||
484 | /* Ensure align is a power of 2 */ | |||
485 | KASSERT((align & (align - 1)) == 0); | |||
486 | ||||
487 | /* | |||
488 | * Fixup arguments: align must be at least PAGE_SIZE, | |||
489 | * sz will be converted to pagecount, since that is what | |||
490 | * pmemrange uses internally. | |||
491 | */ | |||
492 | if (align < PAGE_SIZE(1 << 12)) | |||
493 | kp_piglet.kp_align = PAGE_SIZE(1 << 12); | |||
494 | ||||
495 | sz = round_page(sz); | |||
496 | ||||
497 | *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); | |||
498 | if (*va == 0) | |||
499 | return ENOMEM12; | |||
500 | ||||
501 | pmap_extract(pmap_kernel(), *va, pa); | |||
502 | return 0; | |||
503 | } | |||
504 | ||||
505 | /* | |||
506 | * Free a piglet area. | |||
507 | */ | |||
508 | void | |||
509 | uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) | |||
510 | { | |||
511 | /* | |||
512 | * Fix parameters. | |||
513 | */ | |||
514 | sz = round_page(sz)(((sz) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
515 | ||||
516 | /* | |||
517 | * Free the physical and virtual memory. | |||
518 | */ | |||
519 | km_free((void *)va, sz, &kv_any, &kp_dma_contig); | |||
520 | } | |||
521 | ||||
522 | /* | |||
523 | * Physmem RLE compression support. | |||
524 | * | |||
525 | * Given a physical page address, return the number of pages starting at the | |||
526 | * address that are free. Clamps to the number of pages in | |||
527 | * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. | |||
528 | */ | |||
529 | int | |||
530 | uvm_page_rle(paddr_t addr) | |||
531 | { | |||
532 | struct vm_page *pg, *pg_end; | |||
533 | struct vm_physseg *vmp; | |||
534 | int pseg_idx, off_idx; | |||
535 | ||||
536 | pseg_idx = vm_physseg_find(atop(addr), &off_idx); | |||
537 | if (pseg_idx == -1) | |||
538 | return 0; | |||
539 | ||||
540 | vmp = &vm_physmem[pseg_idx]; | |||
541 | pg = &vmp->pgs[off_idx]; | |||
542 | if (!(pg->pg_flags & PQ_FREE)) | |||
543 | return 0; | |||
544 | ||||
545 | /* | |||
546 | * Search for the first non-free page after pg. | |||
547 | * Note that the page may not be the first page in a free pmemrange, | |||
548 | * therefore pg->fpgsz cannot be used. | |||
549 | */ | |||
550 | for (pg_end = pg; pg_end <= vmp->lastpg && | |||
551 | (pg_end->pg_flags & PQ_FREE) == PQ_FREE && | |||
552 | (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++) | |||
553 | ; | |||
554 | return pg_end - pg; | |||
555 | } | |||
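As an aside, the run length returned here is what lets the image writer skip whole spans of free memory. A minimal, hypothetical sketch of such a walk (the real consumers are hibernate_calc_rle() and hibernate_write_rle() near the end of this file):

/* Hypothetical example, not part of the kernel source. */
static void
example_walk_free_runs(paddr_t start, paddr_t end)
{
	paddr_t pa = start;
	int rle;

	while (pa < end) {
		rle = uvm_page_rle(pa);
		if (rle > 0)
			pa += (paddr_t)rle * PAGE_SIZE;	/* skip a run of free pages */
		else
			pa += PAGE_SIZE;	/* page in use: would be compressed and written */
	}
}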
556 | ||||
557 | /* | |||
558 | * Fills out the hibernate_info union pointed to by hib | |||
559 | * with information about this machine (swap signature block | |||
560 | * offsets, number of memory ranges, kernel in use, etc) | |||
561 | */ | |||
562 | int | |||
563 | get_hibernate_info(union hibernate_info *hib, int suspend) | |||
564 | { | |||
565 | struct disklabel dl; | |||
566 | char err_string[128], *dl_ret; | |||
567 | int part; | |||
568 | SHA2_CTX ctx; | |||
569 | void *fn; | |||
570 | ||||
571 | #ifndef NO_PROPOLICE | |||
572 | /* Save propolice guard */ | |||
573 | hib->guard = __guard_local; | |||
574 | #endif /* ! NO_PROPOLICE */ | |||
575 | ||||
576 | /* Determine I/O function to use */ | |||
577 | hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev); | |||
578 | if (hib->io_func == NULL((void *)0)) | |||
579 | return (1); | |||
580 | ||||
581 | /* Calculate hibernate device */ | |||
582 | hib->dev = swdevt[0].sw_dev; | |||
583 | ||||
584 | /* Read disklabel (used to calculate signature and image offsets) */ | |||
585 | dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string)); | |||
586 | ||||
587 | if (dl_ret) { | |||
588 | printf("Hibernate error reading disklabel: %s\n", dl_ret); | |||
589 | return (1); | |||
590 | } | |||
591 | ||||
592 | /* Make sure we have a swap partition. */ | |||
593 | part = DISKPART(hib->dev); | |||
594 | if (dl.d_npartitions <= part || | |||
595 | dl.d_partitions[part].p_fstype != FS_SWAP || | |||
596 | DL_GETPSIZE(&dl.d_partitions[part]) == 0) | |||
597 | return (1); | |||
598 | ||||
599 | /* Magic number */ | |||
600 | hib->magic = HIBERNATE_MAGIC0x0B5D0B5D; | |||
601 | ||||
602 | /* Calculate signature block location */ | |||
603 | hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - | |||
604 | sizeof(union hibernate_info)/DEV_BSIZE; | |||
605 | ||||
606 | SHA256Init(&ctx); | |||
607 | SHA256Update(&ctx, version, strlen(version)); | |||
608 | fn = printf; | |||
609 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
610 | fn = malloc; | |||
611 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
612 | fn = km_alloc; | |||
613 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
614 | fn = strlen; | |||
615 | SHA256Update(&ctx, &fn, sizeof(fn)); | |||
616 | SHA256Final((u_int8_t *)&hib->kern_hash, &ctx); | |||
617 | ||||
618 | if (suspend) { | |||
619 | /* Grab the previously-allocated piglet addresses */ | |||
620 | hib->piglet_va = global_piglet_va; | |||
621 | hib->piglet_pa = global_piglet_pa; | |||
622 | hib->io_page = (void *)hib->piglet_va; | |||
623 | ||||
624 | /* | |||
625 | * Initialization of the hibernate IO function for drivers | |||
626 | * that need to do prep work (such as allocating memory or | |||
627 | * setting up data structures that cannot safely be done | |||
628 | * during suspend without causing side effects). There is | |||
629 | * a matching HIB_DONE call performed after the write is | |||
630 | * completed. | |||
631 | */ | |||
632 | if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part]), | |||
633 | (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[part]), | |||
634 | HIB_INIT, hib->io_page)) | |||
635 | goto fail; | |||
636 | ||||
637 | } else { | |||
638 | /* | |||
639 | * Resuming kernels use a regular private page for the driver | |||
640 | * No need to free this I/O page as it will vanish as part of | |||
641 | * the resume. | |||
642 | */ | |||
643 | hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); | |||
644 | if (!hib->io_page) | |||
645 | goto fail; | |||
646 | } | |||
647 | ||||
648 | if (get_hibernate_info_md(hib)) | |||
649 | goto fail; | |||
650 | ||||
651 | return (0); | |||
652 | ||||
653 | fail: | |||
654 | return (1); | |||
655 | } | |||
656 | ||||
657 | /* | |||
658 | * Allocate nitems*size bytes from the hiballoc area presently in use | |||
659 | */ | |||
660 | void * | |||
661 | hibernate_zlib_alloc(void *unused, int nitems, int size) | |||
662 | { | |||
663 | struct hibernate_zlib_state *hibernate_state; | |||
664 | ||||
665 | hibernate_state = | |||
666 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
667 | ||||
668 | return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); | |||
669 | } | |||
670 | ||||
671 | /* | |||
672 | * Free the memory pointed to by addr in the hiballoc area presently in | |||
673 | * use | |||
674 | */ | |||
675 | void | |||
676 | hibernate_zlib_free(void *unused, void *addr) | |||
677 | { | |||
678 | struct hibernate_zlib_state *hibernate_state; | |||
679 | ||||
680 | hibernate_state = | |||
681 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
682 | ||||
683 | hib_free(&hibernate_state->hiballoc_arena, addr); | |||
684 | } | |||
685 | ||||
686 | /* | |||
687 | * Inflate next page of data from the image stream. | |||
688 | * The rle parameter is modified on exit to contain the number of pages to | |||
689 | * skip in the output stream (or 0 if this page was inflated into). | |||
690 | * | |||
691 | * Returns 0 if the stream contains additional data, or 1 if the stream is | |||
692 | * finished. | |||
693 | */ | |||
694 | int | |||
695 | hibernate_inflate_page(int *rle) | |||
696 | { | |||
697 | struct hibernate_zlib_state *hibernate_state; | |||
698 | int i; | |||
699 | ||||
700 | hibernate_state = | |||
701 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
702 | ||||
703 | /* Set up the stream for RLE code inflate */ | |||
704 | hibernate_state->hib_stream.next_out = (unsigned char *)rle; | |||
705 | hibernate_state->hib_stream.avail_out = sizeof(*rle); | |||
706 | ||||
707 | /* Inflate RLE code */ | |||
708 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); | |||
709 | if (i != Z_OK && i != Z_STREAM_END) { | |||
710 | /* | |||
711 | * XXX - this will likely reboot/hang most machines | |||
712 | * since the console output buffer will be unmapped, | |||
713 | * but there's not much else we can do here. | |||
714 | */ | |||
715 | panic("rle inflate stream error"); | |||
716 | } | |||
717 | ||||
718 | if (hibernate_state->hib_stream.avail_out != 0) { | |||
719 | /* | |||
720 | * XXX - this will likely reboot/hang most machines | |||
721 | * since the console output buffer will be unmapped, | |||
722 | * but there's not much else we can do here. | |||
723 | */ | |||
724 | panic("rle short inflate error"); | |||
725 | } | |||
726 | ||||
727 | if (*rle < 0 || *rle > 1024) { | |||
728 | /* | |||
729 | * XXX - this will likely reboot/hang most machines | |||
730 | * since the console output buffer will be unmapped, | |||
731 | * but there's not much else we can do here. | |||
732 | */ | |||
733 | panic("invalid rle count"); | |||
734 | } | |||
735 | ||||
736 | if (i == Z_STREAM_END1) | |||
737 | return (1); | |||
738 | ||||
739 | if (*rle != 0) | |||
740 | return (0); | |||
741 | ||||
742 | /* Set up the stream for page inflate */ | |||
743 | hibernate_state->hib_stream.next_out = | |||
744 | (unsigned char *)HIBERNATE_INFLATE_PAGE; | |||
745 | hibernate_state->hib_stream.avail_out = PAGE_SIZE; | |||
746 | ||||
747 | /* Process next block of data */ | |||
748 | i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); | |||
749 | if (i != Z_OK && i != Z_STREAM_END) { | |||
750 | /* | |||
751 | * XXX - this will likely reboot/hang most machines | |||
752 | * since the console output buffer will be unmapped, | |||
753 | * but there's not much else we can do here. | |||
754 | */ | |||
755 | panic("inflate error"); | |||
756 | } | |||
757 | ||||
758 | /* We should always have extracted a full page ... */ | |||
759 | if (hibernate_state->hib_stream.avail_out != 0) { | |||
760 | /* | |||
761 | * XXX - this will likely reboot/hang most machines | |||
762 | * since the console output buffer will be unmapped, | |||
763 | * but there's not much else we can do here. | |||
764 | */ | |||
765 | panic("incomplete page"); | |||
766 | } | |||
767 | ||||
768 | return (i == Z_STREAM_END1); | |||
769 | } | |||
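This function is the target of the analyzer warning quoted at the top of this report: at source line 727 the int that rle points to has only ever been written indirectly, through hib_stream.next_out, so scan-build's path model still treats *rle as uninitialized ("garbage") when it reaches the '<' comparison. One defensive way to quiet the report, shown only as a hypothetical sketch (not the committed upstream change), is to zero the word before the first inflate() call:

	/*
	 * Hypothetical sketch: initialize the RLE word before inflate()
	 * writes into it via next_out, so the range check at line 727
	 * never reads an indeterminate value on paths the analyzer
	 * cannot follow through the zlib call.
	 */
	*rle = 0;
	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
	hibernate_state->hib_stream.avail_out = sizeof(*rle);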
770 | ||||
771 | /* | |||
772 | * Inflate size bytes from src into dest, skipping any pages in | |||
773 | * [src..dest] that are special (see hibernate_inflate_skip) | |||
774 | * | |||
775 | * This function executes while using the resume-time stack | |||
776 | * and pmap, and therefore cannot use ddb/printf/etc. Doing so | |||
777 | * will likely hang or reset the machine since the console output buffer | |||
778 | * will be unmapped. | |||
779 | */ | |||
780 | void | |||
781 | hibernate_inflate_region(union hibernate_info *hib, paddr_t dest, | |||
782 | paddr_t src, size_t size) | |||
783 | { | |||
784 | int end_stream = 0, rle, skip; | |||
785 | struct hibernate_zlib_state *hibernate_state; | |||
786 | ||||
787 | hibernate_state = | |||
788 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
789 | ||||
790 | hibernate_state->hib_stream.next_in = (unsigned char *)src; | |||
791 | hibernate_state->hib_stream.avail_in = size; | |||
792 | ||||
793 | do { | |||
794 | /* | |||
795 | * Is this a special page? If yes, redirect the | |||
796 | * inflate output to a scratch page (eg, discard it) | |||
797 | */ | |||
798 | skip = hibernate_inflate_skip(hib, dest); | |||
799 | if (skip == HIB_SKIP) { | |||
800 | hibernate_enter_resume_mapping( | |||
801 | HIBERNATE_INFLATE_PAGE, | |||
802 | HIBERNATE_INFLATE_PAGE, 0); | |||
803 | } else if (skip == HIB_MOVE) { | |||
804 | /* | |||
805 | * Special case : retguard region. This gets moved | |||
806 | * temporarily into the piglet region and copied into | |||
807 | * place immediately before resume | |||
808 | */ | |||
809 | hibernate_enter_resume_mapping( | |||
810 | HIBERNATE_INFLATE_PAGE, | |||
811 | hib->piglet_pa + (110 * PAGE_SIZE) + | |||
812 | hib->retguard_ofs, 0); | |||
813 | hib->retguard_ofs += PAGE_SIZE; | |||
814 | if (hib->retguard_ofs > 255 * PAGE_SIZE) { | |||
815 | /* | |||
816 | * XXX - this will likely reboot/hang most | |||
817 | * machines since the console output | |||
818 | * buffer will be unmapped, but there's | |||
819 | * not much else we can do here. | |||
820 | */ | |||
821 | panic("retguard move error, out of space"); | |||
822 | } | |||
823 | } else { | |||
824 | hibernate_enter_resume_mapping( | |||
825 | HIBERNATE_INFLATE_PAGE, dest, 0); | |||
826 | } | |||
827 | ||||
828 | hibernate_flush(); | |||
829 | end_stream = hibernate_inflate_page(&rle); | |||
830 | ||||
831 | if (rle == 0) | |||
832 | dest += PAGE_SIZE; | |||
833 | else | |||
834 | dest += (rle * PAGE_SIZE); | |||
835 | } while (!end_stream); | |||
836 | } | |||
837 | ||||
838 | /* | |||
839 | * deflate from src into the I/O page, up to 'remaining' bytes | |||
840 | * | |||
841 | * Returns number of input bytes consumed, and may reset | |||
842 | * the 'remaining' parameter if not all the output space was consumed | |||
843 | * (this information is needed to know how much to write to disk) | |||
844 | */ | |||
845 | size_t | |||
846 | hibernate_deflate(union hibernate_info *hib, paddr_t src, | |||
847 | size_t *remaining) | |||
848 | { | |||
849 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; | |||
850 | struct hibernate_zlib_state *hibernate_state; | |||
851 | ||||
852 | hibernate_state = | |||
853 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
854 | ||||
855 | /* Set up the stream for deflate */ | |||
856 | hibernate_state->hib_stream.next_in = (unsigned char *)src; | |||
857 | hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK); | |||
858 | hibernate_state->hib_stream.next_out = | |||
859 | (unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining); | |||
860 | hibernate_state->hib_stream.avail_out = *remaining; | |||
861 | ||||
862 | /* Process next block of data */ | |||
863 | if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK) | |||
864 | panic("hibernate zlib deflate error"); | |||
865 | ||||
866 | /* Update pointers and return number of bytes consumed */ | |||
867 | *remaining = hibernate_state->hib_stream.avail_out; | |||
868 | return (PAGE_SIZE - (src & PAGE_MASK)) - | |||
869 | hibernate_state->hib_stream.avail_in; | |||
870 | } | |||
871 | ||||
872 | /* | |||
873 | * Write the hibernation information specified in hiber_info | |||
874 | * to the location in swap previously calculated (last block of | |||
875 | * swap), called the "signature block". | |||
876 | */ | |||
877 | int | |||
878 | hibernate_write_signature(union hibernate_info *hib) | |||
879 | { | |||
880 | /* Write hibernate info to disk */ | |||
881 | return (hib->io_func(hib->dev, hib->sig_offset, | |||
882 | (vaddr_t)hib, DEV_BSIZE(1 << 9), HIB_W1, | |||
883 | hib->io_page)); | |||
884 | } | |||
885 | ||||
886 | /* | |||
887 | * Write the memory chunk table to the area in swap immediately | |||
888 | * preceding the signature block. The chunk table is stored | |||
889 | * in the piglet when this function is called. Returns errno. | |||
890 | */ | |||
891 | int | |||
892 | hibernate_write_chunktable(union hibernate_info *hib) | |||
893 | { | |||
894 | vaddr_t hibernate_chunk_table_start; | |||
895 | size_t hibernate_chunk_table_size; | |||
896 | int i, err; | |||
897 | ||||
898 | hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE; | |||
899 | ||||
900 | hibernate_chunk_table_start = hib->piglet_va + | |||
901 | HIBERNATE_CHUNK_SIZE; | |||
902 | ||||
903 | /* Write chunk table */ | |||
904 | for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) { | |||
905 | if ((err = hib->io_func(hib->dev, | |||
906 | hib->chunktable_offset + (i/DEV_BSIZE), | |||
907 | (vaddr_t)(hibernate_chunk_table_start + i), | |||
908 | MAXPHYS, HIB_W, hib->io_page))) { | |||
909 | DPRINTF("chunktable write error: %d\n", err); | |||
910 | return (err); | |||
911 | } | |||
912 | } | |||
913 | ||||
914 | return (0); | |||
915 | } | |||
916 | ||||
917 | /* | |||
918 | * Write an empty hiber_info to the swap signature block, which is | |||
919 | * guaranteed to not match any valid hib. | |||
920 | */ | |||
921 | int | |||
922 | hibernate_clear_signature(union hibernate_info *hib) | |||
923 | { | |||
924 | union hibernate_info blank_hiber_info; | |||
925 | ||||
926 | /* Zero out a blank hiber_info */ | |||
927 | memset(&blank_hiber_info, 0, sizeof(union hibernate_info)); | |||
928 | ||||
929 | /* Write (zeroed) hibernate info to disk */ | |||
930 | DPRINTF("clearing hibernate signature block location: %lld\n", | |||
931 | hib->sig_offset); | |||
932 | if (hibernate_block_io(hib, | |||
933 | hib->sig_offset, | |||
934 | DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1)) | |||
935 | printf("Warning: could not clear hibernate signature\n"); | |||
936 | ||||
937 | return (0); | |||
938 | } | |||
939 | ||||
940 | /* | |||
941 | * Compare two hibernate_infos to determine if they are the same (eg, | |||
942 | * we should be performing a hibernate resume on this machine). | |||
943 | * Not all fields are checked - just enough to verify that the machine | |||
944 | * has the same memory configuration and kernel as the one that | |||
945 | * wrote the signature previously. | |||
946 | */ | |||
947 | int | |||
948 | hibernate_compare_signature(union hibernate_info *mine, | |||
949 | union hibernate_info *disk) | |||
950 | { | |||
951 | u_int i; | |||
952 | ||||
953 | if (mine->nranges != disk->nranges) { | |||
954 | printf("unhibernate failed: memory layout changed\n"); | |||
955 | return (1); | |||
956 | } | |||
957 | ||||
958 | if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH32) != 0) { | |||
959 | printf("unhibernate failed: original kernel changed\n"); | |||
960 | return (1); | |||
961 | } | |||
962 | ||||
963 | for (i = 0; i < mine->nranges; i++) { | |||
964 | if ((mine->ranges[i].base != disk->ranges[i].base) || | |||
965 | (mine->ranges[i].end != disk->ranges[i].end) ) { | |||
966 | DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", | |||
967 | i, | |||
968 | (void *)mine->ranges[i].base, | |||
969 | (void *)mine->ranges[i].end, | |||
970 | (void *)disk->ranges[i].base, | |||
971 | (void *)disk->ranges[i].end); | |||
972 | printf("unhibernate failed: memory size changed\n"); | |||
973 | return (1); | |||
974 | } | |||
975 | } | |||
976 | ||||
977 | return (0); | |||
978 | } | |||
979 | ||||
980 | /* | |||
981 | * Transfers xfer_size bytes between the hibernate device specified in | |||
982 | * hib_info at offset blkctr and the vaddr specified at dest. | |||
983 | * | |||
984 | * Separate offsets and pages are used to handle misaligned reads (reads | |||
985 | * that span a page boundary). | |||
986 | * | |||
987 | * blkctr specifies a relative offset (relative to the start of swap), | |||
988 | * not an absolute disk offset | |||
989 | * | |||
990 | */ | |||
991 | int | |||
992 | hibernate_block_io(union hibernate_info *hib, daddr_t blkctr, | |||
993 | size_t xfer_size, vaddr_t dest, int iswrite) | |||
994 | { | |||
995 | struct buf *bp; | |||
996 | struct bdevsw *bdsw; | |||
997 | int error; | |||
998 | ||||
999 | bp = geteblk(xfer_size); | |||
1000 | bdsw = &bdevsw[major(hib->dev)]; | |||
1001 | ||||
1002 | error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc); | |||
1003 | if (error) { | |||
1004 | printf("hibernate_block_io open failed\n"); | |||
1005 | return (1); | |||
1006 | } | |||
1007 | ||||
1008 | if (iswrite) | |||
1009 | bcopy((caddr_t)dest, bp->b_data, xfer_size); | |||
1010 | ||||
1011 | bp->b_bcount = xfer_size; | |||
1012 | bp->b_blkno = blkctr; | |||
1013 | CLR(bp->b_flags, B_READ | B_WRITE | B_DONE); | |||
1014 | SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW); | |||
1015 | bp->b_dev = hib->dev; | |||
1016 | (*bdsw->d_strategy)(bp); | |||
1017 | ||||
1018 | error = biowait(bp); | |||
1019 | if (error) { | |||
1020 | printf("hib block_io biowait error %d blk %lld size %zu\n", | |||
1021 | error, (long long)blkctr, xfer_size); | |||
1022 | error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR, | |||
1023 | curproc); | |||
1024 | if (error) | |||
1025 | printf("hibernate_block_io error close failed\n"); | |||
1026 | return (1); | |||
1027 | } | |||
1028 | ||||
1029 | error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc); | |||
1030 | if (error) { | |||
1031 | printf("hibernate_block_io close failed\n"); | |||
1032 | return (1); | |||
1033 | } | |||
1034 | ||||
1035 | if (!iswrite) | |||
1036 | bcopy(bp->b_data, (caddr_t)dest, xfer_size); | |||
1037 | ||||
1038 | bp->b_flags |= B_INVAL0x00000800; | |||
1039 | brelse(bp); | |||
1040 | ||||
1041 | return (0); | |||
1042 | } | |||
1043 | ||||
1044 | /* | |||
1045 | * Preserve one page worth of random data, generated from the resuming | |||
1046 | * kernel's arc4random. After resume, this preserved entropy can be used | |||
1047 | * to further improve the un-hibernated machine's entropy pool. This | |||
1048 | * random data is stored in the piglet, which is preserved across the | |||
1049 | * unpack operation, and is restored later in the resume process (see | |||
1050 | * hib_getentropy) | |||
1051 | */ | |||
1052 | void | |||
1053 | hibernate_preserve_entropy(union hibernate_info *hib) | |||
1054 | { | |||
1055 | void *entropy; | |||
1056 | ||||
1057 | entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); | |||
1058 | ||||
1059 | if (!entropy) | |||
1060 | return; | |||
1061 | ||||
1062 | pmap_activate(curproc); | |||
1063 | pmap_kenter_pa((vaddr_t)entropy, | |||
1064 | (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)), | |||
1065 | PROT_READ | PROT_WRITE); | |||
1066 | ||||
1067 | arc4random_buf((void *)entropy, PAGE_SIZE); | |||
1068 | pmap_kremove((vaddr_t)entropy, PAGE_SIZE); | |||
1069 | km_free(entropy, PAGE_SIZE, &kv_any, &kp_none); | |||
1070 | } | |||
1071 | ||||
1072 | #ifndef NO_PROPOLICE | |||
1073 | vaddr_t | |||
1074 | hibernate_unprotect_ssp(void) | |||
1075 | { | |||
1076 | struct kmem_dyn_mode kd_avoidalias; | |||
1077 | vaddr_t va = trunc_page((vaddr_t)&__guard_local); | |||
1078 | paddr_t pa; | |||
1079 | ||||
1080 | pmap_extract(pmap_kernel(), va, &pa); | |||
1081 | ||||
1082 | memset(&kd_avoidalias, 0, sizeof kd_avoidalias); | |||
1083 | kd_avoidalias.kd_prefer = pa; | |||
1084 | kd_avoidalias.kd_waitok = 1; | |||
1085 | va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias); | |||
1086 | if (!va) | |||
1087 | panic("hibernate_unprotect_ssp"); | |||
1088 | ||||
1089 | pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE); | |||
1090 | pmap_update(pmap_kernel()); | |||
1091 | ||||
1092 | return va; | |||
1093 | } | |||
1094 | ||||
1095 | void | |||
1096 | hibernate_reprotect_ssp(vaddr_t va) | |||
1097 | { | |||
1098 | pmap_kremove(va, PAGE_SIZE); | |||
1099 | km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none); | |||
1100 | } | |||
1101 | #endif /* NO_PROPOLICE */ | |||
1102 | ||||
1103 | /* | |||
1104 | * Reads the signature block from swap, checks against the current machine's | |||
1105 | * information. If the information matches, perform a resume by reading the | |||
1106 | * saved image into the pig area, and unpacking. | |||
1107 | * | |||
1108 | * Must be called with interrupts enabled. | |||
1109 | */ | |||
1110 | void | |||
1111 | hibernate_resume(void) | |||
1112 | { | |||
1113 | union hibernate_info hib; | |||
1114 | int s; | |||
1115 | #ifndef NO_PROPOLICE | |||
1116 | vsize_t off = (vaddr_t)&__guard_local - | |||
1117 | trunc_page((vaddr_t)&__guard_local); | |||
1118 | vaddr_t guard_va; | |||
1119 | #endif | |||
1120 | ||||
1121 | /* Get current running machine's hibernate info */ | |||
1122 | memset(&hib, 0, sizeof(hib)); | |||
1123 | if (get_hibernate_info(&hib, 0)) { | |||
1124 | DPRINTF("couldn't retrieve machine's hibernate info\n"); | |||
1125 | return; | |||
1126 | } | |||
1127 | ||||
1128 | /* Read hibernate info from disk */ | |||
1129 | s = splbio(); | |||
1130 | ||||
1131 | DPRINTF("reading hibernate signature block location: %lld\n", | |||
1132 | hib.sig_offset); | |||
1133 | ||||
1134 | if (hibernate_block_io(&hib, | |||
1135 | hib.sig_offset, | |||
1136 | DEV_BSIZE, (vaddr_t)&disk_hib, 0)) { | |||
1137 | DPRINTF("error in hibernate read"); | |||
1138 | splx(s)spllower(s); | |||
1139 | return; | |||
1140 | } | |||
1141 | ||||
1142 | /* Check magic number */ | |||
1143 | if (disk_hib.magic != HIBERNATE_MAGIC) { | |||
1144 | DPRINTF("wrong magic number in hibernate signature: %x\n", | |||
1145 | disk_hib.magic); | |||
1146 | splx(s)spllower(s); | |||
1147 | return; | |||
1148 | } | |||
1149 | ||||
1150 | /* | |||
1151 | * We (possibly) found a hibernate signature. Clear signature first, | |||
1152 | * to prevent accidental resume or endless resume cycles later. | |||
1153 | */ | |||
1154 | if (hibernate_clear_signature(&hib)) { | |||
1155 | DPRINTF("error clearing hibernate signature block\n"); | |||
1156 | splx(s)spllower(s); | |||
1157 | return; | |||
1158 | } | |||
1159 | ||||
1160 | /* | |||
1161 | * If on-disk and in-memory hibernate signatures match, | |||
1162 | * this means we should do a resume from hibernate. | |||
1163 | */ | |||
1164 | if (hibernate_compare_signature(&hib, &disk_hib)) { | |||
1165 | DPRINTF("mismatched hibernate signature block\n"); | |||
1166 | splx(s)spllower(s); | |||
1167 | return; | |||
1168 | } | |||
1169 | disk_hib.dev = hib.dev; | |||
1170 | ||||
1171 | #ifdef MULTIPROCESSOR1 | |||
1172 | /* XXX - if we fail later, we may need to rehatch APs on some archs */ | |||
1173 | DPRINTF("hibernate: quiescing APs\n"); | |||
1174 | hibernate_quiesce_cpus(); | |||
1175 | #endif /* MULTIPROCESSOR */ | |||
1176 | ||||
1177 | /* Read the image from disk into the image (pig) area */ | |||
1178 | if (hibernate_read_image(&disk_hib)) | |||
1179 | goto fail; | |||
1180 | ||||
1181 | DPRINTF("hibernate: quiescing devices\n"); | |||
1182 | if (config_suspend_all(DVACT_QUIESCE) != 0) | |||
1183 | goto fail; | |||
1184 | ||||
1185 | #ifndef NO_PROPOLICE | |||
1186 | guard_va = hibernate_unprotect_ssp(); | |||
1187 | #endif /* NO_PROPOLICE */ | |||
1188 | ||||
1189 | (void) splhigh(); | |||
1190 | hibernate_disable_intr_machdep(); | |||
1191 | cold = 2; | |||
1192 | ||||
1193 | DPRINTF("hibernate: suspending devices\n"); | |||
1194 | if (config_suspend_all(DVACT_SUSPEND) != 0) { | |||
1195 | cold = 0; | |||
1196 | hibernate_enable_intr_machdep(); | |||
1197 | #ifndef NO_PROPOLICE | |||
1198 | hibernate_reprotect_ssp(guard_va); | |||
1199 | #endif /* ! NO_PROPOLICE */ | |||
1200 | goto fail; | |||
1201 | } | |||
1202 | ||||
1203 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, | |||
1204 | &retguard_start_phys); | |||
1205 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, | |||
1206 | &retguard_end_phys); | |||
1207 | ||||
1208 | hibernate_preserve_entropy(&disk_hib); | |||
1209 | ||||
1210 | printf("Unpacking image...\n"); | |||
1211 | ||||
1212 | /* Switch stacks */ | |||
1213 | DPRINTF("hibernate: switching stacks\n"); | |||
1214 | hibernate_switch_stack_machdep(); | |||
1215 | ||||
1216 | #ifndef NO_PROPOLICE | |||
1217 | /* Start using suspended kernel's propolice guard */ | |||
1218 | *(long *)(guard_va + off) = disk_hib.guard; | |||
1219 | hibernate_reprotect_ssp(guard_va); | |||
1220 | #endif /* ! NO_PROPOLICE */ | |||
1221 | ||||
1222 | /* Unpack and resume */ | |||
1223 | hibernate_unpack_image(&disk_hib); | |||
1224 | ||||
1225 | fail: | |||
1226 | splx(s)spllower(s); | |||
1227 | printf("\nUnable to resume hibernated image\n"); | |||
1228 | } | |||
1229 | ||||
1230 | /* | |||
1231 | * Unpack image from pig area to original location by looping through the | |||
1232 | * list of output chunks in the order they should be restored (fchunks). | |||
1233 | * | |||
1234 | * Note that due to the stack smash protector and the fact that we have | |||
1235 | * switched stacks, it is not permitted to return from this function. | |||
1236 | */ | |||
1237 | void | |||
1238 | hibernate_unpack_image(union hibernate_info *hib) | |||
1239 | { | |||
1240 | struct hibernate_disk_chunk *chunks; | |||
1241 | union hibernate_info local_hib; | |||
1242 | paddr_t image_cur = global_pig_start; | |||
1243 | short i, *fchunks; | |||
1244 | char *pva; | |||
1245 | ||||
1246 | /* Piglet will be identity mapped (VA == PA) */ | |||
1247 | pva = (char *)hib->piglet_pa; | |||
1248 | ||||
1249 | fchunks = (short *)(pva + (4 * PAGE_SIZE)); | |||
1250 | ||||
1251 | chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE); | |||
1252 | ||||
1253 | /* Can't use hiber_info that's passed in after this point */ | |||
1254 | bcopy(hib, &local_hib, sizeof(union hibernate_info)); | |||
1255 | local_hib.retguard_ofs = 0; | |||
1256 | ||||
1257 | /* VA == PA */ | |||
1258 | local_hib.piglet_va = local_hib.piglet_pa; | |||
1259 | ||||
1260 | /* | |||
1261 | * Point of no return. Once we pass this point, only kernel code can | |||
1262 | * be accessed. No global variables or other kernel data structures | |||
1263 | * are guaranteed to be coherent after unpack starts. | |||
1264 | * | |||
1265 | * The image is now in high memory (pig area), we unpack from the pig | |||
1266 | * to the correct location in memory. We'll eventually end up copying | |||
1267 | * on top of ourself, but we are assured the kernel code here is the | |||
1268 | * same between the hibernated and resuming kernel, and we are running | |||
1269 | * on our own stack, so the overwrite is ok. | |||
1270 | */ | |||
1271 | DPRINTF("hibernate: activating alt. pagetable and starting unpack\n"); | |||
1272 | hibernate_activate_resume_pt_machdep(); | |||
1273 | ||||
1274 | for (i = 0; i < local_hib.chunk_ctr; i++) { | |||
1275 | /* Reset zlib for inflate */ | |||
1276 | if (hibernate_zlib_reset(&local_hib, 0) != Z_OK0) | |||
1277 | panic("hibernate failed to reset zlib for inflate"); | |||
1278 | ||||
1279 | hibernate_process_chunk(&local_hib, &chunks[fchunks[i]], | |||
1280 | image_cur); | |||
1281 | ||||
1282 | image_cur += chunks[fchunks[i]].compressed_size; | |||
1283 | } | |||
1284 | ||||
1285 | /* | |||
1286 | * Resume the loaded kernel by jumping to the MD resume vector. | |||
1287 | * We won't be returning from this call. We pass the location of | |||
1288 | * the retguard save area so the MD code can replace it before | |||
1289 | * resuming. See the piglet layout at the top of this file for | |||
1290 | * more information on the layout of the piglet area. | |||
1291 | * | |||
1292 | * We use 'global_piglet_va' here since by the time we are at | |||
1293 | * this point, we have already unpacked the image, and we want | |||
1294 | * the suspended kernel's view of what the piglet was, before | |||
1295 | * suspend occurred (since we will need to use that in the retguard | |||
1296 | * copy code in hibernate_resume_machdep.) | |||
1297 | */ | |||
1298 | hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE)); | |||
1299 | } | |||
1300 | ||||
1301 | /* | |||
1302 | * Bounce a compressed image chunk to the piglet, entering mappings for the | |||
1303 | * copied pages as needed | |||
1304 | */ | |||
1305 | void | |||
1306 | hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size) | |||
1307 | { | |||
1308 | size_t ct, ofs; | |||
1309 | paddr_t src = img_cur; | |||
1310 | vaddr_t dest = piglet; | |||
1311 | ||||
1312 | /* Copy first partial page */ | |||
1313 | ct = (PAGE_SIZE) - (src & PAGE_MASK); | |||
1314 | ofs = (src & PAGE_MASK); | |||
1315 | ||||
1316 | if (ct < PAGE_SIZE) { | |||
1317 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, | |||
1318 | (src - ofs), 0); | |||
1319 | hibernate_flush(); | |||
1320 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct); | |||
1321 | src += ct; | |||
1322 | dest += ct; | |||
1323 | } | |||
1324 | ||||
1325 | /* Copy remaining pages */ | |||
1326 | while (src < size + img_cur) { | |||
1327 | hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0); | |||
1328 | hibernate_flush(); | |||
1329 | ct = PAGE_SIZE; | |||
1330 | bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct); | |||
1331 | hibernate_flush(); | |||
1332 | src += ct; | |||
1333 | dest += ct; | |||
1334 | } | |||
1335 | } | |||
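/*
 * Editor's sketch (illustrative, not part of the original source): a worked
 * example of the partial-first-page arithmetic above, assuming a 4 KB
 * PAGE_SIZE and a hypothetical chunk that starts 0x300 bytes into a page.
 */
#if 0
	paddr_t img_cur = 0x12340300;		/* hypothetical source address */
	size_t ofs = img_cur & PAGE_MASK;	/* 0x300 into its page */
	size_t ct = PAGE_SIZE - ofs;		/* 0xd00 bytes copied first */
	/*
	 * After that first bcopy, src and dest are page aligned and the
	 * rest of the chunk moves in whole-PAGE_SIZE copies.
	 */
#endif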
1336 | ||||
1337 | /* | |||
1338 | * Process a chunk by bouncing it to the piglet, followed by unpacking | |||
1339 | */ | |||
1340 | void | |||
1341 | hibernate_process_chunk(union hibernate_info *hib, | |||
1342 | struct hibernate_disk_chunk *chunk, paddr_t img_cur) | |||
1343 | { | |||
1344 | char *pva = (char *)hib->piglet_va; | |||
1345 | ||||
1346 | hibernate_copy_chunk_to_piglet(img_cur, | |||
1347 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size); | |||
1348 | hibernate_inflate_region(hib, chunk->base, | |||
1349 | (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), | |||
1350 | chunk->compressed_size); | |||
1351 | } | |||
1352 | ||||
1353 | /* | |||
1354 | * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between | |||
1355 | * inaddr and range_end. | |||
1356 | */ | |||
1357 | int | |||
1358 | hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) | |||
1359 | { | |||
1360 | int rle; | |||
1361 | ||||
1362 | rle = uvm_page_rle(inaddr); | |||
1363 | KASSERT(rle >= 0 && rle <= MAX_RLE); | |||
1364 | ||||
1365 | /* Clamp RLE to range end */ | |||
1366 | if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end) | |||
1367 | rle = (range_end - inaddr) / PAGE_SIZE; | |||
1368 | ||||
1369 | return (rle); | |||
1370 | } | |||
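/*
 * Editor's sketch (illustrative, not part of the original source): the clamp
 * above with hypothetical values, assuming a 4 KB PAGE_SIZE.  If
 * uvm_page_rle() reports 5 pages but only 2 pages remain before range_end,
 * the result is clamped to 2.
 */
#if 0
	paddr_t inaddr = 0x1000000, range_end = 0x1002000;	/* hypothetical */
	int rle = 5;				/* pretend uvm_page_rle() value */
	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
		rle = (range_end - inaddr) / PAGE_SIZE;		/* -> 2 */
#endif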
1371 | ||||
1372 | /* | |||
1373 | * Write the RLE byte for page at 'inaddr' to the output stream. | |||
1374 | * Returns the number of pages to be skipped at 'inaddr'. | |||
1375 | */ | |||
1376 | int | |||
1377 | hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr, | |||
1378 | paddr_t range_end, daddr_t *blkctr, | |||
1379 | size_t *out_remaining) | |||
1380 | { | |||
1381 | int rle, err, *rleloc; | |||
1382 | struct hibernate_zlib_state *hibernate_state; | |||
1383 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; | |||
1384 | ||||
1385 | hibernate_state = | |||
1386 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
1387 | ||||
1388 | rle = hibernate_calc_rle(inaddr, range_end); | |||
1389 | ||||
1390 | rleloc = (int *)hibernate_rle_page + MAX_RLE - 1; | |||
1391 | *rleloc = rle; | |||
1392 | ||||
1393 | /* Deflate the RLE byte into the stream */ | |||
1394 | hibernate_deflate(hib, (paddr_t)rleloc, out_remaining); | |||
1395 | ||||
1396 | /* Did we fill the output page? If so, flush to disk */ | |||
1397 | if (*out_remaining == 0) { | |||
1398 | if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset, | |||
1399 | (vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W, | |||
1400 | hib->io_page))) { | |||
1401 | DPRINTF("hib write error %d\n", err); | |||
1402 | return (err); | |||
1403 | } | |||
1404 | ||||
1405 | *blkctr += PAGE_SIZE / DEV_BSIZE; | |||
1406 | *out_remaining = PAGE_SIZE; | |||
1407 | ||||
1408 | /* If we didn't deflate the entire RLE byte, finish it now */ | |||
1409 | if (hibernate_state->hib_stream.avail_in != 0) | |||
1410 | hibernate_deflate(hib, | |||
1411 | (vaddr_t)hibernate_state->hib_stream.next_in, | |||
1412 | out_remaining); | |||
1413 | } | |||
1414 | ||||
1415 | return (rle); | |||
1416 | } | |||
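/*
 * Editor's note (hedged, not part of the original source): rleloc points at
 * the last int of the RLE utility page, so the hibernate_deflate() call above
 * appears to compress exactly sizeof(int) bytes (from rleloc to the end of
 * its page).  In the write loop every page-aligned address is therefore
 * preceded in the output stream by one RLE marker; a non-zero marker means
 * that many pages were skipped rather than compressed.
 */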
1417 | ||||
1418 | /* | |||
1419 | * Write a compressed version of this machine's memory to disk, at the | |||
1420 | * precalculated swap offset: | |||
1421 | * | |||
1422 | * end of swap - signature block size - chunk table size - memory size | |||
1423 | * | |||
1424 | * The function begins by looping through each phys mem range, cutting each | |||
1425 | * one into MD sized chunks. These chunks are then compressed individually | |||
1426 | * and written out to disk, in phys mem order. Some chunks might compress | |||
1427 | * more than others, and for this reason, each chunk's size is recorded | |||
1428 | * in the chunk table, which is written to disk after the image has | |||
1429 | * properly been compressed and written (in hibernate_write_chunktable). | |||
1430 | * | |||
1431 | * When this function is called, the machine is nearly suspended - most | |||
1432 | * devices are quiesced/suspended, interrupts are off, and cold has | |||
1433 | * been set. This means that there can be no side effects once the | |||
1434 | * write has started, and the write function itself can also have no | |||
1435 | * side effects. This also means no printfs are permitted (since printf | |||
1436 | * has side effects.) | |||
1437 | * | |||
1438 | * Return values : | |||
1439 | * | |||
1440 | * 0 - success | |||
1441 | * EIO - I/O error occurred writing the chunks | |||
1442 | * EINVAL - Failed to write a complete range | |||
1443 | * ENOMEM - Memory allocation failure during preparation of the zlib arena | |||
1444 | */ | |||
1445 | int | |||
1446 | hibernate_write_chunks(union hibernate_info *hib) | |||
1447 | { | |||
1448 | paddr_t range_base, range_end, inaddr, temp_inaddr; | |||
1449 | size_t nblocks, out_remaining, used; | |||
1450 | struct hibernate_disk_chunk *chunks; | |||
1451 | vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; | |||
1452 | daddr_t blkctr = 0; | |||
1453 | int i, rle = 0, err;	/* rle is only recomputed at page-aligned inaddr */ | |||
1454 | struct hibernate_zlib_state *hibernate_state; | |||
1455 | ||||
1456 | hibernate_state = | |||
1457 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
1458 | ||||
1459 | hib->chunk_ctr = 0; | |||
1460 | ||||
1461 | /* | |||
1462 | * Map the utility VAs to the piglet. See the piglet map at the | |||
1463 | * top of this file for piglet layout information. | |||
1464 | */ | |||
1465 | hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE; | |||
1466 | hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE; | |||
1467 | ||||
1468 | chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + | |||
1469 | HIBERNATE_CHUNK_SIZE); | |||
1470 | ||||
1471 | /* Calculate the chunk regions */ | |||
1472 | for (i = 0; i < hib->nranges; i++) { | |||
1473 | range_base = hib->ranges[i].base; | |||
1474 | range_end = hib->ranges[i].end; | |||
1475 | ||||
1476 | inaddr = range_base; | |||
1477 | ||||
1478 | while (inaddr < range_end) { | |||
1479 | chunks[hib->chunk_ctr].base = inaddr; | |||
1480 | if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) | |||
1481 | chunks[hib->chunk_ctr].end = inaddr + | |||
1482 | HIBERNATE_CHUNK_SIZE; | |||
1483 | else | |||
1484 | chunks[hib->chunk_ctr].end = range_end; | |||
1485 | ||||
1486 | inaddr += HIBERNATE_CHUNK_SIZE; | |||
1487 | hib->chunk_ctr ++; | |||
1488 | } | |||
1489 | } | |||
1490 | ||||
1491 | uvm_pmr_dirty_everything(); | |||
1492 | uvm_pmr_zero_everything(); | |||
1493 | ||||
1494 | /* Compress and write the chunks in the chunktable */ | |||
1495 | for (i = 0; i < hib->chunk_ctr; i++) { | |||
1496 | range_base = chunks[i].base; | |||
1497 | range_end = chunks[i].end; | |||
1498 | ||||
1499 | chunks[i].offset = blkctr + hib->image_offset; | |||
1500 | ||||
1501 | /* Reset zlib for deflate */ | |||
1502 | if (hibernate_zlib_reset(hib, 1) != Z_OK) { | |||
1503 | DPRINTF("hibernate_zlib_reset failed for deflate\n"); | |||
1504 | return (ENOMEM); | |||
1505 | } | |||
1506 | ||||
1507 | inaddr = range_base; | |||
1508 | ||||
1509 | /* | |||
1510 | * For each range, loop through its phys mem region | |||
1511 | * and write out the chunks (the last chunk might be | |||
1512 | * smaller than the chunk size). | |||
1513 | */ | |||
1514 | while (inaddr < range_end) { | |||
1515 | out_remaining = PAGE_SIZE; | |||
1516 | while (out_remaining > 0 && inaddr < range_end) { | |||
1517 | /* | |||
1518 | * Adjust for regions that are not evenly | |||
1519 | * divisible by PAGE_SIZE or overflowed | |||
1520 | * pages from the previous iteration. | |||
1521 | */ | |||
1522 | temp_inaddr = (inaddr & PAGE_MASK) + | |||
1523 | hibernate_copy_page; | |||
1524 | ||||
1525 | /* Deflate from temp_inaddr to IO page */ | |||
1526 | if (inaddr != range_end) { | |||
1527 | if (inaddr % PAGE_SIZE == 0) { | |||
1528 | rle = hibernate_write_rle(hib, | |||
1529 | inaddr, | |||
1530 | range_end, | |||
1531 | &blkctr, | |||
1532 | &out_remaining); | |||
1533 | } | |||
1534 | ||||
1535 | if (rle == 0) { | |||
1536 | pmap_kenter_pa(hibernate_temp_page, | |||
1537 | inaddr & PMAP_PA_MASK, | |||
1538 | PROT_READ); | |||
1539 | ||||
1540 | bcopy((caddr_t)hibernate_temp_page, | |||
1541 | (caddr_t)hibernate_copy_page, | |||
1542 | PAGE_SIZE); | |||
1543 | inaddr += hibernate_deflate(hib, | |||
1544 | temp_inaddr, | |||
1545 | &out_remaining); | |||
1546 | } else { | |||
1547 | inaddr += rle * PAGE_SIZE; | |||
1548 | if (inaddr > range_end) | |||
1549 | inaddr = range_end; | |||
1550 | } | |||
1551 | ||||
1552 | } | |||
1553 | ||||
1554 | if (out_remaining == 0) { | |||
1555 | /* Filled up the page */ | |||
1556 | nblocks = PAGE_SIZE / DEV_BSIZE; | |||
1557 | ||||
1558 | if ((err = hib->io_func(hib->dev, | |||
1559 | blkctr + hib->image_offset, | |||
1560 | (vaddr_t)hibernate_io_page, | |||
1561 | PAGE_SIZE, HIB_W, hib->io_page))) { | |||
1562 | DPRINTF("hib write error %d\n", | |||
1563 | err); | |||
1564 | return (err); | |||
1565 | } | |||
1566 | ||||
1567 | blkctr += nblocks; | |||
1568 | } | |||
1569 | } | |||
1570 | } | |||
1571 | ||||
1572 | if (inaddr != range_end) { | |||
1573 | DPRINTF("deflate range ended prematurely\n"); | |||
1574 | return (EINVAL); | |||
1575 | } | |||
1576 | ||||
1577 | /* | |||
1578 | * End of range. Round up to next secsize bytes | |||
1579 | * after finishing compress | |||
1580 | */ | |||
1581 | if (out_remaining == 0) | |||
1582 | out_remaining = PAGE_SIZE; | |||
1583 | ||||
1584 | /* Finish compress */ | |||
1585 | hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; | |||
1586 | hibernate_state->hib_stream.avail_in = 0; | |||
1587 | hibernate_state->hib_stream.next_out = | |||
1588 | (unsigned char *)hibernate_io_page + | |||
1589 | (PAGE_SIZE - out_remaining); | |||
1590 | ||||
1591 | /* We have an extra output page available for finalize */ | |||
1592 | hibernate_state->hib_stream.avail_out = | |||
1593 | out_remaining + PAGE_SIZE; | |||
1594 | ||||
1595 | if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) != | |||
1596 | Z_STREAM_END) { | |||
1597 | DPRINTF("deflate error in output stream: %d\n", err); | |||
1598 | return (err); | |||
1599 | } | |||
1600 | ||||
1601 | out_remaining = hibernate_state->hib_stream.avail_out; | |||
1602 | ||||
1603 | used = 2 * PAGE_SIZE - out_remaining; | |||
1604 | nblocks = used / DEV_BSIZE; | |||
1605 | ||||
1606 | /* Round up to next block if needed */ | |||
1607 | if (used % DEV_BSIZE != 0) | |||
1608 | nblocks ++; | |||
1609 | ||||
1610 | /* Write final block(s) for this chunk */ | |||
1611 | if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, | |||
1612 | (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE, | |||
1613 | HIB_W, hib->io_page))) { | |||
1614 | DPRINTF("hib final write error %d\n", err); | |||
1615 | return (err); | |||
1616 | } | |||
1617 | ||||
1618 | blkctr += nblocks; | |||
1619 | ||||
1620 | chunks[i].compressed_size = (blkctr + hib->image_offset - | |||
1621 | chunks[i].offset) * DEV_BSIZE; | |||
1622 | } | |||
1623 | ||||
1624 | hib->chunktable_offset = hib->image_offset + blkctr; | |||
1625 | return (0); | |||
1626 | } | |||
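/*
 * Editor's sketch (illustrative, not part of the original source): the
 * finalize accounting above with hypothetical numbers, assuming 4 KB pages
 * and 512-byte DEV_BSIZE blocks.  If the Z_FINISH call leaves avail_out at
 * 7680 of the 2 * PAGE_SIZE output window, 512 bytes were produced and one
 * block is written.
 */
#if 0
	size_t out_remaining = 7680;			/* hypothetical avail_out */
	size_t used = 2 * PAGE_SIZE - out_remaining;	/* 512 bytes produced */
	size_t nblocks = used / DEV_BSIZE;		/* 1 block */
	if (used % DEV_BSIZE != 0)
		nblocks++;				/* no-op here: 512 is block aligned */
#endif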
1627 | ||||
1628 | /* | |||
1629 | * Reset the zlib stream state and allocate a new hiballoc area for either | |||
1630 | * inflate or deflate. This function is called once for each hibernate chunk. | |||
1631 | * Calling hiballoc_init multiple times is acceptable since the memory it is | |||
1632 | * provided is unmanaged memory (stolen). We use the memory provided to us | |||
1633 | * by the piglet allocated via the supplied hib. | |||
1634 | */ | |||
1635 | int | |||
1636 | hibernate_zlib_reset(union hibernate_info *hib, int deflate) | |||
1637 | { | |||
1638 | vaddr_t hibernate_zlib_start; | |||
1639 | size_t hibernate_zlib_size; | |||
1640 | char *pva = (char *)hib->piglet_va; | |||
1641 | struct hibernate_zlib_state *hibernate_state; | |||
1642 | ||||
1643 | hibernate_state = | |||
1644 | (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; | |||
1645 | ||||
1646 | if (!deflate) | |||
1647 | pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK)); | |||
1648 | ||||
1649 | /* | |||
1650 | * See piglet layout information at the start of this file for | |||
1651 | * information on the zlib page assignments. | |||
1652 | */ | |||
1653 | hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE)); | |||
1654 | hibernate_zlib_size = 80 * PAGE_SIZE; | |||
1655 | ||||
1656 | memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size); | |||
1657 | memset(hibernate_state, 0, PAGE_SIZE); | |||
1658 | ||||
1659 | /* Set up stream structure */ | |||
1660 | hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; | |||
1661 | hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; | |||
1662 | ||||
1663 | /* Initialize the hiballoc arena for zlib allocs/frees */ | |||
1664 | hiballoc_init(&hibernate_state->hiballoc_arena, | |||
1665 | (caddr_t)hibernate_zlib_start, hibernate_zlib_size); | |||
1666 | ||||
1667 | if (deflate) { | |||
1668 | return deflateInit(&hibernate_state->hib_stream, | |||
1669 | Z_BEST_SPEED); | |||
1670 | } else | |||
1671 | return inflateInit(&hibernate_state->hib_stream); | |||
1672 | } | |||
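/*
 * Editor's note (illustrative, not part of the original source): the zlib
 * arena carved out above starts at piglet offset 30 * PAGE_SIZE and spans
 * 80 pages, so it ends at 110 * PAGE_SIZE, the same offset that
 * hibernate_unpack_image() passes to hibernate_resume_machdep() as the
 * retguard save area.
 */
#if 0
	vaddr_t arena_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
	vaddr_t arena_end = arena_start + (80 * PAGE_SIZE);	/* == pva + 110 * PAGE_SIZE */
#endif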
1673 | ||||
1674 | /* | |||
1675 | * Reads the hibernated memory image from disk, whose location and | |||
1676 | * size are recorded in hib. Begin by reading the persisted | |||
1677 | * chunk table, which records the original chunk placement location | |||
1678 | * and compressed size for each. Next, allocate a pig region of | |||
1679 | * sufficient size to hold the compressed image. Next, read the | |||
1680 | * chunks into the pig area (calling hibernate_read_chunks to do this), | |||
1681 | * and finally, if all of the above succeeds, clear the hibernate signature. | |||
1682 | * The function will then return to hibernate_resume, which will proceed | |||
1683 | * to unpack the pig image to the correct place in memory. | |||
1684 | */ | |||
1685 | int | |||
1686 | hibernate_read_image(union hibernate_info *hib) | |||
1687 | { | |||
1688 | size_t compressed_size, disk_size, chunktable_size, pig_sz; | |||
1689 | paddr_t image_start, image_end, pig_start, pig_end; | |||
1690 | struct hibernate_disk_chunk *chunks; | |||
1691 | daddr_t blkctr; | |||
1692 | vaddr_t chunktable = (vaddr_t)NULL; | |||
1693 | paddr_t piglet_chunktable = hib->piglet_pa + | |||
1694 | HIBERNATE_CHUNK_SIZE; | |||
1695 | int i, status; | |||
1696 | ||||
1697 | status = 0; | |||
1698 | pmap_activate(curproc); | |||
1699 | ||||
1700 | /* Calculate total chunk table size in disk blocks */ | |||
1701 | chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE; | |||
1702 | ||||
1703 | blkctr = hib->chunktable_offset; | |||
1704 | ||||
1705 | chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any, | |||
1706 | &kp_none, &kd_nowait); | |||
1707 | ||||
1708 | if (!chunktable) | |||
1709 | return (1); | |||
1710 | ||||
1711 | /* Map chunktable pages */ | |||
1712 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) | |||
1713 | pmap_kenter_pa(chunktable + i, piglet_chunktable + i, | |||
1714 | PROT_READ | PROT_WRITE); | |||
1715 | pmap_update(pmap_kernel()); | |||
1716 | ||||
1717 | /* Read the chunktable from disk into the piglet chunktable */ | |||
1718 | for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; | |||
1719 | i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE) | |||
1720 | hibernate_block_io(hib, blkctr, MAXPHYS, | |||
1721 | chunktable + i, 0); | |||
1722 | ||||
1723 | blkctr = hib->image_offset; | |||
1724 | compressed_size = 0; | |||
1725 | ||||
1726 | chunks = (struct hibernate_disk_chunk *)chunktable; | |||
1727 | ||||
1728 | for (i = 0; i < hib->chunk_ctr; i++) | |||
1729 | compressed_size += chunks[i].compressed_size; | |||
1730 | ||||
1731 | disk_size = compressed_size; | |||
1732 | ||||
1733 | printf("unhibernating @ block %lld length %luMB\n", | |||
1734 | hib->sig_offset - chunktable_size, | |||
1735 | compressed_size / (1024 * 1024)); | |||
1736 | ||||
1737 | /* Allocate the pig area */ | |||
1738 | pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE; | |||
1739 | if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) { | |||
1740 | status = 1; | |||
1741 | goto unmap; | |||
1742 | } | |||
1743 | ||||
1744 | pig_end = pig_start + pig_sz; | |||
1745 | ||||
1746 | /* Calculate image extents. Pig image must end on a chunk boundary. */ | |||
1747 | image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1); | |||
1748 | image_start = image_end - disk_size; | |||
1749 | ||||
1750 | hibernate_read_chunks(hib, image_start, image_end, disk_size, | |||
1751 | chunks); | |||
1752 | ||||
1753 | /* Prepare the resume time pmap/page table */ | |||
1754 | hibernate_populate_resume_pt(hib, image_start, image_end); | |||
1755 | ||||
1756 | unmap: | |||
1757 | /* Unmap chunktable pages */ | |||
1758 | pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE); | |||
1759 | pmap_update(pmap_kernel()); | |||
1760 | ||||
1761 | return (status); | |||
1762 | } | |||
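/*
 * Editor's sketch (illustrative, not part of the original source): how the
 * image extents above are derived, with hypothetical sizes.  The compressed
 * image is pushed to the top of the pig area so that it ends on a chunk
 * boundary; the one-chunk slack added to pig_sz absorbs the rounding.
 */
#if 0
	size_t disk_size = 123 * 1024 * 1024;			/* hypothetical */
	size_t pig_sz = disk_size + HIBERNATE_CHUNK_SIZE;	/* one chunk of slack */
	paddr_t pig_start = 0x40000000;				/* hypothetical */
	paddr_t pig_end = pig_start + pig_sz;
	paddr_t image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
	paddr_t image_start = image_end - disk_size;
#endif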
1763 | ||||
1764 | /* | |||
1765 | * Read the hibernated memory chunks from disk (chunk information at this | |||
1766 | * point is stored in the piglet) into the pig area specified by | |||
1767 | * [pig_start .. pig_end]. Order the chunks so that the final chunk is the | |||
1768 | * only chunk with overlap possibilities. | |||
1769 | */ | |||
1770 | int | |||
1771 | hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, | |||
1772 | paddr_t pig_end, size_t image_compr_size, | |||
1773 | struct hibernate_disk_chunk *chunks) | |||
1774 | { | |||
1775 | paddr_t img_cur, piglet_base; | |||
1776 | daddr_t blkctr; | |||
1777 | size_t processed, compressed_size, read_size; | |||
1778 | int nchunks, nfchunks, num_io_pages; | |||
1779 | vaddr_t tempva, hibernate_fchunk_area; | |||
1780 | short *fchunks, i, j; | |||
1781 | ||||
1782 | tempva = (vaddr_t)NULL; | |||
1783 | hibernate_fchunk_area = (vaddr_t)NULL; | |||
1784 | nfchunks = 0; | |||
1785 | piglet_base = hib->piglet_pa; | |||
1786 | global_pig_start = pig_start; | |||
1787 | ||||
1788 | /* | |||
1789 | * These mappings go into the resuming kernel's page table, and are | |||
1790 | * used only during image read. They disappear from existence | |||
1791 | * when the suspended kernel is unpacked on top of us. | |||
1792 | */ | |||
1793 | tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none, | |||
1794 | &kd_nowait); | |||
1795 | if (!tempva) | |||
1796 | return (1); | |||
1797 | hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any, | |||
1798 | &kp_none, &kd_nowait); | |||
1799 | if (!hibernate_fchunk_area) | |||
1800 | return (1); | |||
1801 | ||||
1802 | /* Final output chunk ordering VA */ | |||
1803 | fchunks = (short *)hibernate_fchunk_area; | |||
1804 | ||||
1805 | /* Map the chunk ordering region */ | |||
1806 | for (i = 0; i < 24; i++) | |||
1807 | pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE), | |||
1808 | piglet_base + ((4 + i) * PAGE_SIZE), | |||
1809 | PROT_READ | PROT_WRITE); | |||
1810 | pmap_update(pmap_kernel()); | |||
1811 | ||||
1812 | nchunks = hib->chunk_ctr; | |||
1813 | ||||
1814 | /* Initially start all chunks as unplaced */ | |||
1815 | for (i = 0; i < nchunks; i++) | |||
1816 | chunks[i].flags = 0; | |||
1817 | ||||
1818 | /* | |||
1819 | * Search the list for chunks that are outside the pig area. These | |||
1820 | * can be placed first in the final output list. | |||
1821 | */ | |||
1822 | for (i = 0; i < nchunks; i++) { | |||
1823 | if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) { | |||
1824 | fchunks[nfchunks] = i; | |||
1825 | nfchunks++; | |||
1826 | chunks[i].flags |= HIBERNATE_CHUNK_PLACED; | |||
1827 | } | |||
1828 | } | |||
1829 | ||||
1830 | /* | |||
1831 | * Walk the ordering, place the chunks in ascending memory order. | |||
1832 | */ | |||
1833 | for (i = 0; i < nchunks; i++) { | |||
1834 | if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) { | |||
1835 | fchunks[nfchunks] = i; | |||
1836 | nfchunks++; | |||
1837 | chunks[i].flags = HIBERNATE_CHUNK_PLACED; | |||
1838 | } | |||
1839 | } | |||
1840 | ||||
1841 | img_cur = pig_start; | |||
1842 | ||||
1843 | for (i = 0; i < nfchunks; i++) { | |||
1844 | blkctr = chunks[fchunks[i]].offset; | |||
1845 | processed = 0; | |||
1846 | compressed_size = chunks[fchunks[i]].compressed_size; | |||
1847 | ||||
1848 | while (processed < compressed_size) { | |||
1849 | if (compressed_size - processed >= MAXPHYS) | |||
1850 | read_size = MAXPHYS; | |||
1851 | else | |||
1852 | read_size = compressed_size - processed; | |||
1853 | ||||
1854 | /* | |||
1855 | * We're reading read_size bytes, offset from the | |||
1856 | * start of a page by img_cur % PAGE_SIZE, so the | |||
1857 | * end will be read_size + (img_cur % PAGE_SIZE) | |||
1858 | * from the start of the first page. Round that | |||
1859 | * up to the next page size. | |||
1860 | */ | |||
1861 | num_io_pages = (read_size + (img_cur % PAGE_SIZE) | |||
1862 | + PAGE_SIZE - 1) / PAGE_SIZE; | |||
1863 | ||||
1864 | KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1); | |||
1865 | ||||
1866 | /* Map pages for this read */ | |||
1867 | for (j = 0; j < num_io_pages; j ++) | |||
1868 | pmap_kenter_pa(tempva + j * PAGE_SIZE, | |||
1869 | img_cur + j * PAGE_SIZE, | |||
1870 | PROT_READ | PROT_WRITE); | |||
1871 | ||||
1872 | pmap_update(pmap_kernel()); | |||
1873 | ||||
1874 | hibernate_block_io(hib, blkctr, read_size, | |||
1875 | tempva + (img_cur & PAGE_MASK), 0); | |||
1876 | ||||
1877 | blkctr += (read_size / DEV_BSIZE); | |||
1878 | ||||
1879 | pmap_kremove(tempva, num_io_pages * PAGE_SIZE); | |||
1880 | pmap_update(pmap_kernel()); | |||
1881 | ||||
1882 | processed += read_size; | |||
1883 | img_cur += read_size; | |||
1884 | } | |||
1885 | } | |||
1886 | ||||
1887 | pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE); | |||
1888 | pmap_update(pmap_kernel()); | |||
1889 | ||||
1890 | return (0); | |||
1891 | } | |||
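/*
 * Editor's sketch (illustrative, not part of the original source): the
 * num_io_pages rounding above, assuming a 64 KB MAXPHYS read that starts
 * 0x800 bytes into a 4 KB page; the read then spills into one extra page,
 * which is exactly the MAXPHYS/PAGE_SIZE + 1 bound asserted in the loop.
 */
#if 0
	size_t read_size = MAXPHYS;		/* 64 KB */
	paddr_t img_cur = 0x40000800;		/* hypothetical, not page aligned */
	int num_io_pages = (read_size + (img_cur % PAGE_SIZE)
	    + PAGE_SIZE - 1) / PAGE_SIZE;	/* 17 pages == MAXPHYS/PAGE_SIZE + 1 */
#endif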
1892 | ||||
1893 | /* | |||
1894 | * Hibernating a machine comprises the following operations: | |||
1895 | * 1. Calculating this machine's hibernate_info information | |||
1896 | * 2. Allocating a piglet and saving the piglet's physaddr | |||
1897 | * 3. Calculating the memory chunks | |||
1898 | * 4. Writing the compressed chunks to disk | |||
1899 | * 5. Writing the chunk table | |||
1900 | * 6. Writing the signature block (hibernate_info) | |||
1901 | * | |||
1902 | * On most architectures, the function calling hibernate_suspend would | |||
1903 | * then power off the machine using some MD-specific implementation. | |||
1904 | */ | |||
1905 | int | |||
1906 | hibernate_suspend(void) | |||
1907 | { | |||
1908 | union hibernate_info hib; | |||
1909 | u_long start, end; | |||
1910 | ||||
1911 | /* | |||
1912 | * Calculate memory ranges, swap offsets, etc. | |||
1913 | * This also allocates a piglet whose physaddr is stored in | |||
1914 | * hib->piglet_pa and vaddr stored in hib->piglet_va | |||
1915 | */ | |||
1916 | if (get_hibernate_info(&hib, 1)) { | |||
1917 | DPRINTF("failed to obtain hibernate info\n"); | |||
1918 | return (1); | |||
1919 | } | |||
1920 | ||||
1921 | /* Find a page-addressed region in swap [start,end] */ | |||
1922 | if (uvm_hibswap(hib.dev, &start, &end)) { | |||
1923 | printf("hibernate: cannot find any swap\n"); | |||
1924 | return (1); | |||
1925 | } | |||
1926 | ||||
1927 | if (end - start < 1000) { | |||
1928 | printf("hibernate: insufficient swap (%lu is too small)\n", | |||
1929 | end - start + 1); | |||
1930 | return (1); | |||
1931 | } | |||
1932 | ||||
1933 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, | |||
1934 | &retguard_start_phys); | |||
1935 | pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, | |||
1936 | &retguard_end_phys); | |||
1937 | ||||
1938 | /* Calculate block offsets in swap */ | |||
1939 | hib.image_offset = ctod(start); | |||
1940 | ||||
1941 | DPRINTF("hibernate @ block %lld max-length %lu blocks\n", | |||
1942 | hib.image_offset, ctod(end) - ctod(start) + 1); | |||
1943 | ||||
1944 | pmap_activate(curproc); | |||
1945 | DPRINTF("hibernate: writing chunks\n"); | |||
1946 | if (hibernate_write_chunks(&hib)) { | |||
1947 | DPRINTF("hibernate_write_chunks failed\n"); | |||
1948 | return (1); | |||
1949 | } | |||
1950 | ||||
1951 | DPRINTF("hibernate: writing chunktable\n"); | |||
1952 | if (hibernate_write_chunktable(&hib)) { | |||
1953 | DPRINTF("hibernate_write_chunktable failed\n"); | |||
1954 | return (1); | |||
1955 | } | |||
1956 | ||||
1957 | DPRINTF("hibernate: writing signature\n"); | |||
1958 | if (hibernate_write_signature(&hib)) { | |||
1959 | DPRINTF("hibernate_write_signature failed\n"); | |||
1960 | return (1); | |||
1961 | } | |||
1962 | ||||
1963 | /* Allow the disk to settle */ | |||
1964 | delay(500000); | |||
1965 | ||||
1966 | /* | |||
1967 | * Give the device-specific I/O function a notification that we're | |||
1968 | * done, and that it can clean up or shutdown as needed. | |||
1969 | */ | |||
1970 | hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page); | |||
1971 | return (0); | |||
1972 | } | |||
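/*
 * Editor's sketch (illustrative, not part of the original source): the swap
 * offset conversion used above.  ctod() turns a page-sized swap slot number
 * into a DEV_BSIZE block number; with the 4 KB pages and 512-byte blocks
 * assumed here that is a shift by 3.
 */
#if 0
	u_long start = 1000;			/* hypothetical swap slot from uvm_hibswap() */
	daddr_t image_offset = ctod(start);	/* 1000 << (12 - 9) == 8000 blocks */
#endif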
1973 | ||||
1974 | int | |||
1975 | hibernate_alloc(void) | |||
1976 | { | |||
1977 | KASSERT(global_piglet_va == 0); | |||
1978 | KASSERT(hibernate_temp_page == 0); | |||
1979 | ||||
1980 | pmap_activate(curproc); | |||
1981 | pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE, | |||
1982 | PROT_READ | PROT_WRITE); | |||
1983 | ||||
1984 | /* Allocate a piglet, store its addresses in the supplied globals */ | |||
1985 | if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa, | |||
1986 | HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE)) | |||
1987 | goto unmap; | |||
1988 | ||||
1989 | /* | |||
1990 | * Allocate VA for the temp page. | |||
1991 | * | |||
1992 | * This will become part of the suspended kernel and will | |||
1993 | * be freed in hibernate_free, upon resume (or hibernate | |||
1994 | * failure) | |||
1995 | */ | |||
1996 | hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, | |||
1997 | &kp_none, &kd_nowait); | |||
1998 | if (!hibernate_temp_page) { | |||
1999 | uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE); | |||
2000 | global_piglet_va = 0; | |||
2001 | goto unmap; | |||
2002 | } | |||
2003 | return (0); | |||
2004 | unmap: | |||
2005 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); | |||
2006 | pmap_update(pmap_kernel()); | |||
2007 | return (ENOMEM); | |||
2008 | } | |||
2009 | ||||
2010 | /* | |||
2011 | * Free items allocated by hibernate_alloc() | |||
2012 | */ | |||
2013 | void | |||
2014 | hibernate_free(void) | |||
2015 | { | |||
2016 | pmap_activate(curproc); | |||
2017 | ||||
2018 | if (global_piglet_va) | |||
2019 | uvm_pmr_free_piglet(global_piglet_va, | |||
2020 | 4 * HIBERNATE_CHUNK_SIZE); | |||
2021 | ||||
2022 | if (hibernate_temp_page) { | |||
2023 | pmap_kremove(hibernate_temp_page, PAGE_SIZE); | |||
2024 | km_free((void *)hibernate_temp_page, PAGE_SIZE, | |||
2025 | &kv_any, &kp_none); | |||
2026 | } | |||
2027 | ||||
2028 | global_piglet_va = 0; | |||
2029 | hibernate_temp_page = 0; | |||
2030 | pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); | |||
2031 | pmap_update(pmap_kernel()); | |||
2032 | } |