File:	uvm/uvm_pmemrange.c
Warning:	line 2118, column 5: Address of stack memory associated with local variable 'pma' is still referred to by the global variable 'uvm' upon returning to the caller. This will be a dangling reference.
/*	$OpenBSD: uvm_pmemrange.c,v 1.61 2021/03/12 14:15:49 jsg Exp $	*/

/*
 * Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>

/*
 * 2 trees: addr tree and size tree.
 *
 * The allocator keeps chunks of free pages (called a range).
 * Two pages are part of the same range if:
 * - all pages in between are part of that range,
 * - they are of the same memory type (zeroed or non-zeroed),
 * - they are part of the same pmemrange.
 * A pmemrange is a range of memory which is part of the same vm_physseg
 * and has a use-count.
 *
 * addr tree is vm_page[0].objt
 * size tree is vm_page[1].objt
 *
 * The size tree is not used for memory ranges of 1 page; instead,
 * the single queue vm_page[0].pageq is used.
 *
 * vm_page[0].fpgsz describes the length of a free range. Two adjacent ranges
 * are joined, unless:
 * - they have pages in between them which are not free
 * - they belong to different memtypes (zeroed vs dirty memory)
 * - they are in different pmemrange areas (ISA vs non-ISA memory for instance)
 * - they are not a continuation of the same array
 * The latter issue is caused by vm_physseg ordering and splitting from the
 * MD initialization machinery. The MD code is dependent on freelists and
 * happens to split ISA memory from non-ISA memory.
 * (Note: freelists die die die!)
 *
 * uvm_page_init guarantees that every vm_physseg contains an array of
 * struct vm_page. Also, uvm_page_physload allocates an array of struct
 * vm_page. This code depends on that array. The array may break across
 * vm_physseg boundaries.
 */
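
/*
 * Editor's illustration (not part of the original file): how a free
 * range of "n" pages rooted at "pg" maps onto the structures described
 * above. uvm_pmr_layout_example() is a hypothetical helper; it simply
 * mirrors what uvm_pmr_insert_addr() and uvm_pmr_insert_size() below do.
 */
#if 0
static void
uvm_pmr_layout_example(struct uvm_pmemrange *pmr, struct vm_page *pg,
    psize_t n, int memtype)
{
	pg[0].fpgsz = n;	/* length is kept in the first page */
	/* addr tree links through vm_page[0].objt */
	RBT_INSERT(uvm_pmr_addr, &pmr->addr, &pg[0]);
	if (n == 1)		/* 1-page ranges go on the single queue */
		TAILQ_INSERT_TAIL(&pmr->single[memtype], &pg[0], pageq);
	else			/* size tree links through vm_page[1].objt */
		RBT_INSERT(uvm_pmr_size, &pmr->size[memtype], &pg[1]);
}
#endif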

/*
 * Validate the flags of the page. (Used in asserts.)
 * Any free page must have the PQ_FREE flag set.
 * Free pages may be zeroed.
 * Pmap flags are left untouched.
 *
 * The PQ_FREE flag is not checked here: by not checking, we can easily use
 * this check in pages which are freed.
 */
#define VALID_FLAGS(pg_flags)						\
	(((pg_flags) & ~(PQ_FREE|PG_ZERO|PG_PMAPMASK)) == 0x0)
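
/*
 * Editor's example: VALID_FLAGS(PQ_FREE|PG_ZERO) holds, while a page
 * still carrying e.g. PG_BUSY fails the check, since PG_BUSY is not in
 * the permitted mask.
 */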

/* Tree comparators. */
int	uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *,
	    const struct uvm_pmemrange *);
int	uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
int	uvm_pmr_pg_to_memtype(struct vm_page *);

#ifdef DDB
void	uvm_pmr_print(void);
#endif

/*
 * Memory types. The page flags are used to derive what the current memory
 * type of a page is.
 */
int
uvm_pmr_pg_to_memtype(struct vm_page *pg)
{
	if (pg->pg_flags & PG_ZERO)
		return UVM_PMR_MEMTYPE_ZERO;
	/* Default: dirty memory. */
	return UVM_PMR_MEMTYPE_DIRTY;
}

/* Trees. */
RBT_GENERATE(uvm_pmr_addr, vm_page, objt, uvm_pmr_addr_cmp);
RBT_GENERATE(uvm_pmr_size, vm_page, objt, uvm_pmr_size_cmp);
RBT_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
    uvm_pmemrange_addr_cmp);

/* Validation. */
#ifdef DEBUG
void	uvm_pmr_assertvalid(struct uvm_pmemrange *pmr);
#else
#define uvm_pmr_assertvalid(pmr)	do {} while (0)
#endif

psize_t	uvm_pmr_get1page(psize_t, int, struct pglist *,
	    paddr_t, paddr_t, int);

struct uvm_pmemrange	*uvm_pmr_allocpmr(void);
struct vm_page		*uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
struct vm_page		*uvm_pmr_nextsz(struct uvm_pmemrange *,
			    struct vm_page *, int);
void			 uvm_pmr_pnaddr(struct uvm_pmemrange *pmr,
			    struct vm_page *pg, struct vm_page **pg_prev,
			    struct vm_page **pg_next);
struct vm_page		*uvm_pmr_findnextsegment(struct uvm_pmemrange *,
			    struct vm_page *, paddr_t);
struct vm_page		*uvm_pmr_findprevsegment(struct uvm_pmemrange *,
			    struct vm_page *, paddr_t);
psize_t			 uvm_pmr_remove_1strange(struct pglist *, paddr_t,
			    struct vm_page **, int);
psize_t			 uvm_pmr_remove_1strange_reverse(struct pglist *,
			    paddr_t *);
void			 uvm_pmr_split(paddr_t);
struct uvm_pmemrange	*uvm_pmemrange_find(paddr_t);
struct uvm_pmemrange	*uvm_pmemrange_use_insert(struct uvm_pmemrange_use *,
			    struct uvm_pmemrange *);
psize_t			 pow2divide(psize_t, psize_t);
struct vm_page		*uvm_pmr_rootupdate(struct uvm_pmemrange *,
			    struct vm_page *, paddr_t, paddr_t, int);

/*
 * Computes num/denom and rounds it up to the next power-of-2.
 *
 * This is a division function which calculates an approximation of
 * num/denom, with result =~ num/denom. It is meant to be fast and doesn't
 * have to be accurate.
 *
 * Providing too large a value makes the allocator slightly faster, at the
 * risk of hitting the failure case more often. Providing too small a value
 * makes the allocator a bit slower, but less likely to hit a failure case.
 */
psize_t
pow2divide(psize_t num, psize_t denom)
{
	int rshift;

	for (rshift = 0; num > denom; rshift++, denom <<= 1)
		;
	return (paddr_t)1 << rshift;
}
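
/*
 * Worked example (editor's note): pow2divide(10, 3) iterates
 * rshift=0, denom=3 -> rshift=1, denom=6 -> rshift=2, denom=12;
 * the loop then stops since 10 > 12 is false, and the result is
 * 1 << 2 = 4, i.e. 10/3 =~ 3.33 rounded up to the next power of two.
 */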

/*
 * Predicate: lhs is a subrange of rhs.
 *
 * If rhs_low == 0: don't care about lower bound.
 * If rhs_high == 0: don't care about upper bound.
 */
#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high)	\
	(((rhs_low) == 0 || (lhs_low) >= (rhs_low)) &&			\
	((rhs_high) == 0 || (lhs_high) <= (rhs_high)))

/*
 * Predicate: lhs intersects with rhs.
 *
 * If rhs_low == 0: don't care about lower bound.
 * If rhs_high == 0: don't care about upper bound.
 * Ranges don't intersect if they don't have any page in common; array
 * semantics mean that < instead of <= should be used here.
 */
#define PMR_INTERSECTS_WITH(lhs_low, lhs_high, rhs_low, rhs_high)	\
	(((rhs_low) == 0 || (rhs_low) < (lhs_high)) &&			\
	((rhs_high) == 0 || (lhs_low) < (rhs_high)))
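
/*
 * Editor's note: the bounds are half-open page numbers, [low, high).
 * E.g. [0, 4) and [4, 8) share no page, and indeed
 * PMR_INTERSECTS_WITH(0, 4, 4, 8) evaluates to false because 4 < 4
 * fails.
 */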

/*
 * Align to power-of-2 alignment.
 */
#define PMR_ALIGN(pgno, align)						\
	(((pgno) + ((align) - 1)) & ~((align) - 1))
#define PMR_ALIGN_DOWN(pgno, align)					\
	((pgno) & ~((align) - 1))
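
/*
 * Worked example (editor's note): with align = 4,
 * PMR_ALIGN(5, 4) = (5 + 3) & ~3 = 8 and PMR_ALIGN_DOWN(5, 4) = 4;
 * both leave an already-aligned page number unchanged.
 */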


/*
 * Comparator: sort by address ascending.
 */
int
uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *lhs,
    const struct uvm_pmemrange *rhs)
{
	return lhs->low < rhs->low ? -1 : lhs->low > rhs->low;
}

/*
 * Comparator: sort by use ascending.
 *
 * The higher the use value of a range, the more devices need memory in
 * this range. Therefore allocate from the range with the lowest use first.
 */
int
uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
{
	int result;

	result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use;
	if (result == 0)
		result = uvm_pmemrange_addr_cmp(lhs, rhs);
	return result;
}

int
uvm_pmr_addr_cmp(const struct vm_page *lhs, const struct vm_page *rhs)
{
	paddr_t lhs_addr, rhs_addr;

	lhs_addr = VM_PAGE_TO_PHYS(lhs);
	rhs_addr = VM_PAGE_TO_PHYS(rhs);

	return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr);
}

int
uvm_pmr_size_cmp(const struct vm_page *lhs, const struct vm_page *rhs)
{
	psize_t lhs_size, rhs_size;
	int cmp;

	/* Using second tree, so we receive pg[1] instead of pg[0]. */
	lhs_size = (lhs - 1)->fpgsz;
	rhs_size = (rhs - 1)->fpgsz;

	cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size);
	if (cmp == 0)
		cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1);
	return cmp;
}

/*
 * Find the first range of free pages that is at least sz pages long.
 */
struct vm_page *
uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
{
	struct vm_page *node, *best;

	KASSERT(sz >= 1);

	if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti]))
		return TAILQ_FIRST(&pmr->single[mti]);

	node = RBT_ROOT(uvm_pmr_size, &pmr->size[mti]);
	best = NULL;
	while (node != NULL) {
		if ((node - 1)->fpgsz >= sz) {
			best = (node - 1);
			node = RBT_LEFT(uvm_objtree, node);
		} else
			node = RBT_RIGHT(uvm_objtree, node);
	}
	return best;
}

/*
 * Finds the next range. The next range has a size >= pg->fpgsz.
 * Returns NULL if no more ranges are available.
 */
struct vm_page *
uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
{
	struct vm_page *npg;

	KASSERT(pmr != NULL && pg != NULL);
	if (pg->fpgsz == 1) {
		if (TAILQ_NEXT(pg, pageq) != NULL)
			return TAILQ_NEXT(pg, pageq);
		else
			npg = RBT_MIN(uvm_pmr_size, &pmr->size[mt]);
	} else
		npg = RBT_NEXT(uvm_pmr_size, pg + 1);

	return npg == NULL ? NULL : npg - 1;
}

/*
 * Finds the previous and next ranges relative to the (uninserted) pg range.
 *
 * *pg_prev == NULL if no previous range is available that can join with
 * pg.
 * *pg_next == NULL if no next range is available that can join with
 * pg.
 */
void
uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
    struct vm_page **pg_prev, struct vm_page **pg_next)
{
	KASSERT(pg_prev != NULL && pg_next != NULL);

	*pg_next = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg);
	if (*pg_next == NULL)
		*pg_prev = RBT_MAX(uvm_pmr_addr, &pmr->addr);
	else
		*pg_prev = RBT_PREV(uvm_pmr_addr, *pg_next);

	KDASSERT(*pg_next == NULL ||
	    VM_PAGE_TO_PHYS(*pg_next) > VM_PAGE_TO_PHYS(pg));
	KDASSERT(*pg_prev == NULL ||
	    VM_PAGE_TO_PHYS(*pg_prev) < VM_PAGE_TO_PHYS(pg));

	/* Reset if not contig. */
	if (*pg_prev != NULL &&
	    (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fpgsz
	    != atop(VM_PAGE_TO_PHYS(pg)) ||
	    *pg_prev + (*pg_prev)->fpgsz != pg ||	/* Array broke. */
	    uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
		*pg_prev = NULL;
	if (*pg_next != NULL &&
	    (atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz
	    != atop(VM_PAGE_TO_PHYS(*pg_next)) ||
	    pg + pg->fpgsz != *pg_next ||	/* Array broke. */
	    uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
		*pg_next = NULL;
	return;
}

/*
 * Remove a range from the address tree.
 * Address tree maintains pmr counters.
 */
void
uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
	KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
	KDASSERT(pg->pg_flags & PQ_FREE);
	RBT_REMOVE(uvm_pmr_addr, &pmr->addr, pg);

	pmr->nsegs--;
}
/*
 * Remove a range from the size tree.
 */
void
uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
	int memtype;
#ifdef DEBUG
	struct vm_page *i;
#endif

	KDASSERT(pg->fpgsz >= 1);
	KDASSERT(pg->pg_flags & PQ_FREE);
	memtype = uvm_pmr_pg_to_memtype(pg);

	if (pg->fpgsz == 1) {
#ifdef DEBUG
		TAILQ_FOREACH(i, &pmr->single[memtype], pageq) {
			if (i == pg)
				break;
		}
		KDASSERT(i == pg);
#endif
		TAILQ_REMOVE(&pmr->single[memtype], pg, pageq);
	} else {
		KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[memtype],
		    pg + 1) == pg + 1);
		RBT_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1);
	}
}
/* Remove from both trees. */
void
uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
	uvm_pmr_assertvalid(pmr);
	uvm_pmr_remove_size(pmr, pg);
	uvm_pmr_remove_addr(pmr, pg);
	uvm_pmr_assertvalid(pmr);
}

/*
 * Insert the range described in pg.
 * Returns the range thus created (which may be joined with the previous and
 * next ranges).
 * If no_join, the caller guarantees that the range cannot possibly join
 * with adjacent ranges.
 */
struct vm_page *
uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
{
	struct vm_page *prev, *next;

#ifdef DEBUG
	struct vm_page *i;
	int mt;
#endif

	KDASSERT(pg->pg_flags & PQ_FREE);
	KDASSERT(pg->fpgsz >= 1);

#ifdef DEBUG
	for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
		TAILQ_FOREACH(i, &pmr->single[mt], pageq)
			KDASSERT(i != pg);
		if (pg->fpgsz > 1) {
			KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mt],
			    pg + 1) == NULL);
		}
		KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL);
	}
#endif

	if (!no_join) {
		uvm_pmr_pnaddr(pmr, pg, &prev, &next);
		if (next != NULL) {
			uvm_pmr_remove_size(pmr, next);
			uvm_pmr_remove_addr(pmr, next);
			pg->fpgsz += next->fpgsz;
			next->fpgsz = 0;
		}
		if (prev != NULL) {
			uvm_pmr_remove_size(pmr, prev);
			prev->fpgsz += pg->fpgsz;
			pg->fpgsz = 0;
			return prev;
		}
	}

	RBT_INSERT(uvm_pmr_addr, &pmr->addr, pg);

	pmr->nsegs++;

	return pg;
}
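
/*
 * Editor's example: joining on insert in uvm_pmr_insert_addr() above.
 * If pages [8, 10) and [12, 14) are free and the range [10, 12) is
 * inserted with no_join == 0, uvm_pmr_pnaddr() finds both neighbours
 * (same memtype, contiguous array) and the result is a single free
 * range [8, 14) rooted at the vm_page for page number 8.
 */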
/*
 * Insert the range described in pg.
 * The page must already be in the address tree.
 */
void
uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
	int memtype;
#ifdef DEBUG
	struct vm_page *i;
	int mti;
#endif

	KDASSERT(pg->fpgsz >= 1);
	KDASSERT(pg->pg_flags & PQ_FREE);

	memtype = uvm_pmr_pg_to_memtype(pg);
#ifdef DEBUG
	for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
		TAILQ_FOREACH(i, &pmr->single[mti], pageq)
			KDASSERT(i != pg);
		if (pg->fpgsz > 1) {
			KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mti],
			    pg + 1) == NULL);
		}
		KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
	}
	for (i = pg; i < pg + pg->fpgsz; i++)
		KASSERT(uvm_pmr_pg_to_memtype(i) == memtype);
#endif

	if (pg->fpgsz == 1)
		TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq);
	else
		RBT_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1);
}
/* Insert in both trees. */
struct vm_page *
uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
{
	uvm_pmr_assertvalid(pmr);
	pg = uvm_pmr_insert_addr(pmr, pg, no_join);
	uvm_pmr_insert_size(pmr, pg);
	uvm_pmr_assertvalid(pmr);
	return pg;
}

/*
 * Find the last page that is part of this segment.
 * => pg: the range at which to start the search.
 * => boundary: the page number boundary specification (0 = no boundary).
 * => pmr: the pmemrange of the page.
 *
 * This function returns the page one before the start of the next range,
 * so if you want the next range, run TAILQ_NEXT(result, pageq) on the
 * result. The reason is that this way, the length of the segment is
 * easily calculated using: atop(result) - atop(pg) + 1.
 * Hence this function also never returns NULL.
 */
struct vm_page *
uvm_pmr_findnextsegment(struct uvm_pmemrange *pmr,
    struct vm_page *pg, paddr_t boundary)
{
	paddr_t first_boundary;
	struct vm_page *next;
	struct vm_page *prev;

	KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&
	    pmr->high > atop(VM_PAGE_TO_PHYS(pg)));
	if (boundary != 0) {
		first_boundary =
		    PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary);
	} else
		first_boundary = 0;

	/*
	 * Increase next until it hits the first page of the next segment.
	 *
	 * While loop checks the following:
	 * - next != NULL	we have not reached the end of pgl
	 * - boundary == 0 || next < first_boundary
	 *			we do not cross a boundary
	 * - atop(prev) + 1 == atop(next)
	 *			still in the same segment
	 * - low <= last
	 * - high > last	still in the same memory range
	 * - memtype is equal	allocator is unable to view different memtypes
	 *			as part of the same segment
	 * - prev + 1 == next	no array breakage occurs
	 */
	prev = pg;
	next = TAILQ_NEXT(prev, pageq);
	while (next != NULL &&
	    (boundary == 0 || atop(VM_PAGE_TO_PHYS(next)) < first_boundary) &&
	    atop(VM_PAGE_TO_PHYS(prev)) + 1 == atop(VM_PAGE_TO_PHYS(next)) &&
	    pmr->low <= atop(VM_PAGE_TO_PHYS(next)) &&
	    pmr->high > atop(VM_PAGE_TO_PHYS(next)) &&
	    uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
	    prev + 1 == next) {
		prev = next;
		next = TAILQ_NEXT(prev, pageq);
	}

	/*
	 * End of this segment.
	 */
	return prev;
}
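
/*
 * Editor's usage sketch (hypothetical names; mirrors the opening of
 * uvm_pmr_remove_1strange() below): measuring the first contiguous
 * segment of a page list pgl.
 */
#if 0
	struct vm_page *seg_start, *seg_end;
	struct uvm_pmemrange *seg_pmr;
	psize_t seg_len;

	seg_start = TAILQ_FIRST(pgl);
	seg_pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(seg_start)));
	seg_end = uvm_pmr_findnextsegment(seg_pmr, seg_start, 0);
	seg_len = atop(VM_PAGE_TO_PHYS(seg_end) -
	    VM_PAGE_TO_PHYS(seg_start)) + 1;
#endif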

/*
 * Find the first page that is part of this segment.
 * => pg: the range at which to start the search.
 * => boundary: the page number boundary specification (0 = no boundary).
 * => pmr: the pmemrange of the page.
 *
 * This function returns the page one after the end of the previous range,
 * so if you want the previous range, run TAILQ_NEXT(result, pageq) on the
 * result. The reason is that this way, the length of the segment is
 * easily calculated using: atop(pg) - atop(result) + 1.
 * Hence this function also never returns NULL.
 */
struct vm_page *
uvm_pmr_findprevsegment(struct uvm_pmemrange *pmr,
    struct vm_page *pg, paddr_t boundary)
{
	paddr_t first_boundary;
	struct vm_page *next;
	struct vm_page *prev;

	KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&
	    pmr->high > atop(VM_PAGE_TO_PHYS(pg)));
	if (boundary != 0) {
		first_boundary =
		    PMR_ALIGN_DOWN(atop(VM_PAGE_TO_PHYS(pg)), boundary);
	} else
		first_boundary = 0;

	/*
	 * Increase next until it hits the first page of the previous segment.
	 *
	 * While loop checks the following:
	 * - next != NULL	we have not reached the end of pgl
	 * - boundary == 0 || next >= first_boundary
	 *			we do not cross a boundary
	 * - atop(prev) - 1 == atop(next)
	 *			still in the same segment
	 * - low <= last
	 * - high > last	still in the same memory range
	 * - memtype is equal	allocator is unable to view different memtypes
	 *			as part of the same segment
	 * - prev - 1 == next	no array breakage occurs
	 */
	prev = pg;
	next = TAILQ_NEXT(prev, pageq);
	while (next != NULL &&
	    (boundary == 0 || atop(VM_PAGE_TO_PHYS(next)) >= first_boundary) &&
	    atop(VM_PAGE_TO_PHYS(prev)) - 1 == atop(VM_PAGE_TO_PHYS(next)) &&
	    pmr->low <= atop(VM_PAGE_TO_PHYS(next)) &&
	    pmr->high > atop(VM_PAGE_TO_PHYS(next)) &&
	    uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
	    prev - 1 == next) {
		prev = next;
		next = TAILQ_NEXT(prev, pageq);
	}

	/*
	 * Start of this segment.
	 */
	return prev;
}

/*
 * Remove the first segment of contiguous pages from pgl.
 * A segment ends if it crosses boundary (unless boundary = 0) or
 * if it would enter a different uvm_pmemrange.
 *
 * Work: the page range that the caller is currently working with.
 * May be NULL.
 *
 * If is_desperate is non-zero, the smallest segment is erased. Otherwise,
 * the first segment is erased (which, if called by uvm_pmr_getpages(),
 * probably is the smallest or very close to it).
 */
psize_t
uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary,
    struct vm_page **work, int is_desperate)
{
	struct vm_page *start, *end, *iter, *iter_end, *inserted, *lowest;
	psize_t count;
	struct uvm_pmemrange *pmr, *pmr_iter;

	KASSERT(!TAILQ_EMPTY(pgl));

	/*
	 * Initialize to first page.
	 * Unless desperate scan finds a better candidate, this is what'll be
	 * erased.
	 */
	start = TAILQ_FIRST(pgl);
	pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start)));
	end = uvm_pmr_findnextsegment(pmr, start, boundary);

	/*
	 * If we are desperate, we _really_ want to get rid of the smallest
	 * element (rather than a close match to the smallest element).
	 */
	if (is_desperate) {
		/* Linear search for smallest segment. */
		pmr_iter = pmr;
		for (iter = TAILQ_NEXT(end, pageq);
		    iter != NULL && start != end;
		    iter = TAILQ_NEXT(iter_end, pageq)) {
			/*
			 * Only update pmr if it doesn't match current
			 * iteration.
			 */
			if (pmr->low > atop(VM_PAGE_TO_PHYS(iter)) ||
			    pmr->high <= atop(VM_PAGE_TO_PHYS(iter))) {
				pmr_iter = uvm_pmemrange_find(atop(
				    VM_PAGE_TO_PHYS(iter)));
			}

			iter_end = uvm_pmr_findnextsegment(pmr_iter, iter,
			    boundary);

			/*
			 * Current iteration is smaller than best match so
			 * far; update.
			 */
			if (VM_PAGE_TO_PHYS(iter_end) - VM_PAGE_TO_PHYS(iter) <
			    VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start)) {
				start = iter;
				end = iter_end;
				pmr = pmr_iter;
			}
		}
	}

	/*
	 * Calculate count and end of the list.
	 */
	count = atop(VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start)) + 1;
	lowest = start;
	end = TAILQ_NEXT(end, pageq);

	/*
	 * Actually remove the range of pages.
	 *
	 * Sadly, this cannot be done using pointer iteration:
	 * vm_physseg is not guaranteed to be sorted on address, hence
	 * uvm_page_init() may not have initialized its array sorted by
	 * page number.
	 */
	for (iter = start; iter != end; iter = iter_end) {
		iter_end = TAILQ_NEXT(iter, pageq);
		TAILQ_REMOVE(pgl, iter, pageq);
	}

	lowest->fpgsz = count;
	inserted = uvm_pmr_insert(pmr, lowest, 0);

	/*
	 * If the caller was working on a range and this function modified
	 * that range, update the pointer.
	 */
	if (work != NULL && *work != NULL &&
	    atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) &&
	    atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fpgsz >
	    atop(VM_PAGE_TO_PHYS(*work)))
		*work = inserted;
	return count;
}

/*
 * Remove the first segment of contiguous pages from a pgl
 * with the list elements in reverse order of physaddr.
 *
 * A segment ends if it would enter a different uvm_pmemrange.
 *
 * Stores starting physical address of the segment in pstart.
 */
psize_t
uvm_pmr_remove_1strange_reverse(struct pglist *pgl, paddr_t *pstart)
{
	struct vm_page *start, *end, *iter, *iter_end, *lowest;
	psize_t count;
	struct uvm_pmemrange *pmr;

	KASSERT(!TAILQ_EMPTY(pgl));

	start = TAILQ_FIRST(pgl);
	pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start)));
	end = uvm_pmr_findprevsegment(pmr, start, 0);

	KASSERT(end <= start);

	/*
	 * Calculate count and end of the list.
	 */
	count = atop(VM_PAGE_TO_PHYS(start) - VM_PAGE_TO_PHYS(end)) + 1;
	lowest = end;
	end = TAILQ_NEXT(end, pageq);

	/*
	 * Actually remove the range of pages.
	 *
	 * Sadly, this cannot be done using pointer iteration:
	 * vm_physseg is not guaranteed to be sorted on address, hence
	 * uvm_page_init() may not have initialized its array sorted by
	 * page number.
	 */
	for (iter = start; iter != end; iter = iter_end) {
		iter_end = TAILQ_NEXT(iter, pageq);
		TAILQ_REMOVE(pgl, iter, pageq);
	}

	lowest->fpgsz = count;
	(void) uvm_pmr_insert(pmr, lowest, 0);

	*pstart = VM_PAGE_TO_PHYS(lowest);
	return count;
}

/*
 * Extract a number of pages from a segment of free pages.
 * Called by uvm_pmr_getpages.
 *
 * Returns the segment that was created from pages left over at the tail
 * of the removed set of pages, or NULL if no pages were left at the tail.
 */
struct vm_page *
uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg,
    paddr_t start, paddr_t end, struct pglist *result)
{
	struct vm_page *after, *pg_i;
	psize_t before_sz, after_sz;
#ifdef DEBUG
	psize_t i;
#endif

	KDASSERT(end > start);
	KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)));
	KDASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz);
	KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start);
	KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz >= end);

	before_sz = start - atop(VM_PAGE_TO_PHYS(pg));
	after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz - end;
	KDASSERT(before_sz + after_sz + (end - start) == pg->fpgsz);
	uvm_pmr_assertvalid(pmr);

	uvm_pmr_remove_size(pmr, pg);
	if (before_sz == 0)
		uvm_pmr_remove_addr(pmr, pg);
	after = pg + before_sz + (end - start);

	/* Add selected pages to result. */
	for (pg_i = pg + before_sz; pg_i != after; pg_i++) {
		KASSERT(pg_i->pg_flags & PQ_FREE);
		pg_i->fpgsz = 0;
		TAILQ_INSERT_TAIL(result, pg_i, pageq);
	}

	/* Before handling. */
	if (before_sz > 0) {
		pg->fpgsz = before_sz;
		uvm_pmr_insert_size(pmr, pg);
	}

	/* After handling. */
	if (after_sz > 0) {
#ifdef DEBUG
		for (i = 0; i < after_sz; i++) {
			KASSERT(!uvm_pmr_isfree(after + i));
		}
#endif
		KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end);
		after->fpgsz = after_sz;
		after = uvm_pmr_insert_addr(pmr, after, 1);
		uvm_pmr_insert_size(pmr, after);
	}

	uvm_pmr_assertvalid(pmr);
	return (after_sz > 0 ? after : NULL);
}
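
/*
 * Editor's illustration: extracting [start, end) from a free range pg
 * of fpgsz pages leaves up to two remainders:
 *
 *	[ before_sz ][ end - start -> result ][ after_sz ]
 *	  ^ pg keeps its addr tree node          ^ returned as "after"
 *
 * before_sz == 0 removes pg from the addr tree entirely; after_sz == 0
 * makes the function return NULL.
 */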

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
extern volatile int uvm_nowait_failed;

/*
 * Acquire a number of pages.
 *
 * count:	the number of pages returned
 * start:	lowest page number
 * end:		highest page number +1
 *		(start = end = 0: no limitation)
 * align:	power-of-2 alignment constraint (align = 1: no alignment)
 * boundary:	power-of-2 boundary (boundary = 0: no boundary)
 * maxseg:	maximum number of segments to return
 * flags:	UVM_PLA_* flags
 * result:	returned pages storage (uses pageq)
 */
int
uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
    paddr_t boundary, int maxseg, int flags, struct pglist *result)
{
	struct uvm_pmemrange *pmr;	/* Iterate memory ranges. */
	struct vm_page *found, *f_next;	/* Iterate chunks. */
	psize_t fcount;			/* Current found pages. */
	int fnsegs;			/* Current segment counter. */
	int try, start_try;
	psize_t search[3];
	paddr_t fstart, fend;		/* Pages to be taken from found. */
	int memtype;			/* Requested memtype. */
	int memtype_init;		/* Best memtype. */
	int desperate;			/* True if allocation failed. */
#ifdef DIAGNOSTIC
	struct vm_page *diag_prev;	/* Used during validation. */
#endif /* DIAGNOSTIC */

	/*
	 * Validate arguments.
	 */
	KASSERT(count > 0);
	KASSERT(start == 0 || end == 0 || start < end);
	KASSERT(align >= 1);
	KASSERT(powerof2(align));
	KASSERT(maxseg > 0);
	KASSERT(boundary == 0 || powerof2(boundary));
	KASSERT(boundary == 0 || maxseg * boundary >= count);
	KASSERT(TAILQ_EMPTY(result));
	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
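
	/*
	 * Editor's example (hypothetical values): a call satisfying the
	 * asserts above; 16 pages in a single contiguous segment below
	 * 16MB, not crossing a 64KB boundary, into a caller-initialized
	 * pglist pgl:
	 *
	 *	TAILQ_INIT(&pgl);
	 *	error = uvm_pmr_getpages(16, 0, atop(0x1000000), 1,
	 *	    atop(0x10000), 1, UVM_PLA_WAITOK, &pgl);
	 */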

	/*
	 * TRYCONTIG is a noop if you only want a single segment.
	 * Remove it if that's the case: otherwise it'll deny the fast
	 * allocation.
	 */
	if (maxseg == 1 || count == 1)
		flags &= ~UVM_PLA_TRYCONTIG;

	/*
	 * Configure search.
	 *
	 * search[0] is one segment, only used in UVM_PLA_TRYCONTIG case.
	 * search[1] is multiple segments, chosen to fulfill the search in
	 * approximately even-sized segments.
	 * This is a good trade-off between slightly reduced allocation speed
	 * and less fragmentation.
	 * search[2] is the worst case, in which all segments are evaluated.
	 * This provides the least fragmentation, but makes the search
	 * possibly longer (although in the case it is selected, that no
	 * longer matters most).
	 *
	 * The exception is when maxseg == 1: since we can only fulfill that
	 * with one segment of size pages, only a single search type has to
	 * be attempted.
	 */
	if (maxseg == 1 || count == 1) {
		start_try = 2;
		search[2] = count;
	} else if (maxseg >= count && (flags & UVM_PLA_TRYCONTIG) == 0) {
		start_try = 2;
		search[2] = 1;
	} else {
		start_try = 0;
		search[0] = count;
		search[1] = pow2divide(count, maxseg);
		search[2] = 1;
		if ((flags & UVM_PLA_TRYCONTIG) == 0)
			start_try = 1;
		if (search[1] >= search[0]) {
			search[1] = search[0];
			start_try = 1;
		}
		if (search[2] >= search[start_try]) {
			start_try = 2;
		}
	}
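
	/*
	 * Worked example (editor's note): count = 10, maxseg = 4 and no
	 * UVM_PLA_TRYCONTIG gives search[] = { 10, pow2divide(10, 4) = 4,
	 * 1 } and start_try = 1: the allocator first hunts for roughly
	 * even 4-page chunks and only falls back to single pages last.
	 */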

	/*
	 * Memory type: if zeroed memory is requested, traverse the zero set.
	 * Otherwise, traverse the dirty set.
	 *
	 * The memtype iterator is reinitialized to memtype_init on entrance
	 * of a pmemrange.
	 */
	if (flags & UVM_PLA_ZERO)
		memtype_init = UVM_PMR_MEMTYPE_ZERO;
	else
		memtype_init = UVM_PMR_MEMTYPE_DIRTY;

	/*
	 * Initially, we're not desperate.
	 *
	 * Note that if we return from a sleep, we are still desperate.
	 * Chances are that memory pressure is still high, so resetting
	 * seems over-optimistic to me.
	 */
	desperate = 0;

again:
	uvm_lock_fpageq();

	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */
	if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
	    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
	    (uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))
		wakeup(&uvm.pagedaemon);

	/*
	 * Fail if any of these conditions is true:
	 * [1] there really are no free pages, or
	 * [2] only kernel "reserved" pages remain and
	 *     the UVM_PLA_USERESERVE flag wasn't used.
	 * [3] only pagedaemon "reserved" pages remain and
	 *     the requestor isn't the pagedaemon nor the syncer.
	 */
	if ((uvmexp.free <= uvmexp.reserve_kernel) &&
	    !(flags & UVM_PLA_USERESERVE)) {
		uvm_unlock_fpageq();
		return ENOMEM;
	}

	if ((uvmexp.free <= (uvmexp.reserve_pagedaemon + count)) &&
	    (curproc != uvm.pagedaemon_proc) && (curproc != syncerproc)) {
		uvm_unlock_fpageq();
		if (flags & UVM_PLA_WAITOK) {
			uvm_wait("uvm_pmr_getpages");
			goto again;
		}
		return ENOMEM;
	}

retry:		/* Return point after sleeping. */
	fcount = 0;
	fnsegs = 0;

retry_desperate:
	/*
	 * If we just want any page(s), go for the really fast option.
	 */
	if (count <= maxseg && align == 1 && boundary == 0 &&
	    (flags & UVM_PLA_TRYCONTIG) == 0) {
		fcount += uvm_pmr_get1page(count - fcount, memtype_init,
		    result, start, end, 0);

		/*
		 * If we found sufficient pages, go to the success exit code.
		 *
		 * Otherwise, go immediately to fail, since we collected
		 * all we could anyway.
		 */
		if (fcount == count)
			goto out;
		else
			goto fail;
	}

	/*
	 * The heart of the contig case.
	 *
	 * The code actually looks like this:
	 *
	 * foreach (struct pmemrange) {
	 *	foreach (memtype) {
	 *		foreach(try) {
	 *			foreach (free range of memtype in pmemrange,
	 *			    starting at search[try]) {
	 *				while (range has space left)
	 *					take from range
	 *			}
	 *		}
	 *	}
	 *
	 *	if next pmemrange has higher usecount than current:
	 *		enter desperate case (which will drain the pmemranges
	 *		until empty prior to moving to the next one)
	 * }
	 *
	 * When desperate is activated, try always starts at the highest
	 * value. The memtype loop is using a goto ReScanMemtype.
	 * The try loop is using a goto ReScan.
	 * The 'range has space left' loop uses label DrainFound.
	 *
	 * Writing them all as loops would take up a lot of screen space in
	 * the form of indentation and some parts are easier to express
	 * using the labels.
	 */

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Empty range. */
		if (pmr->nsegs == 0)
			continue;

		/* Outside requested range. */
		if (!PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end))
			continue;

		memtype = memtype_init;

rescan_memtype:	/* Return point at memtype++. */
		try = start_try;

rescan:		/* Return point at try++. */
		for (found = uvm_pmr_nfindsz(pmr, search[try], memtype);
		    found != NULL;
		    found = f_next) {
			f_next = uvm_pmr_nextsz(pmr, found, memtype);

			fstart = atop(VM_PAGE_TO_PHYS(found));
			if (start != 0)
				fstart = MAX(start, fstart);
drain_found:
			/*
			 * Throw away the first segment if fnsegs == maxseg
			 *
			 * Note that f_next is still valid after this call,
			 * since we only allocated from entries before f_next.
			 * We don't revisit the entries we already extracted
			 * from unless we entered the desperate case.
			 */
			if (fnsegs == maxseg) {
				fnsegs--;
				fcount -=
				    uvm_pmr_remove_1strange(result, boundary,
				    &found, desperate);
			}

			fstart = PMR_ALIGN(fstart, align);
			fend = atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz;
			if (end != 0)
				fend = MIN(end, fend);
			if (boundary != 0) {
				fend =
				    MIN(fend, PMR_ALIGN(fstart + 1, boundary));
			}
			if (fstart >= fend)
				continue;
			if (fend - fstart > count - fcount)
				fend = fstart + (count - fcount);

			fcount += fend - fstart;
			fnsegs++;
			found = uvm_pmr_extract_range(pmr, found,
			    fstart, fend, result);

			if (fcount == count)
				goto out;

			/*
			 * If there's still space left in found, try to
			 * fully drain it prior to continuing.
			 */
			if (found != NULL) {
				fstart = fend;
				goto drain_found;
			}
		}

		/* Try a smaller search now. */
		if (++try < nitems(search))
			goto rescan;

		/*
		 * Exhaust all memory types prior to going to the next memory
		 * segment.
		 * This means that zero-vs-dirty are eaten prior to moving
		 * to a pmemrange with a higher use-count.
		 *
		 * Code is basically a difficult way of writing:
		 * memtype = memtype_init;
		 * do {
		 *	...;
		 *	memtype += 1;
		 *	memtype %= MEMTYPE_MAX;
		 * } while (memtype != memtype_init);
		 */
		memtype += 1;
		if (memtype == UVM_PMR_MEMTYPE_MAX)
			memtype = 0;
		if (memtype != memtype_init)
			goto rescan_memtype;

		/*
		 * If not desperate, enter desperate case prior to eating all
		 * the good stuff in the next range.
		 */
		if (!desperate && TAILQ_NEXT(pmr, pmr_use) != NULL &&
		    TAILQ_NEXT(pmr, pmr_use)->use != pmr->use)
			break;
	}

1137 | /* | ||||||
1138 | * Not enough memory of the requested type available. Fall back to | ||||||
1139 | * less good memory that we'll clean up better later. | ||||||
1140 | * | ||||||
1141 | * This algorithm is not very smart, though: it just starts scanning | ||||||
1142 | * a differently typed range, but the nicer ranges of the previous | ||||||
1143 | * iteration may fall out. Hence there is a small chance of a false | ||||||
1144 | * negative. | ||||||
1145 | * | ||||||
1146 | * When desperate: scan all sizes starting at the smallest | ||||||
1147 | * (start_try = 1) and do not consider UVM_PLA_TRYCONTIG (which may | ||||||
1148 | * allow us to hit the fast path now). | ||||||
1149 | * | ||||||
1150 | * Also, because we will revisit entries we scanned before, we need | ||||||
1151 | * to reset the page queue, or we may end up releasing entries in | ||||||
1152 | * such a way as to invalidate f_next. | ||||||
1153 | */ | ||||||
1154 | if (!desperate) { | ||||||
1155 | desperate = 1; | ||||||
1156 | start_try = nitems(search)(sizeof((search)) / sizeof((search)[0])) - 1; | ||||||
1157 | flags &= ~UVM_PLA_TRYCONTIG0x0008; | ||||||
1158 | |||||||
1159 | while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0))) | ||||||
1160 | uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0); | ||||||
1161 | fnsegs = 0; | ||||||
1162 | fcount = 0; | ||||||
1163 | goto retry_desperate; | ||||||
1164 | } | ||||||
1165 | |||||||
1166 | fail: | ||||||
1167 | /* Allocation failed. */ | ||||||
1168 | /* XXX: claim from memory reserve here */ | ||||||
1169 | |||||||
1170 | while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0))) | ||||||
1171 | uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0); | ||||||
1172 | |||||||
1173 | if (flags & UVM_PLA_WAITOK0x0001) { | ||||||
1174 | if (uvm_wait_pla(ptoa(start)((paddr_t)(start) << 12), ptoa(end)((paddr_t)(end) << 12) - 1, ptoa(count)((paddr_t)(count) << 12), | ||||||
1175 | flags & UVM_PLA_FAILOK0x0010) == 0) | ||||||
1176 | goto retry; | ||||||
1177 | KASSERT(flags & UVM_PLA_FAILOK)((flags & 0x0010) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1177, "flags & UVM_PLA_FAILOK")); | ||||||
1178 | } else { | ||||||
1179 | if (!(flags & UVM_PLA_NOWAKE0x0020)) { | ||||||
1180 | uvm_nowait_failed = 1; | ||||||
1181 | wakeup(&uvm.pagedaemon); | ||||||
1182 | } | ||||||
1183 | } | ||||||
1184 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1185 | |||||||
1186 | return ENOMEM12; | ||||||
1187 | |||||||
1188 | out: | ||||||
1189 | /* Allocation successful. */ | ||||||
1190 | uvmexp.free -= fcount; | ||||||
1191 | |||||||
1192 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1193 | |||||||
1194 | /* Update statistics and zero pages if UVM_PLA_ZERO. */ | ||||||
1195 | #ifdef DIAGNOSTIC1 | ||||||
1196 | fnsegs = 0; | ||||||
1197 | fcount = 0; | ||||||
1198 | diag_prev = NULL((void *)0); | ||||||
1199 | #endif /* DIAGNOSTIC */ | ||||||
1200 | TAILQ_FOREACH(found, result, pageq)for((found) = ((result)->tqh_first); (found) != ((void *)0 ); (found) = ((found)->pageq.tqe_next)) { | ||||||
1201 | atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_PMAPMASK0x3f000000); | ||||||
1202 | |||||||
1203 | if (found->pg_flags & PG_ZERO0x00000100) { | ||||||
1204 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1205 | uvmexp.zeropages--; | ||||||
1206 | if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8)) | ||||||
1207 | wakeup(&uvmexp.zeropages); | ||||||
1208 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1209 | } | ||||||
1210 | if (flags & UVM_PLA_ZERO0x0004) { | ||||||
1211 | if (found->pg_flags & PG_ZERO0x00000100) | ||||||
1212 | uvmexp.pga_zerohit++; | ||||||
1213 | else { | ||||||
1214 | uvmexp.pga_zeromiss++; | ||||||
1215 | uvm_pagezero(found); | ||||||
1216 | } | ||||||
1217 | } | ||||||
1218 | atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_ZERO0x00000100|PQ_FREE0x00010000); | ||||||
1219 | |||||||
1220 | found->uobject = NULL((void *)0); | ||||||
1221 | found->uanon = NULL((void *)0); | ||||||
1222 | found->pg_version++; | ||||||
1223 | |||||||
1224 | /* | ||||||
1225 | * Validate that the page matches the range criteria. | ||||||
1226 | */ | ||||||
1227 | KDASSERT(start == 0 || atop(VM_PAGE_TO_PHYS(found)) >= start)((void)0); | ||||||
1228 | KDASSERT(end == 0 || atop(VM_PAGE_TO_PHYS(found)) < end)((void)0); | ||||||
1229 | |||||||
1230 | #ifdef DIAGNOSTIC1 | ||||||
1231 | /* | ||||||
1232 | * Update fcount (# found pages) and | ||||||
1233 | * fnsegs (# found segments) counters. | ||||||
1234 | */ | ||||||
1235 | if (diag_prev == NULL((void *)0) || | ||||||
1236 | /* new segment if it contains a hole */ | ||||||
1237 | atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) + 1 != | ||||||
1238 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) || | ||||||
1239 | /* new segment if it crosses boundary */ | ||||||
1240 | (atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) & ~(boundary - 1)) != | ||||||
1241 | (atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) & ~(boundary - 1))) | ||||||
1242 | fnsegs++; | ||||||
1243 | fcount++; | ||||||
1244 | |||||||
1245 | diag_prev = found; | ||||||
1246 | #endif /* DIAGNOSTIC */ | ||||||
1247 | } | ||||||
1248 | |||||||
1249 | #ifdef DIAGNOSTIC1 | ||||||
1250 | /* | ||||||
1251 | * Panic on algorithm failure. | ||||||
1252 | */ | ||||||
1253 | if (fcount != count || fnsegs > maxseg) { | ||||||
1254 | panic("pmemrange allocation error: " | ||||||
1255 | "allocated %ld pages in %d segments, " | ||||||
1256 | "but request was %ld pages in %d segments", | ||||||
1257 | fcount, fnsegs, count, maxseg); | ||||||
1258 | } | ||||||
1259 | #endif /* DIAGNOSTIC */ | ||||||
1260 | |||||||
1261 | return 0; | ||||||
1262 | } | ||||||
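/*
 * A minimal standalone sketch of the align-up idiom behind PMR_ALIGN
 * above: for a power-of-two `align`, adding align-1 and masking the
 * low bits rounds x up to the next boundary.  (Illustration only, not
 * kernel API.)
 */
#include <assert.h>

static unsigned long
align_up(unsigned long x, unsigned long align)
{
	/* assumes align is a power of two, as PMR_ALIGN does */
	return (x + (align - 1)) & ~(align - 1);
}

int
main(void)
{
	assert(align_up(5, 4) == 8);	/* rounds up */
	assert(align_up(8, 4) == 8);	/* already aligned: unchanged */
	return 0;
}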
1263 | |||||||
1264 | /* | ||||||
1265 | * Free a number of contig pages (invoked by uvm_page_init). | ||||||
1266 | */ | ||||||
1267 | void | ||||||
1268 | uvm_pmr_freepages(struct vm_page *pg, psize_t count) | ||||||
1269 | { | ||||||
1270 | struct uvm_pmemrange *pmr; | ||||||
1271 | psize_t i, pmr_count; | ||||||
1272 | struct vm_page *firstpg = pg; | ||||||
1273 | |||||||
1274 | for (i = 0; i < count; i++) { | ||||||
1275 | KASSERT(atop(VM_PAGE_TO_PHYS(&pg[i])) ==((((((&pg[i])->phys_addr)) >> 12) == ((((pg)-> phys_addr)) >> 12) + i) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1276, "atop(VM_PAGE_TO_PHYS(&pg[i])) == atop(VM_PAGE_TO_PHYS(pg)) + i" )) | ||||||
1276 | atop(VM_PAGE_TO_PHYS(pg)) + i)((((((&pg[i])->phys_addr)) >> 12) == ((((pg)-> phys_addr)) >> 12) + i) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1276, "atop(VM_PAGE_TO_PHYS(&pg[i])) == atop(VM_PAGE_TO_PHYS(pg)) + i" )); | ||||||
1277 | |||||||
1278 | if (!((pg[i].pg_flags & PQ_FREE0x00010000) == 0 && | ||||||
1279 | VALID_FLAGS(pg[i].pg_flags)(((pg[i].pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0))) { | ||||||
1280 | printf("Flags: 0x%x, will panic now.\n", | ||||||
1281 | pg[i].pg_flags); | ||||||
1282 | } | ||||||
1283 | KASSERT((pg[i].pg_flags & PQ_FREE) == 0 &&(((pg[i].pg_flags & 0x00010000) == 0 && (((pg[i]. pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1284, "(pg[i].pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg[i].pg_flags)" )) | ||||||
1284 | VALID_FLAGS(pg[i].pg_flags))(((pg[i].pg_flags & 0x00010000) == 0 && (((pg[i]. pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1284, "(pg[i].pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg[i].pg_flags)" )); | ||||||
1285 | atomic_setbits_intx86_atomic_setbits_u32(&pg[i].pg_flags, PQ_FREE0x00010000); | ||||||
1286 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | ||||||
1287 | } | ||||||
1288 | |||||||
1289 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1290 | |||||||
1291 | for (i = count; i > 0; i -= pmr_count) { | ||||||
1292 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12)); | ||||||
1293 | KASSERT(pmr != NULL)((pmr != ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1293, "pmr != NULL")); | ||||||
1294 | |||||||
1295 | pmr_count = MIN(i, pmr->high - atop(VM_PAGE_TO_PHYS(pg)))(((i)<(pmr->high - ((((pg)->phys_addr)) >> 12) ))?(i):(pmr->high - ((((pg)->phys_addr)) >> 12))); | ||||||
1296 | pg->fpgsz = pmr_count; | ||||||
1297 | uvm_pmr_insert(pmr, pg, 0); | ||||||
1298 | |||||||
1299 | uvmexp.free += pmr_count; | ||||||
1300 | pg += pmr_count; | ||||||
1301 | } | ||||||
1302 | wakeup(&uvmexp.free); | ||||||
1303 | if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8)) | ||||||
1304 | wakeup(&uvmexp.zeropages); | ||||||
1305 | |||||||
1306 | uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg)((firstpg)->phys_addr), ptoa(count)((paddr_t)(count) << 12)); | ||||||
1307 | |||||||
1308 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1309 | } | ||||||
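/*
 * The carve-up loop in uvm_pmr_freepages, reduced to a standalone
 * sketch: a run of `count` pages is split so that no chunk crosses the
 * end of the range holding its first page.  find_high() is a
 * hypothetical stand-in for uvm_pmemrange_find()/pmr->high.
 */
#include <stdio.h>

static unsigned long
find_high(unsigned long pgno)
{
	return (pgno | 255) + 1;	/* pretend ranges end every 256 pages */
}

int
main(void)
{
	unsigned long pgno = 200, count = 400;

	while (count > 0) {
		unsigned long high = find_high(pgno);
		unsigned long chunk = count < high - pgno ?
		    count : high - pgno;	/* MIN(i, high - pgno) */

		printf("insert %lu pages at %lu\n", chunk, pgno);
		pgno += chunk;
		count -= chunk;
	}
	return 0;
}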
1310 | |||||||
1311 | /* | ||||||
1312 | * Free all pages in the queue. | ||||||
1313 | */ | ||||||
1314 | void | ||||||
1315 | uvm_pmr_freepageq(struct pglist *pgl) | ||||||
1316 | { | ||||||
1317 | struct vm_page *pg; | ||||||
1318 | paddr_t pstart; | ||||||
1319 | psize_t plen; | ||||||
1320 | |||||||
1321 | TAILQ_FOREACH(pg, pgl, pageq)for((pg) = ((pgl)->tqh_first); (pg) != ((void *)0); (pg) = ((pg)->pageq.tqe_next)) { | ||||||
1322 | if (!((pg->pg_flags & PQ_FREE0x00010000) == 0 && | ||||||
1323 | VALID_FLAGS(pg->pg_flags)(((pg->pg_flags) & ~(0x00010000|0x00000100|0x3f000000) ) == 0x0))) { | ||||||
1324 | printf("Flags: 0x%x, will panic now.\n", | ||||||
1325 | pg->pg_flags); | ||||||
1326 | } | ||||||
1327 | KASSERT((pg->pg_flags & PQ_FREE) == 0 &&(((pg->pg_flags & 0x00010000) == 0 && (((pg-> pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1328, "(pg->pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg->pg_flags)" )) | ||||||
1328 | VALID_FLAGS(pg->pg_flags))(((pg->pg_flags & 0x00010000) == 0 && (((pg-> pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1328, "(pg->pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg->pg_flags)" )); | ||||||
1329 | atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PQ_FREE0x00010000); | ||||||
1330 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | ||||||
1331 | } | ||||||
1332 | |||||||
1333 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1334 | while (!TAILQ_EMPTY(pgl)(((pgl)->tqh_first) == ((void *)0))) { | ||||||
1335 | pg = TAILQ_FIRST(pgl)((pgl)->tqh_first); | ||||||
1336 | if (pg == TAILQ_NEXT(pg, pageq)((pg)->pageq.tqe_next) + 1) { | ||||||
1337 | /* | ||||||
1338 | * If pg is one behind the position of the | ||||||
1339 | * next page in the list in the page array, | ||||||
1340 | * try going backwards instead of forward. | ||||||
1341 | */ | ||||||
1342 | plen = uvm_pmr_remove_1strange_reverse(pgl, &pstart); | ||||||
1343 | } else { | ||||||
1344 | pstart = VM_PAGE_TO_PHYS(TAILQ_FIRST(pgl))((((pgl)->tqh_first))->phys_addr); | ||||||
1345 | plen = uvm_pmr_remove_1strange(pgl, 0, NULL((void *)0), 0); | ||||||
1346 | } | ||||||
1347 | uvmexp.free += plen; | ||||||
1348 | |||||||
1349 | uvm_wakeup_pla(pstart, ptoa(plen)((paddr_t)(plen) << 12)); | ||||||
1350 | } | ||||||
1351 | wakeup(&uvmexp.free); | ||||||
1352 | if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8)) | ||||||
1353 | wakeup(&uvmexp.zeropages); | ||||||
1354 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1355 | |||||||
1356 | return; | ||||||
1357 | } | ||||||
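/*
 * A sketch of the direction test in uvm_pmr_freepageq: when the list
 * nodes live in one array, head == next + 1 means the queue walks the
 * array backwards, so a descending run is cheaper to strip in reverse.
 * (Hypothetical types; illustration only.)
 */
#include <stdio.h>

struct page { int pad; };

int
main(void)
{
	struct page arr[4];
	/* queue ordered arr[3], arr[2], arr[1], arr[0]: descending */
	struct page *q[4] = { &arr[3], &arr[2], &arr[1], &arr[0] };

	printf("descending: %d\n", q[0] == q[1] + 1);	/* prints 1 */
	return 0;
}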
1358 | |||||||
1359 | /* | ||||||
1360 | * Store a pmemrange in the list. | ||||||
1361 | * | ||||||
1362 | * The list is sorted by use. | ||||||
1363 | */ | ||||||
1364 | struct uvm_pmemrange * | ||||||
1365 | uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq, | ||||||
1366 | struct uvm_pmemrange *pmr) | ||||||
1367 | { | ||||||
1368 | struct uvm_pmemrange *iter; | ||||||
1369 | int cmp = 1; | ||||||
1370 | |||||||
1371 | TAILQ_FOREACH(iter, useq, pmr_use)for((iter) = ((useq)->tqh_first); (iter) != ((void *)0); ( iter) = ((iter)->pmr_use.tqe_next)) { | ||||||
1372 | cmp = uvm_pmemrange_use_cmp(pmr, iter); | ||||||
1373 | if (cmp == 0) | ||||||
1374 | return iter; | ||||||
1375 | if (cmp == -1) | ||||||
1376 | break; | ||||||
1377 | } | ||||||
1378 | |||||||
1379 | if (iter == NULL((void *)0)) | ||||||
1380 | TAILQ_INSERT_TAIL(useq, pmr, pmr_use)do { (pmr)->pmr_use.tqe_next = ((void *)0); (pmr)->pmr_use .tqe_prev = (useq)->tqh_last; *(useq)->tqh_last = (pmr) ; (useq)->tqh_last = &(pmr)->pmr_use.tqe_next; } while (0); | ||||||
1381 | else | ||||||
1382 | TAILQ_INSERT_BEFORE(iter, pmr, pmr_use)do { (pmr)->pmr_use.tqe_prev = (iter)->pmr_use.tqe_prev ; (pmr)->pmr_use.tqe_next = (iter); *(iter)->pmr_use.tqe_prev = (pmr); (iter)->pmr_use.tqe_prev = &(pmr)->pmr_use .tqe_next; } while (0); | ||||||
1383 | return NULL((void *)0); | ||||||
1384 | } | ||||||
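/*
 * The sorted insert above, reduced to a singly linked list with an
 * integer key (hypothetical types): keep the list ordered, and hand an
 * existing equal element back to the caller instead of inserting.
 */
#include <stddef.h>

struct node { int key; struct node *next; };

static struct node *
sorted_insert(struct node **headp, struct node *n)
{
	while (*headp != NULL && (*headp)->key < n->key)
		headp = &(*headp)->next;
	if (*headp != NULL && (*headp)->key == n->key)
		return *headp;		/* duplicate: caller keeps n */
	n->next = *headp;
	*headp = n;
	return NULL;
}

int
main(void)
{
	struct node a = { 2, NULL }, b = { 1, NULL }, c = { 2, NULL };
	struct node *head = NULL;

	sorted_insert(&head, &a);
	sorted_insert(&head, &b);	/* list is now: 1, 2 */
	return sorted_insert(&head, &c) == &a ? 0 : 1;	/* dup found */
}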
1385 | |||||||
1386 | #ifdef DEBUG | ||||||
1387 | /* | ||||||
1388 | * Validation of the whole pmemrange. | ||||||
1389 | * Called with fpageq locked. | ||||||
1390 | */ | ||||||
1391 | void | ||||||
1392 | uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)do {} while (0) | ||||||
1393 | { | ||||||
1394 | struct vm_page *prev, *next, *i, *xref; | ||||||
1395 | int lcv, mti; | ||||||
1396 | |||||||
1397 | /* Empty range */ | ||||||
1398 | if (pmr->nsegs == 0) | ||||||
1399 | return; | ||||||
1400 | |||||||
1401 | /* Validate address tree. */ | ||||||
1402 | RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr)for ((i) = uvm_pmr_addr_RBT_MIN((&pmr->addr)); (i) != ( (void *)0); (i) = uvm_pmr_addr_RBT_NEXT((i))) { | ||||||
1403 | /* Validate the range. */ | ||||||
1404 | KASSERT(i->fpgsz > 0)((i->fpgsz > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1404, "i->fpgsz > 0")); | ||||||
1405 | KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low)((((((i)->phys_addr)) >> 12) >= pmr->low) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1405, "atop(VM_PAGE_TO_PHYS(i)) >= pmr->low")); | ||||||
1406 | KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz((((((i)->phys_addr)) >> 12) + i->fpgsz <= pmr ->high) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1407, "atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz <= pmr->high" )) | ||||||
1407 | <= pmr->high)((((((i)->phys_addr)) >> 12) + i->fpgsz <= pmr ->high) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1407, "atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz <= pmr->high" )); | ||||||
1408 | |||||||
1409 | /* Validate each page in this range. */ | ||||||
1410 | for (lcv = 0; lcv < i->fpgsz; lcv++) { | ||||||
1411 | /* | ||||||
1412 | * Only the first page has a size specification. | ||||||
1413 | * Rest is size 0. | ||||||
1414 | */ | ||||||
1415 | KASSERT(lcv == 0 || i[lcv].fpgsz == 0)((lcv == 0 || i[lcv].fpgsz == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1415, "lcv == 0 || i[lcv].fpgsz == 0" )); | ||||||
1416 | /* | ||||||
1417 | * Flag check. | ||||||
1418 | */ | ||||||
1419 | KASSERT(VALID_FLAGS(i[lcv].pg_flags) &&(((((i[lcv].pg_flags) & ~(0x00010000|0x00000100|0x3f000000 )) == 0x0) && (i[lcv].pg_flags & 0x00010000) == 0x00010000 ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1420, "VALID_FLAGS(i[lcv].pg_flags) && (i[lcv].pg_flags & PQ_FREE) == PQ_FREE" )) | ||||||
1420 | (i[lcv].pg_flags & PQ_FREE) == PQ_FREE)(((((i[lcv].pg_flags) & ~(0x00010000|0x00000100|0x3f000000 )) == 0x0) && (i[lcv].pg_flags & 0x00010000) == 0x00010000 ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1420, "VALID_FLAGS(i[lcv].pg_flags) && (i[lcv].pg_flags & PQ_FREE) == PQ_FREE" )); | ||||||
1421 | /* | ||||||
1422 | * Free pages are: | ||||||
1423 | * - not wired | ||||||
1424 | * - have no vm_anon | ||||||
1425 | * - have no uvm_object | ||||||
1426 | */ | ||||||
1427 | KASSERT(i[lcv].wire_count == 0)((i[lcv].wire_count == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 1427, "i[lcv].wire_count == 0" )); | ||||||
1428 | KASSERT(i[lcv].uanon == (void*)0xdeadbeef ||((i[lcv].uanon == (void*)0xdeadbeef || i[lcv].uanon == ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1429, "i[lcv].uanon == (void*)0xdeadbeef || i[lcv].uanon == NULL" )) | ||||||
1429 | i[lcv].uanon == NULL)((i[lcv].uanon == (void*)0xdeadbeef || i[lcv].uanon == ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1429, "i[lcv].uanon == (void*)0xdeadbeef || i[lcv].uanon == NULL" )); | ||||||
1430 | KASSERT(i[lcv].uobject == (void*)0xdeadbeef ||((i[lcv].uobject == (void*)0xdeadbeef || i[lcv].uobject == (( void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1431, "i[lcv].uobject == (void*)0xdeadbeef || i[lcv].uobject == NULL" )) | ||||||
1431 | i[lcv].uobject == NULL)((i[lcv].uobject == (void*)0xdeadbeef || i[lcv].uobject == (( void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1431, "i[lcv].uobject == (void*)0xdeadbeef || i[lcv].uobject == NULL" )); | ||||||
1432 | /* | ||||||
1433 | * Pages in a single range always have the same | ||||||
1434 | * memtype. | ||||||
1435 | */ | ||||||
1436 | KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==((uvm_pmr_pg_to_memtype(&i[0]) == uvm_pmr_pg_to_memtype(& i[lcv])) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1437, "uvm_pmr_pg_to_memtype(&i[0]) == uvm_pmr_pg_to_memtype(&i[lcv])" )) | ||||||
1437 | uvm_pmr_pg_to_memtype(&i[lcv]))((uvm_pmr_pg_to_memtype(&i[0]) == uvm_pmr_pg_to_memtype(& i[lcv])) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1437, "uvm_pmr_pg_to_memtype(&i[0]) == uvm_pmr_pg_to_memtype(&i[lcv])" )); | ||||||
1438 | } | ||||||
1439 | |||||||
1440 | /* Check that it shouldn't be joined with its predecessor. */ | ||||||
1441 | prev = RBT_PREV(uvm_pmr_addr, i)uvm_pmr_addr_RBT_PREV(i); | ||||||
1442 | if (prev != NULL((void *)0)) { | ||||||
1443 | KASSERT(uvm_pmr_pg_to_memtype(i) !=((uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || ( (((i)->phys_addr)) >> 12) > ((((prev)->phys_addr )) >> 12) + prev->fpgsz || prev + prev->fpgsz != i ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1447, "uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || atop(VM_PAGE_TO_PHYS(i)) > atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || prev + prev->fpgsz != i" )) | ||||||
1444 | uvm_pmr_pg_to_memtype(prev) ||((uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || ( (((i)->phys_addr)) >> 12) > ((((prev)->phys_addr )) >> 12) + prev->fpgsz || prev + prev->fpgsz != i ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1447, "uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || atop(VM_PAGE_TO_PHYS(i)) > atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || prev + prev->fpgsz != i" )) | ||||||
1445 | atop(VM_PAGE_TO_PHYS(i)) >((uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || ( (((i)->phys_addr)) >> 12) > ((((prev)->phys_addr )) >> 12) + prev->fpgsz || prev + prev->fpgsz != i ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1447, "uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || atop(VM_PAGE_TO_PHYS(i)) > atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || prev + prev->fpgsz != i" )) | ||||||
1446 | atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz ||((uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || ( (((i)->phys_addr)) >> 12) > ((((prev)->phys_addr )) >> 12) + prev->fpgsz || prev + prev->fpgsz != i ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1447, "uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || atop(VM_PAGE_TO_PHYS(i)) > atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || prev + prev->fpgsz != i" )) | ||||||
1447 | prev + prev->fpgsz != i)((uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || ( (((i)->phys_addr)) >> 12) > ((((prev)->phys_addr )) >> 12) + prev->fpgsz || prev + prev->fpgsz != i ) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1447, "uvm_pmr_pg_to_memtype(i) != uvm_pmr_pg_to_memtype(prev) || atop(VM_PAGE_TO_PHYS(i)) > atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || prev + prev->fpgsz != i" )); | ||||||
1448 | } | ||||||
1449 | |||||||
1450 | /* Assert i is in the size tree as well. */ | ||||||
1451 | if (i->fpgsz == 1) { | ||||||
1452 | TAILQ_FOREACH(xref,for((xref) = ((&pmr->single[uvm_pmr_pg_to_memtype(i)]) ->tqh_first); (xref) != ((void *)0); (xref) = ((xref)-> pageq.tqe_next)) | ||||||
1453 | &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq)for((xref) = ((&pmr->single[uvm_pmr_pg_to_memtype(i)]) ->tqh_first); (xref) != ((void *)0); (xref) = ((xref)-> pageq.tqe_next)) { | ||||||
1454 | if (xref == i) | ||||||
1455 | break; | ||||||
1456 | } | ||||||
1457 | KASSERT(xref == i)((xref == i) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1457, "xref == i")); | ||||||
1458 | } else { | ||||||
1459 | KASSERT(RBT_FIND(uvm_pmr_size,((uvm_pmr_size_RBT_FIND(&pmr->size[uvm_pmr_pg_to_memtype (i)], i + 1) == i + 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1461, "RBT_FIND(uvm_pmr_size, &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) == i + 1" )) | ||||||
1460 | &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==((uvm_pmr_size_RBT_FIND(&pmr->size[uvm_pmr_pg_to_memtype (i)], i + 1) == i + 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1461, "RBT_FIND(uvm_pmr_size, &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) == i + 1" )) | ||||||
1461 | i + 1)((uvm_pmr_size_RBT_FIND(&pmr->size[uvm_pmr_pg_to_memtype (i)], i + 1) == i + 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1461, "RBT_FIND(uvm_pmr_size, &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) == i + 1" )); | ||||||
1462 | } | ||||||
1463 | } | ||||||
1464 | |||||||
1465 | /* Validate size tree. */ | ||||||
1466 | for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX2; mti++) { | ||||||
1467 | for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL((void *)0); i = next) { | ||||||
1468 | next = uvm_pmr_nextsz(pmr, i, mti); | ||||||
1469 | if (next != NULL((void *)0)) { | ||||||
1470 | KASSERT(i->fpgsz <=((i->fpgsz <= next->fpgsz) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1471, "i->fpgsz <= next->fpgsz" )) | ||||||
1471 | next->fpgsz)((i->fpgsz <= next->fpgsz) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1471, "i->fpgsz <= next->fpgsz" )); | ||||||
1472 | } | ||||||
1473 | |||||||
1474 | /* Assert i is in the addr tree as well. */ | ||||||
1475 | KASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, i) == i)((uvm_pmr_addr_RBT_FIND(&pmr->addr, i) == i) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1475, "RBT_FIND(uvm_pmr_addr, &pmr->addr, i) == i")); | ||||||
1476 | |||||||
1477 | /* Assert i is of the correct memory type. */ | ||||||
1478 | KASSERT(uvm_pmr_pg_to_memtype(i) == mti)((uvm_pmr_pg_to_memtype(i) == mti) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1478, "uvm_pmr_pg_to_memtype(i) == mti" )); | ||||||
1479 | } | ||||||
1480 | } | ||||||
1481 | |||||||
1482 | /* Validate nsegs statistic. */ | ||||||
1483 | lcv = 0; | ||||||
1484 | RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr)for ((i) = uvm_pmr_addr_RBT_MIN((&pmr->addr)); (i) != ( (void *)0); (i) = uvm_pmr_addr_RBT_NEXT((i))) | ||||||
1485 | lcv++; | ||||||
1486 | KASSERT(pmr->nsegs == lcv)((pmr->nsegs == lcv) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1486, "pmr->nsegs == lcv")); | ||||||
1487 | } | ||||||
1488 | #endif /* DEBUG */ | ||||||
1489 | |||||||
1490 | /* | ||||||
1491 | * Split pmr at split point pageno. | ||||||
1492 | * Called with fpageq unlocked. | ||||||
1493 | * | ||||||
1494 | * Split is only applied if a pmemrange spans pageno. | ||||||
1495 | */ | ||||||
1496 | void | ||||||
1497 | uvm_pmr_split(paddr_t pageno) | ||||||
1498 | { | ||||||
1499 | struct uvm_pmemrange *pmr, *drain; | ||||||
1500 | struct vm_page *rebuild, *prev, *next; | ||||||
1501 | psize_t prev_sz; | ||||||
1502 | |||||||
1503 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1504 | pmr = uvm_pmemrange_find(pageno); | ||||||
1505 | if (pmr == NULL((void *)0) || !(pmr->low < pageno)) { | ||||||
1506 | /* No split required. */ | ||||||
1507 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1508 | return; | ||||||
1509 | } | ||||||
1510 | |||||||
1511 | KASSERT(pmr->low < pageno)((pmr->low < pageno) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1511, "pmr->low < pageno" )); | ||||||
1512 | KASSERT(pmr->high > pageno)((pmr->high > pageno) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1512, "pmr->high > pageno" )); | ||||||
1513 | |||||||
1514 | /* | ||||||
1515 | * uvm_pmr_allocpmr() calls into malloc() which in turn calls into | ||||||
1516 | * uvm_kmemalloc which calls into pmemrange, making the locking | ||||||
1517 | * a bit hard, so we just race! | ||||||
1518 | */ | ||||||
1519 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1520 | drain = uvm_pmr_allocpmr(); | ||||||
1521 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1522 | pmr = uvm_pmemrange_find(pageno); | ||||||
1523 | if (pmr == NULL((void *)0) || !(pmr->low < pageno)) { | ||||||
1524 | /* | ||||||
1525 | * We lost the race since someone else ran this or a related | ||||||
1526 | * function; however, this should trigger very rarely, so | ||||||
1527 | * we just leak the pmr. | ||||||
1528 | */ | ||||||
1529 | printf("uvm_pmr_split: lost one pmr\n"); | ||||||
1530 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1531 | return; | ||||||
1532 | } | ||||||
1533 | |||||||
1534 | drain->low = pageno; | ||||||
1535 | drain->high = pmr->high; | ||||||
1536 | drain->use = pmr->use; | ||||||
1537 | |||||||
1538 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1539 | uvm_pmr_assertvalid(drain)do {} while (0); | ||||||
1540 | KASSERT(drain->nsegs == 0)((drain->nsegs == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1540, "drain->nsegs == 0")); | ||||||
1541 | |||||||
1542 | RBT_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr)for ((rebuild) = uvm_pmr_addr_RBT_MIN((&pmr->addr)); ( rebuild) != ((void *)0); (rebuild) = uvm_pmr_addr_RBT_NEXT((rebuild ))) { | ||||||
1543 | if (atop(VM_PAGE_TO_PHYS(rebuild))((((rebuild)->phys_addr)) >> 12) >= pageno) | ||||||
1544 | break; | ||||||
1545 | } | ||||||
1546 | if (rebuild == NULL((void *)0)) | ||||||
1547 | prev = RBT_MAX(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_MAX(&pmr->addr); | ||||||
1548 | else | ||||||
1549 | prev = RBT_PREV(uvm_pmr_addr, rebuild)uvm_pmr_addr_RBT_PREV(rebuild); | ||||||
1550 | KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno)((prev == ((void *)0) || ((((prev)->phys_addr)) >> 12 ) < pageno) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1550, "prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno" )); | ||||||
1551 | |||||||
1552 | /* | ||||||
1553 | * Handle free chunk that spans the split point. | ||||||
1554 | */ | ||||||
1555 | if (prev != NULL((void *)0) && | ||||||
1556 | atop(VM_PAGE_TO_PHYS(prev))((((prev)->phys_addr)) >> 12) + prev->fpgsz > pageno) { | ||||||
1557 | psize_t before, after; | ||||||
1558 | |||||||
1559 | KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno)((((((prev)->phys_addr)) >> 12) < pageno) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1559, "atop(VM_PAGE_TO_PHYS(prev)) < pageno")); | ||||||
1560 | |||||||
1561 | uvm_pmr_remove(pmr, prev); | ||||||
1562 | prev_sz = prev->fpgsz; | ||||||
1563 | before = pageno - atop(VM_PAGE_TO_PHYS(prev))((((prev)->phys_addr)) >> 12); | ||||||
1564 | after = atop(VM_PAGE_TO_PHYS(prev))((((prev)->phys_addr)) >> 12) + prev_sz - pageno; | ||||||
1565 | |||||||
1566 | KASSERT(before > 0)((before > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1566, "before > 0")); | ||||||
1567 | KASSERT(after > 0)((after > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1567, "after > 0")); | ||||||
1568 | |||||||
1569 | prev->fpgsz = before; | ||||||
1570 | uvm_pmr_insert(pmr, prev, 1); | ||||||
1571 | (prev + before)->fpgsz = after; | ||||||
1572 | uvm_pmr_insert(drain, prev + before, 1); | ||||||
1573 | } | ||||||
1574 | |||||||
1575 | /* Move free chunks that no longer fall in the range. */ | ||||||
1576 | for (; rebuild != NULL((void *)0); rebuild = next) { | ||||||
1577 | next = RBT_NEXT(uvm_pmr_addr, rebuild)uvm_pmr_addr_RBT_NEXT(rebuild); | ||||||
1578 | |||||||
1579 | uvm_pmr_remove(pmr, rebuild); | ||||||
1580 | uvm_pmr_insert(drain, rebuild, 1); | ||||||
1581 | } | ||||||
1582 | |||||||
1583 | pmr->high = pageno; | ||||||
1584 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1585 | uvm_pmr_assertvalid(drain)do {} while (0); | ||||||
1586 | |||||||
1587 | RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain)uvm_pmemrange_addr_RBT_INSERT(&uvm.pmr_control.addr, drain ); | ||||||
1588 | uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain); | ||||||
1589 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1590 | } | ||||||
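/*
 * The chunk-split arithmetic above in isolation: a free chunk of
 * prev_sz pages starting at chunk_start is cut at `split`, leaving
 * `before` pages below the split point and `after` pages above it.
 */
#include <assert.h>

int
main(void)
{
	unsigned long chunk_start = 90, prev_sz = 30, split = 100;
	unsigned long before = split - chunk_start;		/* 10 */
	unsigned long after = chunk_start + prev_sz - split;	/* 20 */

	assert(before > 0 && after > 0);
	assert(before + after == prev_sz);
	return 0;
}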
1591 | |||||||
1592 | /* | ||||||
1593 | * Increase the usage counter for the given range of memory. | ||||||
1594 | * | ||||||
1595 | * The higher the use count of a given range of memory, the harder the | ||||||
1596 | * allocator tries to avoid allocating from it. | ||||||
1597 | * | ||||||
1598 | * Addresses here are in paddr_t, not page-numbers. | ||||||
1599 | * The lowest and highest allowed address are specified. | ||||||
1600 | */ | ||||||
1601 | void | ||||||
1602 | uvm_pmr_use_inc(paddr_t low, paddr_t high) | ||||||
1603 | { | ||||||
1604 | struct uvm_pmemrange *pmr; | ||||||
1605 | paddr_t sz; | ||||||
1606 | |||||||
1607 | /* pmr uses page numbers, translate low and high. */ | ||||||
1608 | high++; | ||||||
1609 | high = atop(trunc_page(high))((((high) & ~((1 << 12) - 1))) >> 12); | ||||||
1610 | low = atop(round_page(low))(((((low) + ((1 << 12) - 1)) & ~((1 << 12) - 1 ))) >> 12); | ||||||
1611 | uvm_pmr_split(low); | ||||||
1612 | uvm_pmr_split(high); | ||||||
1613 | |||||||
1614 | sz = 0; | ||||||
1615 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||||
1616 | /* Increase use count on segments in range. */ | ||||||
1617 | RBT_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr)for ((pmr) = uvm_pmemrange_addr_RBT_MIN((&uvm.pmr_control .addr)); (pmr) != ((void *)0); (pmr) = uvm_pmemrange_addr_RBT_NEXT ((pmr))) { | ||||||
1618 | if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)(((low) == 0 || (pmr->low) >= (low)) && ((high) == 0 || (pmr->high) <= (high)))) { | ||||||
1619 | TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use)do { if (((pmr)->pmr_use.tqe_next) != ((void *)0)) (pmr)-> pmr_use.tqe_next->pmr_use.tqe_prev = (pmr)->pmr_use.tqe_prev ; else (&uvm.pmr_control.use)->tqh_last = (pmr)->pmr_use .tqe_prev; *(pmr)->pmr_use.tqe_prev = (pmr)->pmr_use.tqe_next ; ((pmr)->pmr_use.tqe_prev) = ((void *)-1); ((pmr)->pmr_use .tqe_next) = ((void *)-1); } while (0); | ||||||
1620 | pmr->use++; | ||||||
1621 | sz += pmr->high - pmr->low; | ||||||
1622 | uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr); | ||||||
1623 | } | ||||||
1624 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1625 | } | ||||||
1626 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||||
1627 | |||||||
1628 | KASSERT(sz >= high - low)((sz >= high - low) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1628, "sz >= high - low")); | ||||||
1629 | } | ||||||
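/*
 * The address translation at the top of uvm_pmr_use_inc, standalone:
 * an inclusive byte range [low, high] shrinks to the page numbers it
 * fully covers.  A 12-bit page shift is assumed, matching the
 * expansions above.
 */
#include <stdio.h>

#define PGSHIFT	12
#define PGSIZE	(1UL << PGSHIFT)

int
main(void)
{
	unsigned long low = 0x1234, high = 0xffff;
	unsigned long lo_pfn, hi_pfn;

	lo_pfn = ((low + PGSIZE - 1) & ~(PGSIZE - 1)) >> PGSHIFT; /* round up */
	hi_pfn = ((high + 1) & ~(PGSIZE - 1)) >> PGSHIFT;	  /* truncate */

	printf("pfns [%lu, %lu)\n", lo_pfn, hi_pfn);	/* prints [2, 16) */
	return 0;
}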
1630 | |||||||
1631 | /* | ||||||
1632 | * Allocate a pmemrange. | ||||||
1633 | * | ||||||
1634 | * If called from uvm_page_init, the uvm_pageboot_alloc is used. | ||||||
1635 | * If called after uvm_init, malloc is used. | ||||||
1636 | * (And if called in between, you're dead.) | ||||||
1637 | */ | ||||||
1638 | struct uvm_pmemrange * | ||||||
1639 | uvm_pmr_allocpmr(void) | ||||||
1640 | { | ||||||
1641 | struct uvm_pmemrange *nw; | ||||||
1642 | int i; | ||||||
1643 | |||||||
1644 | /* We're only ever hitting the !uvm.page_init_done case for now. */ | ||||||
1645 | if (!uvm.page_init_done) { | ||||||
1646 | nw = (struct uvm_pmemrange *) | ||||||
1647 | uvm_pageboot_alloc(sizeof(struct uvm_pmemrange)); | ||||||
1648 | } else { | ||||||
1649 | nw = malloc(sizeof(struct uvm_pmemrange), | ||||||
1650 | M_VMMAP30, M_NOWAIT0x0002); | ||||||
1651 | } | ||||||
1652 | KASSERT(nw != NULL)((nw != ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1652, "nw != NULL")); | ||||||
1653 | memset(nw, 0, sizeof(struct uvm_pmemrange))__builtin_memset((nw), (0), (sizeof(struct uvm_pmemrange))); | ||||||
1654 | RBT_INIT(uvm_pmr_addr, &nw->addr)uvm_pmr_addr_RBT_INIT(&nw->addr); | ||||||
1655 | for (i = 0; i < UVM_PMR_MEMTYPE_MAX2; i++) { | ||||||
1656 | RBT_INIT(uvm_pmr_size, &nw->size[i])uvm_pmr_size_RBT_INIT(&nw->size[i]); | ||||||
1657 | TAILQ_INIT(&nw->single[i])do { (&nw->single[i])->tqh_first = ((void *)0); (& nw->single[i])->tqh_last = &(&nw->single[i]) ->tqh_first; } while (0); | ||||||
1658 | } | ||||||
1659 | return nw; | ||||||
1660 | } | ||||||
1661 | |||||||
1662 | /* | ||||||
1663 | * Initialization of pmr. | ||||||
1664 | * Called by uvm_page_init. | ||||||
1665 | * | ||||||
1666 | * Sets up pmemranges. | ||||||
1667 | */ | ||||||
1668 | void | ||||||
1669 | uvm_pmr_init(void) | ||||||
1670 | { | ||||||
1671 | struct uvm_pmemrange *new_pmr; | ||||||
1672 | int i; | ||||||
1673 | |||||||
1674 | TAILQ_INIT(&uvm.pmr_control.use)do { (&uvm.pmr_control.use)->tqh_first = ((void *)0); ( &uvm.pmr_control.use)->tqh_last = &(&uvm.pmr_control .use)->tqh_first; } while (0); | ||||||
1675 | RBT_INIT(uvm_pmemrange_addr, &uvm.pmr_control.addr)uvm_pmemrange_addr_RBT_INIT(&uvm.pmr_control.addr); | ||||||
1676 | TAILQ_INIT(&uvm.pmr_control.allocs)do { (&uvm.pmr_control.allocs)->tqh_first = ((void *)0 ); (&uvm.pmr_control.allocs)->tqh_last = &(&uvm .pmr_control.allocs)->tqh_first; } while (0); | ||||||
1677 | |||||||
1678 | /* By default, one range for the entire address space. */ | ||||||
1679 | new_pmr = uvm_pmr_allocpmr(); | ||||||
1680 | new_pmr->low = 0; | ||||||
1681 | new_pmr->high = atop((paddr_t)-1)(((paddr_t)-1) >> 12) + 1; | ||||||
1682 | |||||||
1683 | RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr)uvm_pmemrange_addr_RBT_INSERT(&uvm.pmr_control.addr, new_pmr ); | ||||||
1684 | uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr); | ||||||
1685 | |||||||
1686 | for (i = 0; uvm_md_constraints[i] != NULL((void *)0); i++) { | ||||||
1687 | uvm_pmr_use_inc(uvm_md_constraints[i]->ucr_low, | ||||||
1688 | uvm_md_constraints[i]->ucr_high); | ||||||
1689 | } | ||||||
1690 | } | ||||||
1691 | |||||||
1692 | /* | ||||||
1693 | * Find the pmemrange that contains the given page number. | ||||||
1694 | * | ||||||
1695 | * (Manually traverses the binary tree, because that is cheaper on stack | ||||||
1696 | * usage.) | ||||||
1697 | */ | ||||||
1698 | struct uvm_pmemrange * | ||||||
1699 | uvm_pmemrange_find(paddr_t pageno) | ||||||
1700 | { | ||||||
1701 | struct uvm_pmemrange *pmr; | ||||||
1702 | |||||||
1703 | pmr = RBT_ROOT(uvm_pmemrange_addr, &uvm.pmr_control.addr)uvm_pmemrange_addr_RBT_ROOT(&uvm.pmr_control.addr); | ||||||
1704 | while (pmr != NULL((void *)0)) { | ||||||
1705 | if (pmr->low > pageno) | ||||||
1706 | pmr = RBT_LEFT(uvm_pmemrange_addr, pmr)uvm_pmemrange_addr_RBT_LEFT(pmr); | ||||||
1707 | else if (pmr->high <= pageno) | ||||||
1708 | pmr = RBT_RIGHT(uvm_pmemrange_addr, pmr)uvm_pmemrange_addr_RBT_RIGHT(pmr); | ||||||
1709 | else | ||||||
1710 | break; | ||||||
1711 | } | ||||||
1712 | |||||||
1713 | return pmr; | ||||||
1714 | } | ||||||
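/*
 * The descent above as a standalone stabbing query: walk a binary
 * search tree of disjoint half-open [low, high) ranges without
 * recursion.  (Hypothetical node type; illustration only.)
 */
#include <stddef.h>

struct range {
	unsigned long low, high;	/* [low, high) */
	struct range *left, *right;
};

static struct range *
range_find(struct range *root, unsigned long pageno)
{
	while (root != NULL) {
		if (root->low > pageno)
			root = root->left;
		else if (root->high <= pageno)
			root = root->right;
		else
			break;		/* low <= pageno < high */
	}
	return root;
}

int
main(void)
{
	struct range r = { 0, 16, NULL, NULL };

	return range_find(&r, 8) == &r ? 0 : 1;
}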
1715 | |||||||
1716 | #if defined(DDB1) || defined(DEBUG) | ||||||
1717 | /* | ||||||
1718 | * Return true if the given page is in any of the free lists. | ||||||
1719 | * Used by uvm_page_printit. | ||||||
1720 | * This function is safe, even if the page is not on the freeq. | ||||||
1721 | * Note: does not apply locking, only called from ddb. | ||||||
1722 | */ | ||||||
1723 | int | ||||||
1724 | uvm_pmr_isfree(struct vm_page *pg) | ||||||
1725 | { | ||||||
1726 | struct vm_page *r; | ||||||
1727 | struct uvm_pmemrange *pmr; | ||||||
1728 | |||||||
1729 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12)); | ||||||
1730 | if (pmr == NULL((void *)0)) | ||||||
1731 | return 0; | ||||||
1732 | r = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg)uvm_pmr_addr_RBT_NFIND(&pmr->addr, pg); | ||||||
1733 | if (r == NULL((void *)0)) | ||||||
1734 | r = RBT_MAX(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_MAX(&pmr->addr); | ||||||
1735 | else if (r != pg) | ||||||
1736 | r = RBT_PREV(uvm_pmr_addr, r)uvm_pmr_addr_RBT_PREV(r); | ||||||
1737 | if (r == NULL((void *)0)) | ||||||
1738 | return 0; /* Empty tree. */ | ||||||
1739 | |||||||
1740 | KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)))((void)0); | ||||||
1741 | return atop(VM_PAGE_TO_PHYS(r))((((r)->phys_addr)) >> 12) + r->fpgsz > | ||||||
1742 | atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12); | ||||||
1743 | } | ||||||
1744 | #endif /* DEBUG */ | ||||||
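/*
 * The membership test above, reduced to a sorted array of
 * [start, start+len) runs: locate the predecessor run (the last run
 * starting at or before pg) and check whether it still covers pg.
 */
#include <stdio.h>

struct run { unsigned long start, len; };

static int
run_contains(const struct run *runs, int n, unsigned long pg)
{
	int i;

	for (i = n - 1; i >= 0; i--)
		if (runs[i].start <= pg)
			break;		/* predecessor found */
	if (i < 0)
		return 0;		/* pg below every run */
	return runs[i].start + runs[i].len > pg;
}

int
main(void)
{
	struct run runs[] = { { 0, 4 }, { 10, 2 } };

	printf("%d %d %d\n", run_contains(runs, 2, 3),
	    run_contains(runs, 2, 5), run_contains(runs, 2, 11));
	return 0;	/* prints: 1 0 1 */
}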
1745 | |||||||
1746 | /* | ||||||
1747 | * Given a root of a tree, find a range which intersects start, end and | ||||||
1748 | * is of the same memtype. | ||||||
1749 | * | ||||||
1750 | * Page must be in the address tree. | ||||||
1751 | */ | ||||||
1752 | struct vm_page* | ||||||
1753 | uvm_pmr_rootupdate(struct uvm_pmemrange *pmr, struct vm_page *init_root, | ||||||
1754 | paddr_t start, paddr_t end, int memtype) | ||||||
1755 | { | ||||||
1756 | int direction; | ||||||
1757 | struct vm_page *root; | ||||||
1758 | struct vm_page *high, *high_next; | ||||||
1759 | struct vm_page *low, *low_next; | ||||||
1760 | |||||||
1761 | KDASSERT(pmr != NULL && init_root != NULL)((void)0); | ||||||
1762 | root = init_root; | ||||||
1763 | |||||||
1764 | /* Which direction to use for searching. */ | ||||||
1765 | if (start != 0 && atop(VM_PAGE_TO_PHYS(root))((((root)->phys_addr)) >> 12) + root->fpgsz <= start) | ||||||
1766 | direction = 1; | ||||||
1767 | else if (end != 0 && atop(VM_PAGE_TO_PHYS(root))((((root)->phys_addr)) >> 12) >= end) | ||||||
1768 | direction = -1; | ||||||
1769 | else /* nothing to do */ | ||||||
1770 | return root; | ||||||
1771 | |||||||
1772 | /* First, update root to fall within the chosen range. */ | ||||||
1773 | while (root && !PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((root)->phys_addr)) >> 12) + root->fpgsz)) && ((end) == 0 || (((((root)-> phys_addr)) >> 12)) < (end))) | ||||||
1774 | atop(VM_PAGE_TO_PHYS(root)),(((start) == 0 || (start) < (((((root)->phys_addr)) >> 12) + root->fpgsz)) && ((end) == 0 || (((((root)-> phys_addr)) >> 12)) < (end))) | ||||||
1775 | atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz,(((start) == 0 || (start) < (((((root)->phys_addr)) >> 12) + root->fpgsz)) && ((end) == 0 || (((((root)-> phys_addr)) >> 12)) < (end))) | ||||||
1776 | start, end)(((start) == 0 || (start) < (((((root)->phys_addr)) >> 12) + root->fpgsz)) && ((end) == 0 || (((((root)-> phys_addr)) >> 12)) < (end)))) { | ||||||
1777 | if (direction == 1) | ||||||
1778 | root = RBT_RIGHT(uvm_objtree, root)uvm_objtree_RBT_RIGHT(root); | ||||||
1779 | else | ||||||
1780 | root = RBT_LEFT(uvm_objtree, root)uvm_objtree_RBT_LEFT(root); | ||||||
1781 | } | ||||||
1782 | if (root == NULL((void *)0) || uvm_pmr_pg_to_memtype(root) == memtype) | ||||||
1783 | return root; | ||||||
1784 | |||||||
1785 | /* | ||||||
1786 | * Root is valid, but of the wrong memtype. | ||||||
1787 | * | ||||||
1788 | * Try to find a range that has the given memtype in the subtree | ||||||
1789 | * (memtype mismatches are costly, either because the conversion | ||||||
1790 | * is expensive, or a later allocation will need to do the opposite | ||||||
1791 | * conversion, which will be expensive). | ||||||
1792 | * | ||||||
1793 | * | ||||||
1794 | * First, simply increase address until we hit something we can use. | ||||||
1795 | * Cache the upper page, so we can page-walk later. | ||||||
1796 | */ | ||||||
1797 | high = root; | ||||||
1798 | high_next = RBT_RIGHT(uvm_objtree, high)uvm_objtree_RBT_RIGHT(high); | ||||||
1799 | while (high_next != NULL((void *)0) && PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((high_next)->phys_addr) ) >> 12) + high_next->fpgsz)) && ((end) == 0 || (((((high_next)->phys_addr)) >> 12)) < (end)) ) | ||||||
1800 | atop(VM_PAGE_TO_PHYS(high_next)),(((start) == 0 || (start) < (((((high_next)->phys_addr) ) >> 12) + high_next->fpgsz)) && ((end) == 0 || (((((high_next)->phys_addr)) >> 12)) < (end)) ) | ||||||
1801 | atop(VM_PAGE_TO_PHYS(high_next)) + high_next->fpgsz,(((start) == 0 || (start) < (((((high_next)->phys_addr) ) >> 12) + high_next->fpgsz)) && ((end) == 0 || (((((high_next)->phys_addr)) >> 12)) < (end)) ) | ||||||
1802 | start, end)(((start) == 0 || (start) < (((((high_next)->phys_addr) ) >> 12) + high_next->fpgsz)) && ((end) == 0 || (((((high_next)->phys_addr)) >> 12)) < (end)) )) { | ||||||
1803 | high = high_next; | ||||||
1804 | if (uvm_pmr_pg_to_memtype(high) == memtype) | ||||||
1805 | return high; | ||||||
1806 | high_next = RBT_RIGHT(uvm_objtree, high)uvm_objtree_RBT_RIGHT(high); | ||||||
1807 | } | ||||||
1808 | |||||||
1809 | /* | ||||||
1810 | * Second, decrease the address until we hit something we can use. | ||||||
1811 | * Cache the lower page, so we can page-walk later. | ||||||
1812 | */ | ||||||
1813 | low = root; | ||||||
1814 | low_next = RBT_LEFT(uvm_objtree, low)uvm_objtree_RBT_LEFT(low); | ||||||
1815 | while (low_next != NULL((void *)0) && PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((low_next)->phys_addr)) >> 12) + low_next->fpgsz)) && ((end) == 0 || (((((low_next)->phys_addr)) >> 12)) < (end))) | ||||||
1816 | atop(VM_PAGE_TO_PHYS(low_next)),(((start) == 0 || (start) < (((((low_next)->phys_addr)) >> 12) + low_next->fpgsz)) && ((end) == 0 || (((((low_next)->phys_addr)) >> 12)) < (end))) | ||||||
1817 | atop(VM_PAGE_TO_PHYS(low_next)) + low_next->fpgsz,(((start) == 0 || (start) < (((((low_next)->phys_addr)) >> 12) + low_next->fpgsz)) && ((end) == 0 || (((((low_next)->phys_addr)) >> 12)) < (end))) | ||||||
1818 | start, end)(((start) == 0 || (start) < (((((low_next)->phys_addr)) >> 12) + low_next->fpgsz)) && ((end) == 0 || (((((low_next)->phys_addr)) >> 12)) < (end)))) { | ||||||
1819 | low = low_next; | ||||||
1820 | if (uvm_pmr_pg_to_memtype(low) == memtype) | ||||||
1821 | return low; | ||||||
1822 | low_next = RBT_LEFT(uvm_objtree, low)uvm_objtree_RBT_LEFT(low); | ||||||
1823 | } | ||||||
1824 | |||||||
1825 | if (low == high) | ||||||
1826 | return NULL((void *)0); | ||||||
1827 | |||||||
1828 | /* No hits. Walk the address tree until we find something usable. */ | ||||||
1829 | for (low = RBT_NEXT(uvm_pmr_addr, low)uvm_pmr_addr_RBT_NEXT(low); | ||||||
1830 | low != high; | ||||||
1831 | low = RBT_NEXT(uvm_pmr_addr, low)uvm_pmr_addr_RBT_NEXT(low)) { | ||||||
1832 | KDASSERT(PMR_IS_SUBRANGE_OF(atop(VM_PAGE_TO_PHYS(low)),((void)0) | ||||||
1833 | atop(VM_PAGE_TO_PHYS(low)) + low->fpgsz,((void)0) | ||||||
1834 | start, end))((void)0); | ||||||
1835 | if (uvm_pmr_pg_to_memtype(low) == memtype) | ||||||
1836 | return low; | ||||||
1837 | } | ||||||
1838 | |||||||
1839 | /* Nothing found. */ | ||||||
1840 | return NULL((void *)0); | ||||||
1841 | } | ||||||
1842 | |||||||
1843 | /* | ||||||
1844 | * Allocate any page, the fastest way. Page number constraints only. | ||||||
1845 | */ | ||||||
1846 | psize_t | ||||||
1847 | uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result, | ||||||
1848 | paddr_t start, paddr_t end, int memtype_only) | ||||||
1849 | { | ||||||
1850 | struct uvm_pmemrange *pmr; | ||||||
1851 | struct vm_page *found, *splitpg; | ||||||
1852 | psize_t fcount; | ||||||
1853 | int memtype; | ||||||
1854 | |||||||
1855 | fcount = 0; | ||||||
1856 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||||
1857 | /* We're done. */ | ||||||
1858 | if (fcount == count) | ||||||
1859 | break; | ||||||
1860 | |||||||
1861 | /* Outside requested range. */ | ||||||
1862 | if (!(start == 0 && end == 0) && | ||||||
1863 | !PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)(((start) == 0 || (start) < (pmr->high)) && ((end ) == 0 || (pmr->low) < (end)))) | ||||||
1864 | continue; | ||||||
1865 | |||||||
1866 | /* Range is empty. */ | ||||||
1867 | if (pmr->nsegs == 0) | ||||||
1868 | continue; | ||||||
1869 | |||||||
1870 | /* Loop over all memtypes, starting at memtype_init. */ | ||||||
1871 | memtype = memtype_init; | ||||||
1872 | while (fcount != count) { | ||||||
1873 | found = TAILQ_FIRST(&pmr->single[memtype])((&pmr->single[memtype])->tqh_first); | ||||||
1874 | /* | ||||||
1875 | * If found is outside the range, walk the list | ||||||
1876 | * until we find something that intersects with | ||||||
1877 | * boundaries. | ||||||
1878 | */ | ||||||
1879 | while (found && !PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||||
1880 | atop(VM_PAGE_TO_PHYS(found)),(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||||
1881 | atop(VM_PAGE_TO_PHYS(found)) + 1,(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||||
1882 | start, end)(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end)))) | ||||||
1883 | found = TAILQ_NEXT(found, pageq)((found)->pageq.tqe_next); | ||||||
1884 | |||||||
1885 | if (found == NULL((void *)0)) { | ||||||
1886 | /* | ||||||
1887 | * Check if the size tree contains a range | ||||||
1888 | * that intersects with the boundaries. As the | ||||||
1889 | * allocation is for any page, try the smallest | ||||||
1890 | * range so that large ranges are preserved for | ||||||
1891 | * more constrained cases. Only one entry is | ||||||
1892 | * checked here, to avoid a brute-force search. | ||||||
1893 | * | ||||||
1894 | * Note that a size tree gives pg[1] instead of | ||||||
1895 | * pg[0]. | ||||||
1896 | */ | ||||||
1897 | found = RBT_MIN(uvm_pmr_size,uvm_pmr_size_RBT_MIN(&pmr->size[memtype]) | ||||||
1898 | &pmr->size[memtype])uvm_pmr_size_RBT_MIN(&pmr->size[memtype]); | ||||||
1899 | if (found != NULL((void *)0)) { | ||||||
1900 | found--; | ||||||
1901 | if (!PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||||
1902 | atop(VM_PAGE_TO_PHYS(found)),(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||||
1903 | atop(VM_PAGE_TO_PHYS(found)) +(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||||
1904 | found->fpgsz, start, end)(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end)))) | ||||||
1905 | found = NULL((void *)0); | ||||||
1906 | } | ||||||
1907 | } | ||||||
1908 | if (found == NULL((void *)0)) { | ||||||
1909 | /* | ||||||
1910 | * Try address-guided search to meet the page | ||||||
1911 | * number constraints. | ||||||
1912 | */ | ||||||
1913 | found = RBT_ROOT(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_ROOT(&pmr->addr); | ||||||
1914 | if (found != NULL((void *)0)) { | ||||||
1915 | found = uvm_pmr_rootupdate(pmr, found, | ||||||
1916 | start, end, memtype); | ||||||
1917 | } | ||||||
1918 | } | ||||||
1919 | if (found != NULL((void *)0)) { | ||||||
1920 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1921 | uvm_pmr_remove_size(pmr, found); | ||||||
1922 | |||||||
1923 | /* | ||||||
1924 | * If the page intersects the end, then it'll | ||||||
1925 | * need splitting. | ||||||
1926 | * | ||||||
1927 | * Note that we don't need to split if the page | ||||||
1928 | * intersects start: the drain function will | ||||||
1929 | * simply stop on hitting start. | ||||||
1930 | */ | ||||||
1931 | if (end != 0 && atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||||
1932 | found->fpgsz > end) { | ||||||
1933 | psize_t splitsz = | ||||||
1934 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||||
1935 | found->fpgsz - end; | ||||||
1936 | |||||||
1937 | uvm_pmr_remove_addr(pmr, found); | ||||||
1938 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1939 | found->fpgsz -= splitsz; | ||||||
1940 | splitpg = found + found->fpgsz; | ||||||
1941 | splitpg->fpgsz = splitsz; | ||||||
1942 | uvm_pmr_insert(pmr, splitpg, 1); | ||||||
1943 | |||||||
1944 | /* | ||||||
1945 | * At this point, splitpg and found | ||||||
1946 | * actually should be joined. | ||||||
1947 | * But we explicitly disable that, | ||||||
1948 | * because we will start subtracting | ||||||
1949 | * from found. | ||||||
1950 | */ | ||||||
1951 | KASSERT(start == 0 ||((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )) | ||||||
1952 | atop(VM_PAGE_TO_PHYS(found)) +((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )) | ||||||
1953 | found->fpgsz > start)((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )); | ||||||
1954 | uvm_pmr_insert_addr(pmr, found, 1); | ||||||
1955 | } | ||||||
1956 | |||||||
1957 | /* | ||||||
1958 | * Fetch pages from the end. | ||||||
1959 | * If the range is larger than the requested | ||||||
1960 | * number of pages, this saves us an addr-tree | ||||||
1961 | * update. | ||||||
1962 | * | ||||||
1963 | * Since we take from the end and insert at the head, | ||||||
1964 | * existing ranges stay intact (sketched after this function). | ||||||
1965 | */ | ||||||
1966 | while (found->fpgsz > 0 && fcount < count && | ||||||
1967 | (start == 0 || | ||||||
1968 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||||
1969 | found->fpgsz > start)) { | ||||||
1970 | found->fpgsz--; | ||||||
1971 | fcount++; | ||||||
1972 | TAILQ_INSERT_HEAD(result,do { if (((&found[found->fpgsz])->pageq.tqe_next = ( result)->tqh_first) != ((void *)0)) (result)->tqh_first ->pageq.tqe_prev = &(&found[found->fpgsz])-> pageq.tqe_next; else (result)->tqh_last = &(&found [found->fpgsz])->pageq.tqe_next; (result)->tqh_first = (&found[found->fpgsz]); (&found[found->fpgsz ])->pageq.tqe_prev = &(result)->tqh_first; } while ( 0) | ||||||
1973 | &found[found->fpgsz], pageq)do { if (((&found[found->fpgsz])->pageq.tqe_next = ( result)->tqh_first) != ((void *)0)) (result)->tqh_first ->pageq.tqe_prev = &(&found[found->fpgsz])-> pageq.tqe_next; else (result)->tqh_last = &(&found [found->fpgsz])->pageq.tqe_next; (result)->tqh_first = (&found[found->fpgsz]); (&found[found->fpgsz ])->pageq.tqe_prev = &(result)->tqh_first; } while ( 0); | ||||||
1974 | } | ||||||
1975 | if (found->fpgsz > 0) { | ||||||
1976 | uvm_pmr_insert_size(pmr, found); | ||||||
1977 | KDASSERT(fcount == count)((void)0); | ||||||
1978 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1979 | return fcount; | ||||||
1980 | } | ||||||
1981 | |||||||
1982 | /* | ||||||
1983 | * Delayed addr-tree removal. | ||||||
1984 | */ | ||||||
1985 | uvm_pmr_remove_addr(pmr, found); | ||||||
1986 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||||
1987 | } else { | ||||||
1988 | if (memtype_only) | ||||||
1989 | break; | ||||||
1990 | /* | ||||||
1991 | * Skip to the next memtype. | ||||||
1992 | */ | ||||||
1993 | memtype += 1; | ||||||
1994 | if (memtype == UVM_PMR_MEMTYPE_MAX2) | ||||||
1995 | memtype = 0; | ||||||
1996 | if (memtype == memtype_init) | ||||||
1997 | break; | ||||||
1998 | } | ||||||
1999 | } | ||||||
2000 | } | ||||||
2001 | |||||||
2002 | /* | ||||||
2003 | * Search finished. | ||||||
2004 | * | ||||||
2005 | * Ran out of ranges before enough pages were gathered, or we hit the | ||||||
2006 | * case where found->fpgsz == count - fcount, in which case the | ||||||
2007 | * above exit condition didn't trigger. | ||||||
2008 | * | ||||||
2009 | * On failure, caller will free the pages. | ||||||
2010 | */ | ||||||
2011 | return fcount; | ||||||
2012 | } | ||||||
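/*
 * The tail-first extraction above, standalone: the descriptor lives in
 * the first page of a segment, so decrementing fpgsz and handing out
 * &seg[fpgsz] peels pages off the high end without changing the
 * segment's position (its address-tree key).
 */
#include <stdio.h>

struct page { unsigned long fpgsz; };

int
main(void)
{
	struct page seg[8] = { { 8 } };	/* one 8-page free segment */
	unsigned long want = 3;

	while (seg[0].fpgsz > 0 && want > 0) {
		seg[0].fpgsz--;
		want--;
		printf("took page %ld\n", (long)(&seg[seg[0].fpgsz] - seg));
	}
	return 0;	/* takes pages 7, 6, 5; segment keeps 5 pages */
}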
2013 | |||||||
2014 | #ifdef DDB1 | ||||||
2015 | /* | ||||||
2016 | * Print information about pmemrange. | ||||||
2017 | * Does not do locking (so either call it from DDB or acquire fpageq lock | ||||||
2018 | * before invoking). | ||||||
2019 | */ | ||||||
2020 | void | ||||||
2021 | uvm_pmr_print(void) | ||||||
2022 | { | ||||||
2023 | struct uvm_pmemrange *pmr; | ||||||
2024 | struct vm_page *pg; | ||||||
2025 | psize_t size[UVM_PMR_MEMTYPE_MAX2]; | ||||||
2026 | psize_t free; | ||||||
2027 | int useq_len; | ||||||
2028 | int mt; | ||||||
2029 | |||||||
2030 | printf("Ranges, use queue:\n"); | ||||||
2031 | useq_len = 0; | ||||||
2032 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||||
2033 | useq_len++; | ||||||
2034 | free = 0; | ||||||
2035 | for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX2; mt++) { | ||||||
2036 | pg = RBT_MAX(uvm_pmr_size, &pmr->size[mt])uvm_pmr_size_RBT_MAX(&pmr->size[mt]); | ||||||
2037 | if (pg != NULL((void *)0)) | ||||||
2038 | pg--; | ||||||
2039 | else | ||||||
2040 | pg = TAILQ_FIRST(&pmr->single[mt])((&pmr->single[mt])->tqh_first); | ||||||
2041 | size[mt] = (pg == NULL((void *)0) ? 0 : pg->fpgsz); | ||||||
2042 | |||||||
2043 | RBT_FOREACH(pg, uvm_pmr_addr, &pmr->addr)for ((pg) = uvm_pmr_addr_RBT_MIN((&pmr->addr)); (pg) != ((void *)0); (pg) = uvm_pmr_addr_RBT_NEXT((pg))) | ||||||
2044 | free += pg->fpgsz; | ||||||
2045 | } | ||||||
2046 | |||||||
2047 | printf("* [0x%lx-0x%lx] use=%d nsegs=%ld", | ||||||
2048 | (unsigned long)pmr->low, (unsigned long)pmr->high, | ||||||
2049 | pmr->use, (unsigned long)pmr->nsegs); | ||||||
2050 | for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX2; mt++) { | ||||||
2051 | printf(" maxsegsz[%d]=0x%lx", mt, | ||||||
2052 | (unsigned long)size[mt]); | ||||||
2053 | } | ||||||
2054 | printf(" free=0x%lx\n", (unsigned long)free); | ||||||
2055 | } | ||||||
2056 | printf("#ranges = %d\n", useq_len); | ||||||
2057 | } | ||||||
2058 | #endif | ||||||
2059 |
2060 | /*
2061 |  * uvm_wait_pla: wait (sleep) for the page daemon to free some pages
2062 |  * in a specific physmem area.
2063 |  *
2064 |  * Returns ENOMEM if the pagedaemon failed to free any pages.
2065 |  * If not failok, failure will lead to panic.
2066 |  *
2067 |  * Must be called with fpageq locked.
2068 |  */
2069 | int
2070 | uvm_wait_pla(paddr_t low, paddr_t high, paddr_t size, int failok)
2071 | {
2072 |     struct uvm_pmalloc pma;
2073 |     const char *wmsg = "pmrwait";
2074 |
2075 |     if (curproc == uvm.pagedaemon_proc) {
2076 |         /*
2077 |          * This is not that uncommon when the pagedaemon is trying
2078 |          * to flush out a large mmapped file. VOP_WRITE will circle
2079 |          * back through the buffer cache and try to get more memory.
2080 |          * The pagedaemon starts by calling bufbackoff, but we can
2081 |          * easily use up that reserve in a single scan iteration.
2082 |          */
2083 |         uvm_unlock_fpageq();
2084 |         if (bufbackoff(NULL, atop(size)) == 0) {
2085 |             uvm_lock_fpageq();
2086 |             return 0;
2087 |         }
2088 |         uvm_lock_fpageq();
2089 |
2090 |         /*
2091 |          * XXX detect pagedaemon deadlock - see comment in
2092 |          * uvm_wait(), as this is exactly the same issue.
2093 |          */
2094 |         printf("pagedaemon: wait_pla deadlock detected!\n");
2095 |         msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM, wmsg,
2096 |             MSEC_TO_NSEC(125));
2097 | #if defined(DEBUG)
2098 |         /* DEBUG: panic so we can debug it */
2099 |         panic("wait_pla pagedaemon deadlock");
2100 | #endif
2101 |         return 0;
2102 |     }
2103 |
2104 |     for (;;) {
2105 |         pma.pm_constraint.ucr_low = low;
2106 |         pma.pm_constraint.ucr_high = high;
2107 |         pma.pm_size = size;
2108 |         pma.pm_flags = UVM_PMA_LINKED;
2109 |         TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, &pma, pmq);
2110 |
2111 |         wakeup(&uvm.pagedaemon);    /* wake the daemon! */
2112 |         while (pma.pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY))
2113 |             msleep_nsec(&pma, &uvm.fpageqlock, PVM, wmsg, INFSLP);
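     |         /*
     |          * (Editor's note, not in the original file:) the sleep loop
     |          * above only exits once UVM_PMA_LINKED is clear, i.e. after
     |          * whoever serviced the request has removed &pma from
     |          * uvm.pmr_control.allocs (see uvm_wakeup_pla() below), so no
     |          * reference to this stack variable should remain by the time
     |          * this function returns.
     |          */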
2114 |
2115 |         if (!(pma.pm_flags & UVM_PMA_FREED) &&
2116 |             pma.pm_flags & UVM_PMA_FAIL) {
2117 |             if (failok)
2118 |                 return ENOMEM;
2119 |             printf("uvm_wait: failed to free %ld pages between "
2120 |                 "0x%lx-0x%lx\n", atop(size), low, high);
2121 |         } else
2122 |             return 0;
2123 |     }
2124 |     /* UNREACHABLE */
2125 | }
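     |
     | /*
     |  * Editor's sketch (hypothetical, not in the original source): callers
     |  * retry their allocation around uvm_wait_pla() while holding the fpageq
     |  * lock, roughly like this; uvm_pmr_getpages() stands in for the real
     |  * allocation attempt and its argument list is elided:
     |  *
     |  *    uvm_lock_fpageq();
     |  *    while (uvm_pmr_getpages(...) != 0) {
     |  *        if (uvm_wait_pla(low, high, ptoa(count), failok) == ENOMEM)
     |  *            break;    (only reachable when failok is set)
     |  *    }
     |  *    uvm_unlock_fpageq();
     |  */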
2126 |
2127 | /*
2128 |  * Wake up uvm_pmalloc sleepers.
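     |  *
     |  * (Editor's note, not in the original file:) a request still marked
     |  * UVM_PMA_BUSY is being serviced by the pagedaemon; it is only flagged
     |  * UVM_PMA_FREED here and stays on the allocs list, leaving it to
     |  * whoever set UVM_PMA_BUSY to unlink it and wake the sleeper.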
2129 |  */
2130 | void
2131 | uvm_wakeup_pla(paddr_t low, psize_t len)
2132 | {
2133 |     struct uvm_pmalloc *pma, *pma_next;
2134 |     paddr_t high;
2135 |
2136 |     high = low + len;
2137 |
2138 |     /* Wake specific allocations waiting for this memory. */
2139 |     for (pma = TAILQ_FIRST(&uvm.pmr_control.allocs); pma != NULL;
2140 |         pma = pma_next) {
2141 |         pma_next = TAILQ_NEXT(pma, pmq);
2142 |
2143 |         if (low < pma->pm_constraint.ucr_high &&
2144 |             high > pma->pm_constraint.ucr_low) {
2145 |             pma->pm_flags |= UVM_PMA_FREED;
2146 |             if (!(pma->pm_flags & UVM_PMA_BUSY)) {
2147 |                 pma->pm_flags &= ~UVM_PMA_LINKED;
2148 |                 TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
2149 |                     pmq);
2150 |                 wakeup(pma);
2151 |             }
2152 |         }
2153 |     }
2154 | }
2155 |
2156 | void
2157 | uvm_pagezero_thread(void *arg)
2158 | {
2159 |     struct pglist pgl;
2160 |     struct vm_page *pg;
2161 |     int count;
2162 |
2163 |     /* Run at the lowest possible priority. */
2164 |     curproc->p_p->ps_nice = NZERO + PRIO_MAX;
2165 |
2166 |     KERNEL_UNLOCK();
2167 |
2168 |     TAILQ_INIT(&pgl);
2169 |     for (;;) {
2170 |         uvm_lock_fpageq();
2171 |         while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET ||
2172 |             (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY,
2173 |             &pgl, 0, 0, 1)) == 0) {
2174 |             msleep_nsec(&uvmexp.zeropages, &uvm.fpageqlock,
2175 |                 MAXPRI, "pgzero", INFSLP);
2176 |         }
2177 |         uvm_unlock_fpageq();
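     |
     |         /*
     |          * (Editor's note, not in the original file:) the pages are
     |          * zeroed and marked PG_ZERO without the free page queue lock
     |          * held, so allocations can proceed while this thread works.
     |          */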
2178 |
2179 |         TAILQ_FOREACH(pg, &pgl, pageq) {
2180 |             uvm_pagezero(pg);
2181 |             atomic_setbits_int(&pg->pg_flags, PG_ZERO);
2182 |         }
2183 |
2184 |         uvm_lock_fpageq();
2185 |         while (!TAILQ_EMPTY(&pgl))
2186 |             uvm_pmr_remove_1strange(&pgl, 0, NULL, 0);
2187 |         uvmexp.zeropages += count;
2188 |         uvm_unlock_fpageq();
2189 |
2190 |         yield();
2191 |     }
2192 | }