File: | uvm/uvm_pmemrange.c |
Warning: | line 2118, column 5 Address of stack memory associated with local variable 'pma' is still referred to by the global variable 'uvm' upon returning to the caller. This will be a dangling reference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: uvm_pmemrange.c,v 1.63 2023/04/10 04:21:20 jsg Exp $ */ | ||||
2 | |||||
3 | /* | ||||
4 | * Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl> | ||||
5 | * | ||||
6 | * Permission to use, copy, modify, and distribute this software for any | ||||
7 | * purpose with or without fee is hereby granted, provided that the above | ||||
8 | * copyright notice and this permission notice appear in all copies. | ||||
9 | * | ||||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
17 | */ | ||||
18 | |||||
19 | #include <sys/param.h> | ||||
20 | #include <sys/systm.h> | ||||
21 | #include <uvm/uvm.h> | ||||
22 | #include <sys/malloc.h> | ||||
23 | #include <sys/kernel.h> | ||||
24 | #include <sys/proc.h> | ||||
25 | #include <sys/mount.h> | ||||
26 | |||||
27 | /* | ||||
28 | * 2 trees: addr tree and size tree. | ||||
29 | * | ||||
30 | * The allocator keeps chunks of free pages (called a range). | ||||
31 | * Two pages are part of the same range if: | ||||
32 | * - all pages in between are part of that range, | ||||
33 | * - they are of the same memory type (zeroed or non-zeroed), | ||||
34 | * - they are part of the same pmemrange. | ||||
35 | * A pmemrange is a range of memory which is part of the same vm_physseg | ||||
36 | * and has a use-count. | ||||
37 | * | ||||
38 | * addr tree is vm_page[0].objt | ||||
39 | * size tree is vm_page[1].objt | ||||
40 | * | ||||
41 | * The size tree is not used for memory ranges of 1 page, instead, | ||||
42 | * single queue is vm_page[0].pageq | ||||
43 | * | ||||
44 | * vm_page[0].fpgsz describes the length of a free range. Two adjacent ranges | ||||
45 | * are joined, unless: | ||||
46 | * - they have pages in between them which are not free | ||||
47 | * - they belong to different memtypes (zeroed vs dirty memory) | ||||
48 | * - they are in different pmemrange areas (ISA vs non-ISA memory for instance) | ||||
49 | * - they are not a continuation of the same array | ||||
50 | * The latter issue is caused by vm_physseg ordering and splitting from the | ||||
51 | * MD initialization machinery. The MD code is dependent on freelists and | ||||
52 | * happens to split ISA memory from non-ISA memory. | ||||
53 | * (Note: freelists die die die!) | ||||
54 | * | ||||
55 | * uvm_page_init guarantees that every vm_physseg contains an array of | ||||
56 | * struct vm_page. Also, uvm_page_physload allocates an array of struct | ||||
57 | * vm_page. This code depends on that array. The array may break across | ||||
58 | * vm_physsegs boundaries. | ||||
59 | */ | ||||
60 | |||||
61 | /* | ||||
62 | * Validate the flags of the page. (Used in asserts.) | ||||
63 | * Any free page must have the PQ_FREE flag set. | ||||
64 | * Free pages may be zeroed. | ||||
65 | * Pmap flags are left untouched. | ||||
66 | * | ||||
67 | * The PQ_FREE flag is not checked here: by not checking, we can easily use | ||||
68 | * this check in pages which are freed. | ||||
69 | */ | ||||
70 | #define VALID_FLAGS(pg_flags)(((pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0 ) \ | ||||
71 | (((pg_flags) & ~(PQ_FREE0x00010000|PG_ZERO0x00000100|PG_PMAPMASK0x3f000000)) == 0x0) | ||||
72 | |||||
73 | /* Tree comparators. */ | ||||
74 | int uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *, | ||||
75 | const struct uvm_pmemrange *); | ||||
76 | int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *); | ||||
77 | int uvm_pmr_pg_to_memtype(struct vm_page *); | ||||
78 | |||||
79 | #ifdef DDB1 | ||||
80 | void uvm_pmr_print(void); | ||||
81 | #endif | ||||
82 | |||||
83 | /* | ||||
84 | * Memory types. The page flags are used to derive what the current memory | ||||
85 | * type of a page is. | ||||
86 | */ | ||||
87 | int | ||||
88 | uvm_pmr_pg_to_memtype(struct vm_page *pg) | ||||
89 | { | ||||
90 | if (pg->pg_flags & PG_ZERO0x00000100) | ||||
91 | return UVM_PMR_MEMTYPE_ZERO1; | ||||
92 | /* Default: dirty memory. */ | ||||
93 | return UVM_PMR_MEMTYPE_DIRTY0; | ||||
94 | } | ||||
95 | |||||
96 | /* Trees. */ | ||||
97 | RBT_GENERATE(uvm_pmr_addr, vm_page, objt, uvm_pmr_addr_cmp)static int uvm_pmr_addr_RBT_COMPARE(const void *lptr, const void *rptr) { const struct vm_page *l = lptr, *r = rptr; return uvm_pmr_addr_cmp (l, r); } static const struct rb_type uvm_pmr_addr_RBT_INFO = { uvm_pmr_addr_RBT_COMPARE, ((void *)0), __builtin_offsetof( struct vm_page, objt), }; const struct rb_type *const uvm_pmr_addr_RBT_TYPE = &uvm_pmr_addr_RBT_INFO; | ||||
98 | RBT_GENERATE(uvm_pmr_size, vm_page, objt, uvm_pmr_size_cmp)static int uvm_pmr_size_RBT_COMPARE(const void *lptr, const void *rptr) { const struct vm_page *l = lptr, *r = rptr; return uvm_pmr_size_cmp (l, r); } static const struct rb_type uvm_pmr_size_RBT_INFO = { uvm_pmr_size_RBT_COMPARE, ((void *)0), __builtin_offsetof( struct vm_page, objt), }; const struct rb_type *const uvm_pmr_size_RBT_TYPE = &uvm_pmr_size_RBT_INFO; | ||||
99 | RBT_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,static int uvm_pmemrange_addr_RBT_COMPARE(const void *lptr, const void *rptr) { const struct uvm_pmemrange *l = lptr, *r = rptr ; return uvm_pmemrange_addr_cmp(l, r); } static const struct rb_type uvm_pmemrange_addr_RBT_INFO = { uvm_pmemrange_addr_RBT_COMPARE , ((void *)0), __builtin_offsetof(struct uvm_pmemrange, pmr_addr ), }; const struct rb_type *const uvm_pmemrange_addr_RBT_TYPE = &uvm_pmemrange_addr_RBT_INFO | ||||
100 | uvm_pmemrange_addr_cmp)static int uvm_pmemrange_addr_RBT_COMPARE(const void *lptr, const void *rptr) { const struct uvm_pmemrange *l = lptr, *r = rptr ; return uvm_pmemrange_addr_cmp(l, r); } static const struct rb_type uvm_pmemrange_addr_RBT_INFO = { uvm_pmemrange_addr_RBT_COMPARE , ((void *)0), __builtin_offsetof(struct uvm_pmemrange, pmr_addr ), }; const struct rb_type *const uvm_pmemrange_addr_RBT_TYPE = &uvm_pmemrange_addr_RBT_INFO; | ||||
101 | |||||
102 | /* Validation. */ | ||||
103 | #ifdef DEBUG | ||||
104 | void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)do {} while (0); | ||||
105 | #else | ||||
106 | #define uvm_pmr_assertvalid(pmr)do {} while (0) do {} while (0) | ||||
107 | #endif | ||||
108 | |||||
109 | psize_t uvm_pmr_get1page(psize_t, int, struct pglist *, | ||||
110 | paddr_t, paddr_t, int); | ||||
111 | |||||
112 | struct uvm_pmemrange *uvm_pmr_allocpmr(void); | ||||
113 | struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int); | ||||
114 | struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *, | ||||
115 | struct vm_page *, int); | ||||
116 | void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, | ||||
117 | struct vm_page *pg, struct vm_page **pg_prev, | ||||
118 | struct vm_page **pg_next); | ||||
119 | struct vm_page *uvm_pmr_findnextsegment(struct uvm_pmemrange *, | ||||
120 | struct vm_page *, paddr_t); | ||||
121 | struct vm_page *uvm_pmr_findprevsegment(struct uvm_pmemrange *, | ||||
122 | struct vm_page *, paddr_t); | ||||
123 | psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t, | ||||
124 | struct vm_page **, int); | ||||
125 | psize_t uvm_pmr_remove_1strange_reverse(struct pglist *, | ||||
126 | paddr_t *); | ||||
127 | void uvm_pmr_split(paddr_t); | ||||
128 | struct uvm_pmemrange *uvm_pmemrange_find(paddr_t); | ||||
129 | struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *, | ||||
130 | struct uvm_pmemrange *); | ||||
131 | psize_t pow2divide(psize_t, psize_t); | ||||
132 | struct vm_page *uvm_pmr_rootupdate(struct uvm_pmemrange *, | ||||
133 | struct vm_page *, paddr_t, paddr_t, int); | ||||
134 | |||||
135 | /* | ||||
136 | * Computes num/denom and rounds it up to the next power-of-2. | ||||
137 | * | ||||
138 | * This is a division function which calculates an approximation of | ||||
139 | * num/denom, with result =~ num/denom. It is meant to be fast and doesn't | ||||
140 | * have to be accurate. | ||||
141 | * | ||||
142 | * Providing too large a value makes the allocator slightly faster, at the | ||||
143 | * risk of hitting the failure case more often. Providing too small a value | ||||
144 | * makes the allocator a bit slower, but less likely to hit a failure case. | ||||
145 | */ | ||||
146 | psize_t | ||||
147 | pow2divide(psize_t num, psize_t denom) | ||||
148 | { | ||||
149 | int rshift; | ||||
150 | |||||
151 | for (rshift = 0; num > denom; rshift++, denom <<= 1) | ||||
152 | ; | ||||
153 | return (paddr_t)1 << rshift; | ||||
154 | } | ||||
155 | |||||
156 | /* | ||||
157 | * Predicate: lhs is a subrange of rhs. | ||||
158 | * | ||||
159 | * If rhs_low == 0: don't care about lower bound. | ||||
160 | * If rhs_high == 0: don't care about upper bound. | ||||
161 | */ | ||||
162 | #define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high)(((rhs_low) == 0 || (lhs_low) >= (rhs_low)) && ((rhs_high ) == 0 || (lhs_high) <= (rhs_high))) \ | ||||
163 | (((rhs_low) == 0 || (lhs_low) >= (rhs_low)) && \ | ||||
164 | ((rhs_high) == 0 || (lhs_high) <= (rhs_high))) | ||||
165 | |||||
166 | /* | ||||
167 | * Predicate: lhs intersects with rhs. | ||||
168 | * | ||||
169 | * If rhs_low == 0: don't care about lower bound. | ||||
170 | * If rhs_high == 0: don't care about upper bound. | ||||
171 | * Ranges don't intersect if they don't have any page in common, array | ||||
172 | * semantics mean that < instead of <= should be used here. | ||||
173 | */ | ||||
174 | #define PMR_INTERSECTS_WITH(lhs_low, lhs_high, rhs_low, rhs_high)(((rhs_low) == 0 || (rhs_low) < (lhs_high)) && ((rhs_high ) == 0 || (lhs_low) < (rhs_high))) \ | ||||
175 | (((rhs_low) == 0 || (rhs_low) < (lhs_high)) && \ | ||||
176 | ((rhs_high) == 0 || (lhs_low) < (rhs_high))) | ||||
177 | |||||
178 | /* | ||||
179 | * Align to power-of-2 alignment. | ||||
180 | */ | ||||
181 | #define PMR_ALIGN(pgno, align)(((pgno) + ((align) - 1)) & ~((align) - 1)) \ | ||||
182 | (((pgno) + ((align) - 1)) & ~((align) - 1)) | ||||
183 | #define PMR_ALIGN_DOWN(pgno, align)((pgno) & ~((align) - 1)) \ | ||||
184 | ((pgno) & ~((align) - 1)) | ||||
185 | |||||
186 | |||||
187 | /* | ||||
188 | * Comparator: sort by address ascending. | ||||
189 | */ | ||||
190 | int | ||||
191 | uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *lhs, | ||||
192 | const struct uvm_pmemrange *rhs) | ||||
193 | { | ||||
194 | return lhs->low < rhs->low ? -1 : lhs->low > rhs->low; | ||||
195 | } | ||||
196 | |||||
197 | /* | ||||
198 | * Comparator: sort by use ascending. | ||||
199 | * | ||||
200 | * The higher the use value of a range, the more devices need memory in | ||||
201 | * this range. Therefore allocate from the range with the lowest use first. | ||||
202 | */ | ||||
203 | int | ||||
204 | uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs) | ||||
205 | { | ||||
206 | int result; | ||||
207 | |||||
208 | result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use; | ||||
209 | if (result == 0) | ||||
210 | result = uvm_pmemrange_addr_cmp(lhs, rhs); | ||||
211 | return result; | ||||
212 | } | ||||
213 | |||||
214 | int | ||||
215 | uvm_pmr_addr_cmp(const struct vm_page *lhs, const struct vm_page *rhs) | ||||
216 | { | ||||
217 | paddr_t lhs_addr, rhs_addr; | ||||
218 | |||||
219 | lhs_addr = VM_PAGE_TO_PHYS(lhs)((lhs)->phys_addr); | ||||
220 | rhs_addr = VM_PAGE_TO_PHYS(rhs)((rhs)->phys_addr); | ||||
221 | |||||
222 | return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr); | ||||
223 | } | ||||
224 | |||||
225 | int | ||||
226 | uvm_pmr_size_cmp(const struct vm_page *lhs, const struct vm_page *rhs) | ||||
227 | { | ||||
228 | psize_t lhs_size, rhs_size; | ||||
229 | int cmp; | ||||
230 | |||||
231 | /* Using second tree, so we receive pg[1] instead of pg[0]. */ | ||||
232 | lhs_size = (lhs - 1)->fpgsz; | ||||
233 | rhs_size = (rhs - 1)->fpgsz; | ||||
234 | |||||
235 | cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size); | ||||
236 | if (cmp == 0) | ||||
237 | cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1); | ||||
238 | return cmp; | ||||
239 | } | ||||
240 | |||||
241 | /* | ||||
242 | * Find the first range of free pages that is at least sz pages long. | ||||
243 | */ | ||||
struct vm_page *
uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
{
        struct vm_page *node, *best;

        KASSERT(sz >= 1);

        /* Single-page requests are served from the single-page queue. */
        if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti]))
                return TAILQ_FIRST(&pmr->single[mti]);

        /*
         * Descend the size tree looking for the leftmost (smallest)
         * range with fpgsz >= sz.  Tree nodes are pg[1]; step back one
         * to read the range head (see uvm_pmr_size_cmp).
         */
        node = RBT_ROOT(uvm_pmr_size, &pmr->size[mti]);
        best = NULL;
        while (node != NULL) {
                if ((node - 1)->fpgsz >= sz) {
                        /* Big enough: remember it, try to find smaller. */
                        best = (node - 1);
                        node = RBT_LEFT(uvm_objtree, node);
                } else
                        node = RBT_RIGHT(uvm_objtree, node);
        }
        return best;
}
265 | |||||
266 | /* | ||||
267 | * Finds the next range. The next range has a size >= pg->fpgsz. | ||||
268 | * Returns NULL if no more ranges are available. | ||||
269 | */ | ||||
struct vm_page *
uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
{
        struct vm_page *npg;

        KASSERT(pmr != NULL && pg != NULL);
        if (pg->fpgsz == 1) {
                /*
                 * Single-page range: the successor is the next entry on
                 * the single-page queue; once that queue is exhausted,
                 * continue with the smallest multi-page range.
                 */
                if (TAILQ_NEXT(pg, pageq) != NULL)
                        return TAILQ_NEXT(pg, pageq);
                else
                        npg = RBT_MIN(uvm_pmr_size, &pmr->size[mt]);
        } else
                /* Size tree is keyed on pg[1], hence pg + 1 here. */
                npg = RBT_NEXT(uvm_pmr_size, pg + 1);

        /* Convert the pg[1] tree node back to the range head pg[0]. */
        return npg == NULL ? NULL : npg - 1;
}
286 | |||||
287 | /* | ||||
288 | * Finds the previous and next ranges relative to the (uninserted) pg range. | ||||
289 | * | ||||
290 | * *pg_prev == NULL if no previous range is available, that can join with | ||||
291 | * pg. | ||||
292 | * *pg_next == NULL if no next range is available, that can join with | ||||
293 | * pg. | ||||
294 | */ | ||||
void
uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
    struct vm_page **pg_prev, struct vm_page **pg_next)
{
        KASSERT(pg_prev != NULL && pg_next != NULL);

        /* Candidate neighbours by address-tree position. */
        *pg_next = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg);
        if (*pg_next == NULL)
                *pg_prev = RBT_MAX(uvm_pmr_addr, &pmr->addr);
        else
                *pg_prev = RBT_PREV(uvm_pmr_addr, *pg_next);

        KDASSERT(*pg_next == NULL ||
            VM_PAGE_TO_PHYS(*pg_next) > VM_PAGE_TO_PHYS(pg));
        KDASSERT(*pg_prev == NULL ||
            VM_PAGE_TO_PHYS(*pg_prev) < VM_PAGE_TO_PHYS(pg));

        /*
         * Reset if not contig: a neighbour only qualifies for joining
         * when it is physically adjacent, part of the same vm_page
         * array, and of the same memtype.
         */
        if (*pg_prev != NULL &&
            (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fpgsz
            != atop(VM_PAGE_TO_PHYS(pg)) ||
            *pg_prev + (*pg_prev)->fpgsz != pg ||       /* Array broke. */
            uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
                *pg_prev = NULL;
        if (*pg_next != NULL &&
            (atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz
            != atop(VM_PAGE_TO_PHYS(*pg_next)) ||
            pg + pg->fpgsz != *pg_next ||       /* Array broke. */
            uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
                *pg_next = NULL;
        return;
}
327 | |||||
328 | /* | ||||
329 | * Remove a range from the address tree. | ||||
330 | * Address tree maintains pmr counters. | ||||
331 | */ | ||||
void
uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
        /* pg must be a free range currently present in this pmr. */
        KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
        KDASSERT(pg->pg_flags & PQ_FREE);
        RBT_REMOVE(uvm_pmr_addr, &pmr->addr, pg);

        /* The address tree owns the segment count. */
        pmr->nsegs--;
}
341 | /* | ||||
342 | * Remove a range from the size tree. | ||||
343 | */ | ||||
void
uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
        int memtype;
#ifdef DEBUG
        struct vm_page *i;
#endif

        KDASSERT(pg->fpgsz >= 1);
        KDASSERT(pg->pg_flags & PQ_FREE);
        memtype = uvm_pmr_pg_to_memtype(pg);

        if (pg->fpgsz == 1) {
                /* Single-page ranges live on the single queue. */
#ifdef DEBUG
                /* Verify pg is actually on the queue before removing. */
                TAILQ_FOREACH(i, &pmr->single[memtype], pageq) {
                        if (i == pg)
                                break;
                }
                KDASSERT(i == pg);
#endif
                TAILQ_REMOVE(&pmr->single[memtype], pg, pageq);
        } else {
                /* Multi-page ranges are keyed on pg[1] in the size tree. */
                KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[memtype],
                    pg + 1) == pg + 1);
                RBT_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1);
        }
}
371 | /* Remove from both trees. */ | ||||
void
uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
        uvm_pmr_assertvalid(pmr);
        /* Size bookkeeping first, then the address tree (which owns nsegs). */
        uvm_pmr_remove_size(pmr, pg);
        uvm_pmr_remove_addr(pmr, pg);
        uvm_pmr_assertvalid(pmr);
}
380 | |||||
381 | /* | ||||
382 | * Insert the range described in pg. | ||||
383 | * Returns the range thus created (which may be joined with the previous and | ||||
384 | * next ranges). | ||||
385 | * If no_join, the caller guarantees that the range cannot possibly join | ||||
386 | * with adjacent ranges. | ||||
387 | */ | ||||
struct vm_page *
uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
{
        struct vm_page *prev, *next;

#ifdef DEBUG
        struct vm_page *i;
        int mt;
#endif

        KDASSERT(pg->pg_flags & PQ_FREE);
        KDASSERT(pg->fpgsz >= 1);

#ifdef DEBUG
        /* pg must not already be tracked in any queue or tree. */
        for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
                TAILQ_FOREACH(i, &pmr->single[mt], pageq)
                        KDASSERT(i != pg);
                if (pg->fpgsz > 1) {
                        KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mt],
                            pg + 1) == NULL);
                }
                KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL);
        }
#endif

        if (!no_join) {
                uvm_pmr_pnaddr(pmr, pg, &prev, &next);
                if (next != NULL) {
                        /* Absorb the following range into pg. */
                        uvm_pmr_remove_size(pmr, next);
                        uvm_pmr_remove_addr(pmr, next);
                        pg->fpgsz += next->fpgsz;
                        next->fpgsz = 0;
                }
                if (prev != NULL) {
                        /*
                         * Absorb pg into the preceding range; prev stays
                         * in the address tree, so return it instead of pg.
                         */
                        uvm_pmr_remove_size(pmr, prev);
                        prev->fpgsz += pg->fpgsz;
                        pg->fpgsz = 0;
                        return prev;
                }
        }

        RBT_INSERT(uvm_pmr_addr, &pmr->addr, pg);

        /* The address tree owns the segment count. */
        pmr->nsegs++;

        return pg;
}
435 | /* | ||||
436 | * Insert the range described in pg. | ||||
437 | * Returns the range thus created (which may be joined with the previous and | ||||
438 | * next ranges). | ||||
439 | * Page must already be in the address tree. | ||||
440 | */ | ||||
void
uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
{
        int memtype;
#ifdef DEBUG
        struct vm_page *i;
        int mti;
#endif

        KDASSERT(pg->fpgsz >= 1);
        KDASSERT(pg->pg_flags & PQ_FREE);

        memtype = uvm_pmr_pg_to_memtype(pg);
#ifdef DEBUG
        /* pg must be in the address tree but not yet size-tracked. */
        for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
                TAILQ_FOREACH(i, &pmr->single[mti], pageq)
                        KDASSERT(i != pg);
                if (pg->fpgsz > 1) {
                        KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mti],
                            pg + 1) == NULL);
                }
                KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
        }
        /* Every page of the range must share one memtype. */
        for (i = pg; i < pg + pg->fpgsz; i++)
                KASSERT(uvm_pmr_pg_to_memtype(i) == memtype);
#endif

        /* Single pages go on the queue; larger ranges key pg[1] in the tree. */
        if (pg->fpgsz == 1)
                TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq);
        else
                RBT_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1);
}
473 | /* Insert in both trees. */ | ||||
struct vm_page *
uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
{
        uvm_pmr_assertvalid(pmr);
        /*
         * The address insert may join pg with its neighbours; the size
         * insert must operate on the resulting (possibly merged) range.
         */
        pg = uvm_pmr_insert_addr(pmr, pg, no_join);
        uvm_pmr_insert_size(pmr, pg);
        uvm_pmr_assertvalid(pmr);
        return pg;
}
483 | |||||
484 | /* | ||||
485 | * Find the last page that is part of this segment. | ||||
486 | * => pg: the range at which to start the search. | ||||
487 | * => boundary: the page number boundary specification (0 = no boundary). | ||||
488 | * => pmr: the pmemrange of the page. | ||||
489 | * | ||||
490 | * This function returns 1 before the next range, so if you want to have the | ||||
491 | * next range, you need to run TAILQ_NEXT(result, pageq) after calling. | ||||
492 | * The reason is that this way, the length of the segment is easily | ||||
493 | * calculated using: atop(result) - atop(pg) + 1. | ||||
494 | * Hence this function also never returns NULL. | ||||
495 | */ | ||||
struct vm_page *
uvm_pmr_findnextsegment(struct uvm_pmemrange *pmr,
    struct vm_page *pg, paddr_t boundary)
{
        paddr_t first_boundary;
        struct vm_page *next;
        struct vm_page *prev;

        KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&
            pmr->high > atop(VM_PAGE_TO_PHYS(pg)));
        if (boundary != 0) {
                /* First boundary page number at or after pg. */
                first_boundary =
                    PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary);
        } else
                first_boundary = 0;

        /*
         * Increase next until it hits the first page of the next segment.
         *
         * While loop checks the following:
         * - next != NULL        we have not reached the end of pgl
         * - boundary == 0 || next < first_boundary
         *                       we do not cross a boundary
         * - atop(prev) + 1 == atop(next)
         *                       still in the same segment
         * - low <= last
         * - high > last         still in the same memory range
         * - memtype is equal    allocator is unable to view different
         *                       memtypes as part of the same segment
         * - prev + 1 == next    no array breakage occurs
         */
        prev = pg;
        next = TAILQ_NEXT(prev, pageq);
        while (next != NULL &&
            (boundary == 0 || atop(VM_PAGE_TO_PHYS(next)) < first_boundary) &&
            atop(VM_PAGE_TO_PHYS(prev)) + 1 == atop(VM_PAGE_TO_PHYS(next)) &&
            pmr->low <= atop(VM_PAGE_TO_PHYS(next)) &&
            pmr->high > atop(VM_PAGE_TO_PHYS(next)) &&
            uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
            prev + 1 == next) {
                prev = next;
                next = TAILQ_NEXT(prev, pageq);
        }

        /*
         * End of this segment: prev is the last page that still belongs
         * to pg's segment (never NULL; see function comment).
         */
        return prev;
}
545 | |||||
546 | /* | ||||
547 | * Find the first page that is part of this segment. | ||||
548 | * => pg: the range at which to start the search. | ||||
549 | * => boundary: the page number boundary specification (0 = no boundary). | ||||
550 | * => pmr: the pmemrange of the page. | ||||
551 | * | ||||
552 | * This function returns 1 after the previous range, so if you want to have the | ||||
553 | * previous range, you need to run TAILQ_NEXT(result, pageq) after calling. | ||||
554 | * The reason is that this way, the length of the segment is easily | ||||
555 | * calculated using: atop(pg) - atop(result) + 1. | ||||
556 | * Hence this function also never returns NULL. | ||||
557 | */ | ||||
struct vm_page *
uvm_pmr_findprevsegment(struct uvm_pmemrange *pmr,
    struct vm_page *pg, paddr_t boundary)
{
        paddr_t first_boundary;
        struct vm_page *next;
        struct vm_page *prev;

        KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&
            pmr->high > atop(VM_PAGE_TO_PHYS(pg)));
        if (boundary != 0) {
                /* Boundary page number at or below pg. */
                first_boundary =
                    PMR_ALIGN_DOWN(atop(VM_PAGE_TO_PHYS(pg)), boundary);
        } else
                first_boundary = 0;

        /*
         * Increase next until it hits the first page of the previous
         * segment.
         *
         * While loop checks the following:
         * - next != NULL        we have not reached the end of pgl
         * - boundary == 0 || next >= first_boundary
         *                       we do not cross a boundary
         * - atop(prev) - 1 == atop(next)
         *                       still in the same segment
         * - low <= last
         * - high > last         still in the same memory range
         * - memtype is equal    allocator is unable to view different
         *                       memtypes as part of the same segment
         * - prev - 1 == next    no array breakage occurs
         */
        prev = pg;
        next = TAILQ_NEXT(prev, pageq);
        while (next != NULL &&
            (boundary == 0 || atop(VM_PAGE_TO_PHYS(next)) >= first_boundary) &&
            atop(VM_PAGE_TO_PHYS(prev)) - 1 == atop(VM_PAGE_TO_PHYS(next)) &&
            pmr->low <= atop(VM_PAGE_TO_PHYS(next)) &&
            pmr->high > atop(VM_PAGE_TO_PHYS(next)) &&
            uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
            prev - 1 == next) {
                prev = next;
                next = TAILQ_NEXT(prev, pageq);
        }

        /*
         * Start of this segment: prev is the lowest page that still
         * belongs to pg's segment (never NULL; see function comment).
         */
        return prev;
}
607 | |||||
608 | /* | ||||
609 | * Remove the first segment of contiguous pages from pgl. | ||||
610 | * A segment ends if it crosses boundary (unless boundary = 0) or | ||||
611 | * if it would enter a different uvm_pmemrange. | ||||
612 | * | ||||
613 | * Work: the page range that the caller is currently working with. | ||||
614 | * May be null. | ||||
615 | * | ||||
616 | * If is_desperate is non-zero, the smallest segment is erased. Otherwise, | ||||
617 | * the first segment is erased (which, if called by uvm_pmr_getpages(), | ||||
618 | * probably is the smallest or very close to it). | ||||
619 | */ | ||||
620 | psize_t | ||||
621 | uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary, | ||||
622 | struct vm_page **work, int is_desperate) | ||||
623 | { | ||||
624 | struct vm_page *start, *end, *iter, *iter_end, *inserted, *lowest; | ||||
625 | psize_t count; | ||||
626 | struct uvm_pmemrange *pmr, *pmr_iter; | ||||
627 | |||||
628 | KASSERT(!TAILQ_EMPTY(pgl))((!(((pgl)->tqh_first) == ((void *)0))) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 628, "!TAILQ_EMPTY(pgl)" )); | ||||
629 | |||||
630 | /* | ||||
631 | * Initialize to first page. | ||||
632 | * Unless desperate scan finds a better candidate, this is what'll be | ||||
633 | * erased. | ||||
634 | */ | ||||
635 | start = TAILQ_FIRST(pgl)((pgl)->tqh_first); | ||||
636 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start))((((start)->phys_addr)) >> 12)); | ||||
637 | end = uvm_pmr_findnextsegment(pmr, start, boundary); | ||||
638 | |||||
639 | /* | ||||
640 | * If we are desperate, we _really_ want to get rid of the smallest | ||||
641 | * element (rather than a close match to the smallest element). | ||||
642 | */ | ||||
643 | if (is_desperate) { | ||||
644 | /* Linear search for smallest segment. */ | ||||
645 | pmr_iter = pmr; | ||||
646 | for (iter = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next); | ||||
647 | iter != NULL((void *)0) && start != end; | ||||
648 | iter = TAILQ_NEXT(iter_end, pageq)((iter_end)->pageq.tqe_next)) { | ||||
649 | /* | ||||
650 | * Only update pmr if it doesn't match current | ||||
651 | * iteration. | ||||
652 | */ | ||||
653 | if (pmr->low > atop(VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12) || | ||||
654 | pmr->high <= atop(VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12)) { | ||||
655 | pmr_iter = uvm_pmemrange_find(atop(((((iter)->phys_addr)) >> 12) | ||||
656 | VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12)); | ||||
657 | } | ||||
658 | |||||
659 | iter_end = uvm_pmr_findnextsegment(pmr_iter, iter, | ||||
660 | boundary); | ||||
661 | |||||
662 | /* | ||||
663 | * Current iteration is smaller than best match so | ||||
664 | * far; update. | ||||
665 | */ | ||||
666 | if (VM_PAGE_TO_PHYS(iter_end)((iter_end)->phys_addr) - VM_PAGE_TO_PHYS(iter)((iter)->phys_addr) < | ||||
667 | VM_PAGE_TO_PHYS(end)((end)->phys_addr) - VM_PAGE_TO_PHYS(start)((start)->phys_addr)) { | ||||
668 | start = iter; | ||||
669 | end = iter_end; | ||||
670 | pmr = pmr_iter; | ||||
671 | } | ||||
672 | } | ||||
673 | } | ||||
674 | |||||
675 | /* | ||||
676 | * Calculate count and end of the list. | ||||
677 | */ | ||||
678 | count = atop(VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start))((((end)->phys_addr) - ((start)->phys_addr)) >> 12 ) + 1; | ||||
679 | lowest = start; | ||||
680 | end = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next); | ||||
681 | |||||
682 | /* | ||||
683 | * Actually remove the range of pages. | ||||
684 | * | ||||
685 | * Sadly, this cannot be done using pointer iteration: | ||||
686 | * vm_physseg is not guaranteed to be sorted on address, hence | ||||
687 | * uvm_page_init() may not have initialized its array sorted by | ||||
688 | * page number. | ||||
689 | */ | ||||
690 | for (iter = start; iter != end; iter = iter_end) { | ||||
691 | iter_end = TAILQ_NEXT(iter, pageq)((iter)->pageq.tqe_next); | ||||
692 | TAILQ_REMOVE(pgl, iter, pageq)do { if (((iter)->pageq.tqe_next) != ((void *)0)) (iter)-> pageq.tqe_next->pageq.tqe_prev = (iter)->pageq.tqe_prev ; else (pgl)->tqh_last = (iter)->pageq.tqe_prev; *(iter )->pageq.tqe_prev = (iter)->pageq.tqe_next; ((iter)-> pageq.tqe_prev) = ((void *)-1); ((iter)->pageq.tqe_next) = ((void *)-1); } while (0); | ||||
693 | } | ||||
694 | |||||
695 | lowest->fpgsz = count; | ||||
696 | inserted = uvm_pmr_insert(pmr, lowest, 0); | ||||
697 | |||||
698 | /* | ||||
699 | * If the caller was working on a range and this function modified | ||||
700 | * that range, update the pointer. | ||||
701 | */ | ||||
702 | if (work != NULL((void *)0) && *work != NULL((void *)0) && | ||||
703 | atop(VM_PAGE_TO_PHYS(inserted))((((inserted)->phys_addr)) >> 12) <= atop(VM_PAGE_TO_PHYS(*work))((((*work)->phys_addr)) >> 12) && | ||||
704 | atop(VM_PAGE_TO_PHYS(inserted))((((inserted)->phys_addr)) >> 12) + inserted->fpgsz > | ||||
705 | atop(VM_PAGE_TO_PHYS(*work))((((*work)->phys_addr)) >> 12)) | ||||
706 | *work = inserted; | ||||
707 | return count; | ||||
708 | } | ||||
709 | |||||
/*
 * Remove the first segment of contiguous pages from a pgl
 * with the list elements in reverse order of physaddr.
 *
 * A segment ends if it would enter a different uvm_pmemrange.
 *
 * Stores starting physical address of the segment in pstart.
 */
718 | psize_t | ||||
719 | uvm_pmr_remove_1strange_reverse(struct pglist *pgl, paddr_t *pstart) | ||||
720 | { | ||||
721 | struct vm_page *start, *end, *iter, *iter_end, *lowest; | ||||
722 | psize_t count; | ||||
723 | struct uvm_pmemrange *pmr; | ||||
724 | |||||
725 | KASSERT(!TAILQ_EMPTY(pgl))((!(((pgl)->tqh_first) == ((void *)0))) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 725, "!TAILQ_EMPTY(pgl)" )); | ||||
726 | |||||
727 | start = TAILQ_FIRST(pgl)((pgl)->tqh_first); | ||||
728 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start))((((start)->phys_addr)) >> 12)); | ||||
729 | end = uvm_pmr_findprevsegment(pmr, start, 0); | ||||
730 | |||||
731 | KASSERT(end <= start)((end <= start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 731, "end <= start")); | ||||
732 | |||||
733 | /* | ||||
734 | * Calculate count and end of the list. | ||||
735 | */ | ||||
736 | count = atop(VM_PAGE_TO_PHYS(start) - VM_PAGE_TO_PHYS(end))((((start)->phys_addr) - ((end)->phys_addr)) >> 12 ) + 1; | ||||
737 | lowest = end; | ||||
738 | end = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next); | ||||
739 | |||||
740 | /* | ||||
741 | * Actually remove the range of pages. | ||||
742 | * | ||||
743 | * Sadly, this cannot be done using pointer iteration: | ||||
744 | * vm_physseg is not guaranteed to be sorted on address, hence | ||||
745 | * uvm_page_init() may not have initialized its array sorted by | ||||
746 | * page number. | ||||
747 | */ | ||||
748 | for (iter = start; iter != end; iter = iter_end) { | ||||
749 | iter_end = TAILQ_NEXT(iter, pageq)((iter)->pageq.tqe_next); | ||||
750 | TAILQ_REMOVE(pgl, iter, pageq)do { if (((iter)->pageq.tqe_next) != ((void *)0)) (iter)-> pageq.tqe_next->pageq.tqe_prev = (iter)->pageq.tqe_prev ; else (pgl)->tqh_last = (iter)->pageq.tqe_prev; *(iter )->pageq.tqe_prev = (iter)->pageq.tqe_next; ((iter)-> pageq.tqe_prev) = ((void *)-1); ((iter)->pageq.tqe_next) = ((void *)-1); } while (0); | ||||
751 | } | ||||
752 | |||||
753 | lowest->fpgsz = count; | ||||
754 | (void) uvm_pmr_insert(pmr, lowest, 0); | ||||
755 | |||||
756 | *pstart = VM_PAGE_TO_PHYS(lowest)((lowest)->phys_addr); | ||||
757 | return count; | ||||
758 | } | ||||
759 | |||||
/*
 * Extract a number of pages from a segment of free pages.
 * Called by uvm_pmr_getpages.
 *
 * Returns the segment that was created from pages left over at the tail
 * of the remove set of pages, or NULL if no pages were left at the tail.
 */
767 | struct vm_page * | ||||
768 | uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg, | ||||
769 | paddr_t start, paddr_t end, struct pglist *result) | ||||
770 | { | ||||
771 | struct vm_page *after, *pg_i; | ||||
772 | psize_t before_sz, after_sz; | ||||
773 | #ifdef DEBUG | ||||
774 | psize_t i; | ||||
775 | #endif | ||||
776 | |||||
777 | KDASSERT(end > start)((void)0); | ||||
778 | KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)))((void)0); | ||||
779 | KDASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz)((void)0); | ||||
780 | KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start)((void)0); | ||||
781 | KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz >= end)((void)0); | ||||
782 | |||||
783 | before_sz = start - atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12); | ||||
784 | after_sz = atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12) + pg->fpgsz - end; | ||||
785 | KDASSERT(before_sz + after_sz + (end - start) == pg->fpgsz)((void)0); | ||||
786 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
787 | |||||
788 | uvm_pmr_remove_size(pmr, pg); | ||||
789 | if (before_sz == 0) | ||||
790 | uvm_pmr_remove_addr(pmr, pg); | ||||
791 | after = pg + before_sz + (end - start); | ||||
792 | |||||
793 | /* Add selected pages to result. */ | ||||
794 | for (pg_i = pg + before_sz; pg_i != after; pg_i++) { | ||||
795 | KASSERT(pg_i->pg_flags & PQ_FREE)((pg_i->pg_flags & 0x00010000) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 795, "pg_i->pg_flags & PQ_FREE" )); | ||||
796 | pg_i->fpgsz = 0; | ||||
797 | TAILQ_INSERT_TAIL(result, pg_i, pageq)do { (pg_i)->pageq.tqe_next = ((void *)0); (pg_i)->pageq .tqe_prev = (result)->tqh_last; *(result)->tqh_last = ( pg_i); (result)->tqh_last = &(pg_i)->pageq.tqe_next ; } while (0); | ||||
798 | } | ||||
799 | |||||
800 | /* Before handling. */ | ||||
801 | if (before_sz > 0) { | ||||
802 | pg->fpgsz = before_sz; | ||||
803 | uvm_pmr_insert_size(pmr, pg); | ||||
804 | } | ||||
805 | |||||
806 | /* After handling. */ | ||||
807 | if (after_sz > 0) { | ||||
808 | #ifdef DEBUG | ||||
809 | for (i = 0; i < after_sz; i++) { | ||||
810 | KASSERT(!uvm_pmr_isfree(after + i))((!uvm_pmr_isfree(after + i)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 810, "!uvm_pmr_isfree(after + i)" )); | ||||
811 | } | ||||
812 | #endif | ||||
813 | KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end)((void)0); | ||||
814 | after->fpgsz = after_sz; | ||||
815 | after = uvm_pmr_insert_addr(pmr, after, 1); | ||||
816 | uvm_pmr_insert_size(pmr, after); | ||||
817 | } | ||||
818 | |||||
819 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
820 | return (after_sz > 0 ? after : NULL((void *)0)); | ||||
821 | } | ||||
822 | |||||
/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
828 | extern volatile int uvm_nowait_failed; | ||||
829 | |||||
/*
 * Acquire a number of pages.
 *
 * count:	the number of pages returned
 * start:	lowest page number
 * end:		highest page number +1
 *		(start = end = 0: no limitation)
 * align:	power-of-2 alignment constraint (align = 1: no alignment)
 * boundary:	power-of-2 boundary (boundary = 0: no boundary)
 * maxseg:	maximum number of segments to return
 * flags:	UVM_PLA_* flags
 * result:	returned pages storage (uses pageq)
 */
843 | int | ||||
844 | uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align, | ||||
845 | paddr_t boundary, int maxseg, int flags, struct pglist *result) | ||||
846 | { | ||||
847 | struct uvm_pmemrange *pmr; /* Iterate memory ranges. */ | ||||
848 | struct vm_page *found, *f_next; /* Iterate chunks. */ | ||||
849 | psize_t fcount; /* Current found pages. */ | ||||
850 | int fnsegs; /* Current segment counter. */ | ||||
851 | int try, start_try; | ||||
852 | psize_t search[3]; | ||||
853 | paddr_t fstart, fend; /* Pages to be taken from found. */ | ||||
854 | int memtype; /* Requested memtype. */ | ||||
855 | int memtype_init; /* Best memtype. */ | ||||
856 | int desperate; /* True if allocation failed. */ | ||||
857 | #ifdef DIAGNOSTIC1 | ||||
858 | struct vm_page *diag_prev; /* Used during validation. */ | ||||
859 | #endif /* DIAGNOSTIC */ | ||||
860 | |||||
861 | /* | ||||
862 | * Validate arguments. | ||||
863 | */ | ||||
864 | KASSERT(count > 0)((count > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 864, "count > 0")); | ||||
| |||||
865 | KASSERT(start == 0 || end == 0 || start < end)((start == 0 || end == 0 || start < end) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 865, "start == 0 || end == 0 || start < end" )); | ||||
866 | KASSERT(align >= 1)((align >= 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 866, "align >= 1")); | ||||
867 | KASSERT(powerof2(align))((((((align)-1)&(align))==0)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 867, "powerof2(align)") ); | ||||
868 | KASSERT(maxseg > 0)((maxseg > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 868, "maxseg > 0")); | ||||
869 | KASSERT(boundary == 0 || powerof2(boundary))((boundary == 0 || ((((boundary)-1)&(boundary))==0)) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 869, "boundary == 0 || powerof2(boundary)")); | ||||
870 | KASSERT(boundary == 0 || maxseg * boundary >= count)((boundary == 0 || maxseg * boundary >= count) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 870 , "boundary == 0 || maxseg * boundary >= count")); | ||||
871 | KASSERT(TAILQ_EMPTY(result))(((((result)->tqh_first) == ((void *)0))) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 871, "TAILQ_EMPTY(result)" )); | ||||
872 | KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT))((!(flags & 0x0001) ^ !(flags & 0x0002)) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 872, "!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT)" )); | ||||
873 | |||||
874 | /* | ||||
875 | * TRYCONTIG is a noop if you only want a single segment. | ||||
876 | * Remove it if that's the case: otherwise it'll deny the fast | ||||
877 | * allocation. | ||||
878 | */ | ||||
879 | if (maxseg == 1 || count == 1) | ||||
880 | flags &= ~UVM_PLA_TRYCONTIG0x0008; | ||||
881 | |||||
882 | /* | ||||
883 | * Configure search. | ||||
884 | * | ||||
885 | * search[0] is one segment, only used in UVM_PLA_TRYCONTIG case. | ||||
886 | * search[1] is multiple segments, chosen to fulfill the search in | ||||
887 | * approximately even-sized segments. | ||||
888 | * This is a good trade-off between slightly reduced allocation speed | ||||
889 | * and less fragmentation. | ||||
890 | * search[2] is the worst case, in which all segments are evaluated. | ||||
891 | * This provides the least fragmentation, but makes the search | ||||
892 | * possibly longer (although in the case it is selected, that no | ||||
893 | * longer matters most). | ||||
894 | * | ||||
895 | * The exception is when maxseg == 1: since we can only fulfill that | ||||
896 | * with one segment of size pages, only a single search type has to | ||||
897 | * be attempted. | ||||
898 | */ | ||||
899 | if (maxseg
| ||||
900 | start_try = 2; | ||||
901 | search[2] = count; | ||||
902 | } else if (maxseg >= count && (flags & UVM_PLA_TRYCONTIG0x0008) == 0) { | ||||
903 | start_try = 2; | ||||
904 | search[2] = 1; | ||||
905 | } else { | ||||
906 | start_try = 0; | ||||
907 | search[0] = count; | ||||
908 | search[1] = pow2divide(count, maxseg); | ||||
909 | search[2] = 1; | ||||
910 | if ((flags & UVM_PLA_TRYCONTIG0x0008) == 0) | ||||
911 | start_try = 1; | ||||
912 | if (search[1] >= search[0]) { | ||||
913 | search[1] = search[0]; | ||||
914 | start_try = 1; | ||||
915 | } | ||||
916 | if (search[2] >= search[start_try]) { | ||||
917 | start_try = 2; | ||||
918 | } | ||||
919 | } | ||||
920 | |||||
921 | /* | ||||
922 | * Memory type: if zeroed memory is requested, traverse the zero set. | ||||
923 | * Otherwise, traverse the dirty set. | ||||
924 | * | ||||
925 | * The memtype iterator is reinitialized to memtype_init on entrance | ||||
926 | * of a pmemrange. | ||||
927 | */ | ||||
928 | if (flags & UVM_PLA_ZERO0x0004) | ||||
929 | memtype_init = UVM_PMR_MEMTYPE_ZERO1; | ||||
930 | else | ||||
931 | memtype_init = UVM_PMR_MEMTYPE_DIRTY0; | ||||
932 | |||||
933 | /* | ||||
934 | * Initially, we're not desperate. | ||||
935 | * | ||||
936 | * Note that if we return from a sleep, we are still desperate. | ||||
937 | * Chances are that memory pressure is still high, so resetting | ||||
938 | * seems over-optimistic to me. | ||||
939 | */ | ||||
940 | desperate = 0; | ||||
941 | |||||
942 | again: | ||||
943 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
944 | |||||
945 | /* | ||||
946 | * check to see if we need to generate some free pages waking | ||||
947 | * the pagedaemon. | ||||
948 | */ | ||||
949 | if ((uvmexp.free - BUFPAGES_DEFICIT(((buflowpages - bcstats.numbufpages) < 0) ? 0 : buflowpages - bcstats.numbufpages)) < uvmexp.freemin || | ||||
950 | ((uvmexp.free - BUFPAGES_DEFICIT(((buflowpages - bcstats.numbufpages) < 0) ? 0 : buflowpages - bcstats.numbufpages)) < uvmexp.freetarg && | ||||
951 | (uvmexp.inactive + BUFPAGES_INACT(((bcstats.numcleanpages - buflowpages) < 0) ? 0 : bcstats .numcleanpages - buflowpages)) < uvmexp.inactarg)) | ||||
952 | wakeup(&uvm.pagedaemon); | ||||
953 | |||||
954 | /* | ||||
955 | * fail if any of these conditions is true: | ||||
956 | * [1] there really are no free pages, or | ||||
957 | * [2] only kernel "reserved" pages remain and | ||||
958 | * the UVM_PLA_USERESERVE flag wasn't used. | ||||
959 | * [3] only pagedaemon "reserved" pages remain and | ||||
960 | * the requestor isn't the pagedaemon nor the syncer. | ||||
961 | */ | ||||
962 | if ((uvmexp.free <= (uvmexp.reserve_kernel + count)) && | ||||
963 | !(flags & UVM_PLA_USERESERVE0x0040)) { | ||||
964 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
965 | return ENOMEM12; | ||||
966 | } | ||||
967 | |||||
968 | if ((uvmexp.free <= (uvmexp.reserve_pagedaemon + count)) && | ||||
969 | (curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc != uvm.pagedaemon_proc) && (curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc != syncerproc)) { | ||||
970 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
971 | if (flags & UVM_PLA_WAITOK0x0001) { | ||||
972 | uvm_wait("uvm_pmr_getpages"); | ||||
973 | goto again; | ||||
974 | } | ||||
975 | return ENOMEM12; | ||||
976 | } | ||||
977 | |||||
978 | retry: /* Return point after sleeping. */ | ||||
979 | fcount = 0; | ||||
980 | fnsegs = 0; | ||||
981 | |||||
982 | retry_desperate: | ||||
983 | /* | ||||
984 | * If we just want any page(s), go for the really fast option. | ||||
985 | */ | ||||
986 | if (count
| ||||
987 | (flags & UVM_PLA_TRYCONTIG0x0008) == 0) { | ||||
988 | fcount += uvm_pmr_get1page(count - fcount, memtype_init, | ||||
989 | result, start, end, 0); | ||||
990 | |||||
991 | /* | ||||
992 | * If we found sufficient pages, go to the success exit code. | ||||
993 | * | ||||
994 | * Otherwise, go immediately to fail, since we collected | ||||
995 | * all we could anyway. | ||||
996 | */ | ||||
997 | if (fcount == count) | ||||
998 | goto out; | ||||
999 | else | ||||
1000 | goto fail; | ||||
1001 | } | ||||
1002 | |||||
1003 | /* | ||||
1004 | * The heart of the contig case. | ||||
1005 | * | ||||
1006 | * The code actually looks like this: | ||||
1007 | * | ||||
1008 | * foreach (struct pmemrange) { | ||||
1009 | * foreach (memtype) { | ||||
1010 | * foreach(try) { | ||||
1011 | * foreach (free range of memtype in pmemrange, | ||||
1012 | * starting at search[try]) { | ||||
1013 | * while (range has space left) | ||||
1014 | * take from range | ||||
1015 | * } | ||||
1016 | * } | ||||
1017 | * } | ||||
1018 | * | ||||
1019 | * if next pmemrange has higher usecount than current: | ||||
1020 | * enter desperate case (which will drain the pmemranges | ||||
1021 | * until empty prior to moving to the next one) | ||||
1022 | * } | ||||
1023 | * | ||||
1024 | * When desperate is activated, try always starts at the highest | ||||
1025 | * value. The memtype loop is using a goto ReScanMemtype. | ||||
1026 | * The try loop is using a goto ReScan. | ||||
1027 | * The 'range has space left' loop uses label DrainFound. | ||||
1028 | * | ||||
1029 | * Writing them all as loops would take up a lot of screen space in | ||||
1030 | * the form of indentation and some parts are easier to express | ||||
1031 | * using the labels. | ||||
1032 | */ | ||||
1033 | |||||
1034 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||
1035 | /* Empty range. */ | ||||
1036 | if (pmr->nsegs == 0) | ||||
1037 | continue; | ||||
1038 | |||||
1039 | /* Outside requested range. */ | ||||
1040 | if (!PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)(((start) == 0 || (start) < (pmr->high)) && ((end ) == 0 || (pmr->low) < (end)))) | ||||
1041 | continue; | ||||
1042 | |||||
1043 | memtype = memtype_init; | ||||
1044 | |||||
1045 | rescan_memtype: /* Return point at memtype++. */ | ||||
1046 | try = start_try; | ||||
1047 | |||||
1048 | rescan: /* Return point at try++. */ | ||||
1049 | for (found = uvm_pmr_nfindsz(pmr, search[try], memtype); | ||||
1050 | found != NULL((void *)0); | ||||
1051 | found = f_next) { | ||||
1052 | f_next = uvm_pmr_nextsz(pmr, found, memtype); | ||||
1053 | |||||
1054 | fstart = atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12); | ||||
1055 | if (start != 0) | ||||
1056 | fstart = MAX(start, fstart)(((start)>(fstart))?(start):(fstart)); | ||||
1057 | drain_found: | ||||
1058 | /* | ||||
1059 | * Throw away the first segment if fnsegs == maxseg | ||||
1060 | * | ||||
1061 | * Note that f_next is still valid after this call, | ||||
1062 | * since we only allocated from entries before f_next. | ||||
1063 | * We don't revisit the entries we already extracted | ||||
1064 | * from unless we entered the desperate case. | ||||
1065 | */ | ||||
1066 | if (fnsegs == maxseg) { | ||||
1067 | fnsegs--; | ||||
1068 | fcount -= | ||||
1069 | uvm_pmr_remove_1strange(result, boundary, | ||||
1070 | &found, desperate); | ||||
1071 | } | ||||
1072 | |||||
1073 | fstart = PMR_ALIGN(fstart, align)(((fstart) + ((align) - 1)) & ~((align) - 1)); | ||||
1074 | fend = atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + found->fpgsz; | ||||
1075 | if (end != 0) | ||||
1076 | fend = MIN(end, fend)(((end)<(fend))?(end):(fend)); | ||||
1077 | if (boundary != 0) { | ||||
1078 | fend = | ||||
1079 | MIN(fend, PMR_ALIGN(fstart + 1, boundary))(((fend)<((((fstart + 1) + ((boundary) - 1)) & ~((boundary ) - 1))))?(fend):((((fstart + 1) + ((boundary) - 1)) & ~( (boundary) - 1)))); | ||||
1080 | } | ||||
1081 | if (fstart >= fend) | ||||
1082 | continue; | ||||
1083 | if (fend - fstart > count - fcount) | ||||
1084 | fend = fstart + (count - fcount); | ||||
1085 | |||||
1086 | fcount += fend - fstart; | ||||
1087 | fnsegs++; | ||||
1088 | found = uvm_pmr_extract_range(pmr, found, | ||||
1089 | fstart, fend, result); | ||||
1090 | |||||
1091 | if (fcount == count) | ||||
1092 | goto out; | ||||
1093 | |||||
1094 | /* | ||||
1095 | * If there's still space left in found, try to | ||||
1096 | * fully drain it prior to continuing. | ||||
1097 | */ | ||||
1098 | if (found != NULL((void *)0)) { | ||||
1099 | fstart = fend; | ||||
1100 | goto drain_found; | ||||
1101 | } | ||||
1102 | } | ||||
1103 | |||||
1104 | /* Try a smaller search now. */ | ||||
1105 | if (++try < nitems(search)(sizeof((search)) / sizeof((search)[0]))) | ||||
1106 | goto rescan; | ||||
1107 | |||||
1108 | /* | ||||
1109 | * Exhaust all memory types prior to going to the next memory | ||||
1110 | * segment. | ||||
1111 | * This means that zero-vs-dirty are eaten prior to moving | ||||
1112 | * to a pmemrange with a higher use-count. | ||||
1113 | * | ||||
1114 | * Code is basically a difficult way of writing: | ||||
1115 | * memtype = memtype_init; | ||||
1116 | * do { | ||||
1117 | * ...; | ||||
1118 | * memtype += 1; | ||||
1119 | * memtype %= MEMTYPE_MAX; | ||||
1120 | * } while (memtype != memtype_init); | ||||
1121 | */ | ||||
1122 | memtype += 1; | ||||
1123 | if (memtype == UVM_PMR_MEMTYPE_MAX2) | ||||
1124 | memtype = 0; | ||||
1125 | if (memtype != memtype_init) | ||||
1126 | goto rescan_memtype; | ||||
1127 | |||||
1128 | /* | ||||
1129 | * If not desperate, enter desperate case prior to eating all | ||||
1130 | * the good stuff in the next range. | ||||
1131 | */ | ||||
1132 | if (!desperate && TAILQ_NEXT(pmr, pmr_use)((pmr)->pmr_use.tqe_next) != NULL((void *)0) && | ||||
1133 | TAILQ_NEXT(pmr, pmr_use)((pmr)->pmr_use.tqe_next)->use != pmr->use) | ||||
1134 | break; | ||||
1135 | } | ||||
1136 | |||||
1137 | /* | ||||
1138 | * Not enough memory of the requested type available. Fall back to | ||||
1139 | * less good memory that we'll clean up better later. | ||||
1140 | * | ||||
1141 | * This algorithm is not very smart though, it just starts scanning | ||||
1142 | * a different typed range, but the nicer ranges of the previous | ||||
1143 | * iteration may fall out. Hence there is a small chance of a false | ||||
1144 | * negative. | ||||
1145 | * | ||||
1146 | * When desperate: scan all sizes starting at the smallest | ||||
1147 | * (start_try = 1) and do not consider UVM_PLA_TRYCONTIG (which may | ||||
1148 | * allow us to hit the fast path now). | ||||
1149 | * | ||||
1150 | * Also, because we will revisit entries we scanned before, we need | ||||
1151 | * to reset the page queue, or we may end up releasing entries in | ||||
1152 | * such a way as to invalidate f_next. | ||||
1153 | */ | ||||
1154 | if (!desperate
| ||||
1155 | desperate = 1; | ||||
1156 | start_try = nitems(search)(sizeof((search)) / sizeof((search)[0])) - 1; | ||||
1157 | flags &= ~UVM_PLA_TRYCONTIG0x0008; | ||||
1158 | |||||
1159 | while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0))) | ||||
1160 | uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0); | ||||
1161 | fnsegs = 0; | ||||
1162 | fcount = 0; | ||||
1163 | goto retry_desperate; | ||||
1164 | } | ||||
1165 | |||||
1166 | fail: | ||||
1167 | /* Allocation failed. */ | ||||
1168 | /* XXX: claim from memory reserve here */ | ||||
1169 | |||||
1170 | while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0))) | ||||
1171 | uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0); | ||||
1172 | |||||
1173 | if (flags & UVM_PLA_WAITOK0x0001) { | ||||
1174 | if (uvm_wait_pla(ptoa(start)((paddr_t)(start) << 12), ptoa(end)((paddr_t)(end) << 12) - 1, ptoa(count)((paddr_t)(count) << 12), | ||||
1175 | flags & UVM_PLA_FAILOK0x0010) == 0) | ||||
1176 | goto retry; | ||||
1177 | KASSERT(flags & UVM_PLA_FAILOK)((flags & 0x0010) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1177, "flags & UVM_PLA_FAILOK")); | ||||
1178 | } else { | ||||
1179 | if (!(flags & UVM_PLA_NOWAKE0x0020)) { | ||||
1180 | uvm_nowait_failed = 1; | ||||
1181 | wakeup(&uvm.pagedaemon); | ||||
1182 | } | ||||
1183 | } | ||||
1184 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
1185 | |||||
1186 | return ENOMEM12; | ||||
1187 | |||||
1188 | out: | ||||
1189 | /* Allocation successful. */ | ||||
1190 | uvmexp.free -= fcount; | ||||
1191 | |||||
1192 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
1193 | |||||
1194 | /* Update statistics and zero pages if UVM_PLA_ZERO. */ | ||||
1195 | #ifdef DIAGNOSTIC1 | ||||
1196 | fnsegs = 0; | ||||
1197 | fcount = 0; | ||||
1198 | diag_prev = NULL((void *)0); | ||||
1199 | #endif /* DIAGNOSTIC */ | ||||
1200 | TAILQ_FOREACH(found, result, pageq)for((found) = ((result)->tqh_first); (found) != ((void *)0 ); (found) = ((found)->pageq.tqe_next)) { | ||||
1201 | atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_PMAPMASK0x3f000000); | ||||
1202 | |||||
1203 | if (found->pg_flags & PG_ZERO0x00000100) { | ||||
1204 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
1205 | uvmexp.zeropages--; | ||||
1206 | if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8)) | ||||
1207 | wakeup(&uvmexp.zeropages); | ||||
1208 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
1209 | } | ||||
1210 | if (flags & UVM_PLA_ZERO0x0004) { | ||||
1211 | if (found->pg_flags & PG_ZERO0x00000100) | ||||
1212 | uvmexp.pga_zerohit++; | ||||
1213 | else { | ||||
1214 | uvmexp.pga_zeromiss++; | ||||
1215 | uvm_pagezero(found); | ||||
1216 | } | ||||
1217 | } | ||||
1218 | atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_ZERO0x00000100|PQ_FREE0x00010000); | ||||
1219 | |||||
1220 | found->uobject = NULL((void *)0); | ||||
1221 | found->uanon = NULL((void *)0); | ||||
1222 | found->pg_version++; | ||||
1223 | |||||
1224 | /* | ||||
1225 | * Validate that the page matches range criterium. | ||||
1226 | */ | ||||
1227 | KDASSERT(start == 0 || atop(VM_PAGE_TO_PHYS(found)) >= start)((void)0); | ||||
1228 | KDASSERT(end == 0 || atop(VM_PAGE_TO_PHYS(found)) < end)((void)0); | ||||
1229 | |||||
1230 | #ifdef DIAGNOSTIC1 | ||||
1231 | /* | ||||
1232 | * Update fcount (# found pages) and | ||||
1233 | * fnsegs (# found segments) counters. | ||||
1234 | */ | ||||
1235 | if (diag_prev == NULL((void *)0) || | ||||
1236 | /* new segment if it contains a hole */ | ||||
1237 | atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) + 1 != | ||||
1238 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) || | ||||
1239 | /* new segment if it crosses boundary */ | ||||
1240 | (atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) & ~(boundary - 1)) != | ||||
1241 | (atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) & ~(boundary - 1))) | ||||
1242 | fnsegs++; | ||||
1243 | fcount++; | ||||
1244 | |||||
1245 | diag_prev = found; | ||||
1246 | #endif /* DIAGNOSTIC */ | ||||
1247 | } | ||||
1248 | |||||
1249 | #ifdef DIAGNOSTIC1 | ||||
1250 | /* | ||||
1251 | * Panic on algorithm failure. | ||||
1252 | */ | ||||
1253 | if (fcount != count || fnsegs > maxseg) { | ||||
1254 | panic("pmemrange allocation error: " | ||||
1255 | "allocated %ld pages in %d segments, " | ||||
1256 | "but request was %ld pages in %d segments", | ||||
1257 | fcount, fnsegs, count, maxseg); | ||||
1258 | } | ||||
1259 | #endif /* DIAGNOSTIC */ | ||||
1260 | |||||
1261 | return 0; | ||||
1262 | } | ||||
1263 | |||||
/*
 * Free a number of contig pages (invoked by uvm_page_init).
 */
1267 | void | ||||
1268 | uvm_pmr_freepages(struct vm_page *pg, psize_t count) | ||||
1269 | { | ||||
1270 | struct uvm_pmemrange *pmr; | ||||
1271 | psize_t i, pmr_count; | ||||
1272 | struct vm_page *firstpg = pg; | ||||
1273 | |||||
1274 | for (i = 0; i < count; i++) { | ||||
1275 | KASSERT(atop(VM_PAGE_TO_PHYS(&pg[i])) ==((((((&pg[i])->phys_addr)) >> 12) == ((((pg)-> phys_addr)) >> 12) + i) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1276, "atop(VM_PAGE_TO_PHYS(&pg[i])) == atop(VM_PAGE_TO_PHYS(pg)) + i" )) | ||||
1276 | atop(VM_PAGE_TO_PHYS(pg)) + i)((((((&pg[i])->phys_addr)) >> 12) == ((((pg)-> phys_addr)) >> 12) + i) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pmemrange.c", 1276, "atop(VM_PAGE_TO_PHYS(&pg[i])) == atop(VM_PAGE_TO_PHYS(pg)) + i" )); | ||||
1277 | |||||
1278 | if (!((pg[i].pg_flags & PQ_FREE0x00010000) == 0 && | ||||
1279 | VALID_FLAGS(pg[i].pg_flags)(((pg[i].pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0))) { | ||||
1280 | printf("Flags: 0x%x, will panic now.\n", | ||||
1281 | pg[i].pg_flags); | ||||
1282 | } | ||||
1283 | KASSERT((pg[i].pg_flags & PQ_FREE) == 0 &&(((pg[i].pg_flags & 0x00010000) == 0 && (((pg[i]. pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1284, "(pg[i].pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg[i].pg_flags)" )) | ||||
1284 | VALID_FLAGS(pg[i].pg_flags))(((pg[i].pg_flags & 0x00010000) == 0 && (((pg[i]. pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1284, "(pg[i].pg_flags & PQ_FREE) == 0 && VALID_FLAGS(pg[i].pg_flags)" )); | ||||
1285 | atomic_setbits_intx86_atomic_setbits_u32(&pg[i].pg_flags, PQ_FREE0x00010000); | ||||
1286 | atomic_clearbits_intx86_atomic_clearbits_u32(&pg[i].pg_flags, PG_ZERO0x00000100); | ||||
1287 | } | ||||
1288 | |||||
1289 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
1290 | |||||
1291 | for (i = count; i > 0; i -= pmr_count) { | ||||
1292 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12)); | ||||
1293 | KASSERT(pmr != NULL)((pmr != ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1293, "pmr != NULL")); | ||||
1294 | |||||
1295 | pmr_count = MIN(i, pmr->high - atop(VM_PAGE_TO_PHYS(pg)))(((i)<(pmr->high - ((((pg)->phys_addr)) >> 12) ))?(i):(pmr->high - ((((pg)->phys_addr)) >> 12))); | ||||
1296 | pg->fpgsz = pmr_count; | ||||
1297 | uvm_pmr_insert(pmr, pg, 0); | ||||
1298 | |||||
1299 | uvmexp.free += pmr_count; | ||||
1300 | pg += pmr_count; | ||||
1301 | } | ||||
1302 | wakeup(&uvmexp.free); | ||||
1303 | if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8)) | ||||
1304 | wakeup(&uvmexp.zeropages); | ||||
1305 | |||||
1306 | uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg)((firstpg)->phys_addr), ptoa(count)((paddr_t)(count) << 12)); | ||||
1307 | |||||
1308 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
1309 | } | ||||
1310 | |||||
1311 | /* | ||||
1312 | * Free all pages in the queue. | ||||
1313 | */ | ||||
void
uvm_pmr_freepageq(struct pglist *pgl)
{
	struct vm_page *pg;
	paddr_t pstart;
	psize_t plen;

	/*
	 * Pass 1 (fpageq not yet held): check and mark every page.
	 * A page being freed must not already carry PQ_FREE and may only
	 * carry flag bits VALID_FLAGS() accepts.
	 */
	TAILQ_FOREACH(pg, pgl, pageq) {
		if (!((pg->pg_flags & PQ_FREE) == 0 &&
		    VALID_FLAGS(pg->pg_flags))) {
			/* Dump the offending flags before the KASSERT fires. */
			printf("Flags: 0x%x, will panic now.\n",
			    pg->pg_flags);
		}
		KASSERT((pg->pg_flags & PQ_FREE) == 0 &&
		    VALID_FLAGS(pg->pg_flags));
		atomic_setbits_int(&pg->pg_flags, PQ_FREE);
		/* PG_ZERO is dropped unconditionally: a freed page is
		 * accounted as not-zeroed. */
		atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
	}

	/*
	 * Pass 2 (fpageq held): peel one physically contiguous run off
	 * the queue per iteration and insert it into its pmemrange.
	 */
	uvm_lock_fpageq();
	while (!TAILQ_EMPTY(pgl)) {
		pg = TAILQ_FIRST(pgl);
		if (pg == TAILQ_NEXT(pg, pageq) + 1) {
			/*
			 * If pg is one behind the position of the
			 * next page in the list in the page array,
			 * try going backwards instead of forward.
			 */
			plen = uvm_pmr_remove_1strange_reverse(pgl, &pstart);
		} else {
			pstart = VM_PAGE_TO_PHYS(TAILQ_FIRST(pgl));
			plen = uvm_pmr_remove_1strange(pgl, 0, NULL, 0);
		}
		uvmexp.free += plen;

		/* Notify waiters blocked on this physical range. */
		uvm_wakeup_pla(pstart, ptoa(plen));
	}
	wakeup(&uvmexp.free);
	/* NOTE(review): presumably wakes the page-zeroing idle loop when
	 * the zeroed-page pool dropped below its target — confirm. */
	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
		wakeup(&uvmexp.zeropages);
	uvm_unlock_fpageq();

	return;
}
1358 | |||||
1359 | /* | ||||
1360 | * Store a pmemrange in the list. | ||||
1361 | * | ||||
1362 | * The list is sorted by use. | ||||
1363 | */ | ||||
1364 | struct uvm_pmemrange * | ||||
1365 | uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq, | ||||
1366 | struct uvm_pmemrange *pmr) | ||||
1367 | { | ||||
1368 | struct uvm_pmemrange *iter; | ||||
1369 | int cmp = 1; | ||||
1370 | |||||
1371 | TAILQ_FOREACH(iter, useq, pmr_use)for((iter) = ((useq)->tqh_first); (iter) != ((void *)0); ( iter) = ((iter)->pmr_use.tqe_next)) { | ||||
1372 | cmp = uvm_pmemrange_use_cmp(pmr, iter); | ||||
1373 | if (cmp == 0) | ||||
1374 | return iter; | ||||
1375 | if (cmp == -1) | ||||
1376 | break; | ||||
1377 | } | ||||
1378 | |||||
1379 | if (iter == NULL((void *)0)) | ||||
1380 | TAILQ_INSERT_TAIL(useq, pmr, pmr_use)do { (pmr)->pmr_use.tqe_next = ((void *)0); (pmr)->pmr_use .tqe_prev = (useq)->tqh_last; *(useq)->tqh_last = (pmr) ; (useq)->tqh_last = &(pmr)->pmr_use.tqe_next; } while (0); | ||||
1381 | else | ||||
1382 | TAILQ_INSERT_BEFORE(iter, pmr, pmr_use)do { (pmr)->pmr_use.tqe_prev = (iter)->pmr_use.tqe_prev ; (pmr)->pmr_use.tqe_next = (iter); *(iter)->pmr_use.tqe_prev = (pmr); (iter)->pmr_use.tqe_prev = &(pmr)->pmr_use .tqe_next; } while (0); | ||||
1383 | return NULL((void *)0); | ||||
1384 | } | ||||
1385 | |||||
#ifdef DEBUG
/*
 * Validation of the whole pmemrange.
 * Called with fpageq locked.
 */
void
uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)
{
	struct vm_page *prev, *next, *i, *xref;
	int lcv, mti;

	/* Empty range */
	if (pmr->nsegs == 0)
		return;

	/* Validate address tree. */
	RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr) {
		/* Validate the range. */
		KASSERT(i->fpgsz > 0);
		KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low);
		KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz
		    <= pmr->high);

		/* Validate each page in this range. */
		for (lcv = 0; lcv < i->fpgsz; lcv++) {
			/*
			 * Only the first page has a size specification.
			 * Rest is size 0.
			 */
			KASSERT(lcv == 0 || i[lcv].fpgsz == 0);
			/*
			 * Flag check.
			 */
			KASSERT(VALID_FLAGS(i[lcv].pg_flags) &&
			    (i[lcv].pg_flags & PQ_FREE) == PQ_FREE);
			/*
			 * Free pages are:
			 * - not wired
			 * - have no vm_anon
			 * - have no uvm_object
			 *
			 * NOTE(review): 0xdeadbeef is accepted alongside
			 * NULL — presumably a poison value written on the
			 * free path; confirm against uvm_page.c.
			 */
			KASSERT(i[lcv].wire_count == 0);
			KASSERT(i[lcv].uanon == (void*)0xdeadbeef ||
			    i[lcv].uanon == NULL);
			KASSERT(i[lcv].uobject == (void*)0xdeadbeef ||
			    i[lcv].uobject == NULL);
			/*
			 * Pages in a single range always have the same
			 * memtype.
			 */
			KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==
			    uvm_pmr_pg_to_memtype(&i[lcv]));
		}

		/* Check that it shouldn't be joined with its predecessor. */
		prev = RBT_PREV(uvm_pmr_addr, i);
		if (prev != NULL) {
			KASSERT(uvm_pmr_pg_to_memtype(i) !=
			    uvm_pmr_pg_to_memtype(prev) ||
			    atop(VM_PAGE_TO_PHYS(i)) >
			    atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz ||
			    prev + prev->fpgsz != i);
		}

		/* Assert i is in the size tree as well. */
		if (i->fpgsz == 1) {
			/* Size-1 chunks live on the single[] list, not
			 * the size tree: linear search for i. */
			TAILQ_FOREACH(xref,
			    &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) {
				if (xref == i)
					break;
			}
			KASSERT(xref == i);
		} else {
			/* The size tree is keyed on the chunk's second
			 * page (i + 1). */
			KASSERT(RBT_FIND(uvm_pmr_size,
			    &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==
			    i + 1);
		}
	}

	/* Validate size tree: nondecreasing size order per memtype. */
	for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
		for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) {
			next = uvm_pmr_nextsz(pmr, i, mti);
			if (next != NULL) {
				KASSERT(i->fpgsz <=
				    next->fpgsz);
			}

			/* Assert i is in the addr tree as well. */
			KASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, i) == i);

			/* Assert i is of the correct memory type. */
			KASSERT(uvm_pmr_pg_to_memtype(i) == mti);
		}
	}

	/* Validate nsegs statistic. */
	lcv = 0;
	RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr)
		lcv++;
	KASSERT(pmr->nsegs == lcv);
}
#endif /* DEBUG */
1489 | |||||
1490 | /* | ||||
1491 | * Split pmr at split point pageno. | ||||
1492 | * Called with fpageq unlocked. | ||||
1493 | * | ||||
1494 | * Split is only applied if a pmemrange spans pageno. | ||||
1495 | */ | ||||
void
uvm_pmr_split(paddr_t pageno)
{
	struct uvm_pmemrange *pmr, *drain;
	struct vm_page *rebuild, *prev, *next;
	psize_t prev_sz;

	uvm_lock_fpageq();
	pmr = uvm_pmemrange_find(pageno);
	if (pmr == NULL || !(pmr->low < pageno)) {
		/* No split required. */
		uvm_unlock_fpageq();
		return;
	}

	KASSERT(pmr->low < pageno);
	KASSERT(pmr->high > pageno);

	/*
	 * uvm_pmr_allocpmr() calls into malloc() which in turn calls into
	 * uvm_kmemalloc which calls into pmemrange, making the locking
	 * a bit hard, so we just race!
	 */
	uvm_unlock_fpageq();
	drain = uvm_pmr_allocpmr();
	uvm_lock_fpageq();
	/* Re-validate: the world may have changed while unlocked. */
	pmr = uvm_pmemrange_find(pageno);
	if (pmr == NULL || !(pmr->low < pageno)) {
		/*
		 * We lost the race since someone else ran this or a related
		 * function, however this should be triggered very rarely so
		 * we just leak the pmr.
		 */
		printf("uvm_pmr_split: lost one pmr\n");
		uvm_unlock_fpageq();
		return;
	}

	/* drain takes over [pageno, pmr->high) with the same use count. */
	drain->low = pageno;
	drain->high = pmr->high;
	drain->use = pmr->use;

	uvm_pmr_assertvalid(pmr);
	uvm_pmr_assertvalid(drain);
	KASSERT(drain->nsegs == 0);

	/* Find the first free chunk at or past the split point. */
	RBT_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) {
		if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno)
			break;
	}
	if (rebuild == NULL)
		prev = RBT_MAX(uvm_pmr_addr, &pmr->addr);
	else
		prev = RBT_PREV(uvm_pmr_addr, rebuild);
	KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno);

	/*
	 * Handle free chunk that spans the split point: cut it into a
	 * "before" piece that stays in pmr and an "after" piece that
	 * moves to drain.
	 */
	if (prev != NULL &&
	    atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz > pageno) {
		psize_t before, after;

		KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno);

		uvm_pmr_remove(pmr, prev);
		prev_sz = prev->fpgsz;
		before = pageno - atop(VM_PAGE_TO_PHYS(prev));
		after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno;

		KASSERT(before > 0);
		KASSERT(after > 0);

		prev->fpgsz = before;
		uvm_pmr_insert(pmr, prev, 1);
		/* prev + before is the vm_page at the split boundary;
		 * it becomes the head of the "after" chunk. */
		(prev + before)->fpgsz = after;
		uvm_pmr_insert(drain, prev + before, 1);
	}

	/* Move free chunks that no longer fall in the range. */
	for (; rebuild != NULL; rebuild = next) {
		next = RBT_NEXT(uvm_pmr_addr, rebuild);

		uvm_pmr_remove(pmr, rebuild);
		uvm_pmr_insert(drain, rebuild, 1);
	}

	/* Shrink pmr and publish the new range. */
	pmr->high = pageno;
	uvm_pmr_assertvalid(pmr);
	uvm_pmr_assertvalid(drain);

	RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain);
	uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain);
	uvm_unlock_fpageq();
}
1591 | |||||
1592 | /* | ||||
1593 | * Increase the usage counter for the given range of memory. | ||||
1594 | * | ||||
1595 | * The more usage counters a given range of memory has, the more will be | ||||
1596 | * attempted not to allocate from it. | ||||
1597 | * | ||||
1598 | * Addresses here are in paddr_t, not page-numbers. | ||||
1599 | * The lowest and highest allowed address are specified. | ||||
1600 | */ | ||||
void
uvm_pmr_use_inc(paddr_t low, paddr_t high)
{
	struct uvm_pmemrange *pmr;
	paddr_t sz;

	/*
	 * pmr uses page numbers, translate low and high.
	 * high is inclusive on entry; convert to a half-open page range,
	 * rounding both ends inward, then split so range boundaries
	 * align with pmemrange boundaries.
	 */
	high++;
	high = atop(trunc_page(high));
	low = atop(round_page(low));
	uvm_pmr_split(low);
	uvm_pmr_split(high);

	sz = 0;
	uvm_lock_fpageq();
	/* Increase use count on segments in range. */
	RBT_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) {
		if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) {
			/* Remove/reinsert keeps the use list sorted
			 * after the counter changes. */
			TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use);
			pmr->use++;
			sz += pmr->high - pmr->low;
			uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr);
		}
		uvm_pmr_assertvalid(pmr);
	}
	uvm_unlock_fpageq();

	/* The bumped ranges must cover at least [low, high). */
	KASSERT(sz >= high - low);
}
1630 | |||||
1631 | /* | ||||
1632 | * Allocate a pmemrange. | ||||
1633 | * | ||||
1634 | * If called from uvm_page_init, the uvm_pageboot_alloc is used. | ||||
1635 | * If called after uvm_init, malloc is used. | ||||
1636 | * (And if called in between, you're dead.) | ||||
1637 | */ | ||||
1638 | struct uvm_pmemrange * | ||||
1639 | uvm_pmr_allocpmr(void) | ||||
1640 | { | ||||
1641 | struct uvm_pmemrange *nw; | ||||
1642 | int i; | ||||
1643 | |||||
1644 | /* We're only ever hitting the !uvm.page_init_done case for now. */ | ||||
1645 | if (!uvm.page_init_done) { | ||||
1646 | nw = (struct uvm_pmemrange *) | ||||
1647 | uvm_pageboot_alloc(sizeof(struct uvm_pmemrange)); | ||||
1648 | } else { | ||||
1649 | nw = malloc(sizeof(struct uvm_pmemrange), | ||||
1650 | M_VMMAP30, M_NOWAIT0x0002); | ||||
1651 | } | ||||
1652 | KASSERT(nw != NULL)((nw != ((void *)0)) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1652, "nw != NULL")); | ||||
1653 | memset(nw, 0, sizeof(struct uvm_pmemrange))__builtin_memset((nw), (0), (sizeof(struct uvm_pmemrange))); | ||||
1654 | RBT_INIT(uvm_pmr_addr, &nw->addr)uvm_pmr_addr_RBT_INIT(&nw->addr); | ||||
1655 | for (i = 0; i < UVM_PMR_MEMTYPE_MAX2; i++) { | ||||
1656 | RBT_INIT(uvm_pmr_size, &nw->size[i])uvm_pmr_size_RBT_INIT(&nw->size[i]); | ||||
1657 | TAILQ_INIT(&nw->single[i])do { (&nw->single[i])->tqh_first = ((void *)0); (& nw->single[i])->tqh_last = &(&nw->single[i]) ->tqh_first; } while (0); | ||||
1658 | } | ||||
1659 | return nw; | ||||
1660 | } | ||||
1661 | |||||
1662 | /* | ||||
1663 | * Initialization of pmr. | ||||
1664 | * Called by uvm_page_init. | ||||
1665 | * | ||||
1666 | * Sets up pmemranges. | ||||
1667 | */ | ||||
1668 | void | ||||
1669 | uvm_pmr_init(void) | ||||
1670 | { | ||||
1671 | struct uvm_pmemrange *new_pmr; | ||||
1672 | int i; | ||||
1673 | |||||
1674 | TAILQ_INIT(&uvm.pmr_control.use)do { (&uvm.pmr_control.use)->tqh_first = ((void *)0); ( &uvm.pmr_control.use)->tqh_last = &(&uvm.pmr_control .use)->tqh_first; } while (0); | ||||
1675 | RBT_INIT(uvm_pmemrange_addr, &uvm.pmr_control.addr)uvm_pmemrange_addr_RBT_INIT(&uvm.pmr_control.addr); | ||||
1676 | TAILQ_INIT(&uvm.pmr_control.allocs)do { (&uvm.pmr_control.allocs)->tqh_first = ((void *)0 ); (&uvm.pmr_control.allocs)->tqh_last = &(&uvm .pmr_control.allocs)->tqh_first; } while (0); | ||||
1677 | |||||
1678 | /* By default, one range for the entire address space. */ | ||||
1679 | new_pmr = uvm_pmr_allocpmr(); | ||||
1680 | new_pmr->low = 0; | ||||
1681 | new_pmr->high = atop((paddr_t)-1)(((paddr_t)-1) >> 12) + 1; | ||||
1682 | |||||
1683 | RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr)uvm_pmemrange_addr_RBT_INSERT(&uvm.pmr_control.addr, new_pmr ); | ||||
1684 | uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr); | ||||
1685 | |||||
1686 | for (i = 0; uvm_md_constraints[i] != NULL((void *)0); i++) { | ||||
1687 | uvm_pmr_use_inc(uvm_md_constraints[i]->ucr_low, | ||||
1688 | uvm_md_constraints[i]->ucr_high); | ||||
1689 | } | ||||
1690 | } | ||||
1691 | |||||
1692 | /* | ||||
1693 | * Find the pmemrange that contains the given page number. | ||||
1694 | * | ||||
1695 | * (Manually traverses the binary tree, because that is cheaper on stack | ||||
1696 | * usage.) | ||||
1697 | */ | ||||
1698 | struct uvm_pmemrange * | ||||
1699 | uvm_pmemrange_find(paddr_t pageno) | ||||
1700 | { | ||||
1701 | struct uvm_pmemrange *pmr; | ||||
1702 | |||||
1703 | pmr = RBT_ROOT(uvm_pmemrange_addr, &uvm.pmr_control.addr)uvm_pmemrange_addr_RBT_ROOT(&uvm.pmr_control.addr); | ||||
1704 | while (pmr != NULL((void *)0)) { | ||||
1705 | if (pmr->low > pageno) | ||||
1706 | pmr = RBT_LEFT(uvm_pmemrange_addr, pmr)uvm_pmemrange_addr_RBT_LEFT(pmr); | ||||
1707 | else if (pmr->high <= pageno) | ||||
1708 | pmr = RBT_RIGHT(uvm_pmemrange_addr, pmr)uvm_pmemrange_addr_RBT_RIGHT(pmr); | ||||
1709 | else | ||||
1710 | break; | ||||
1711 | } | ||||
1712 | |||||
1713 | return pmr; | ||||
1714 | } | ||||
1715 | |||||
1716 | #if defined(DDB1) || defined(DEBUG) | ||||
1717 | /* | ||||
1718 | * Return true if the given page is in any of the free lists. | ||||
1719 | * Used by uvm_page_printit. | ||||
1720 | * This function is safe, even if the page is not on the freeq. | ||||
1721 | * Note: does not apply locking, only called from ddb. | ||||
1722 | */ | ||||
1723 | int | ||||
1724 | uvm_pmr_isfree(struct vm_page *pg) | ||||
1725 | { | ||||
1726 | struct vm_page *r; | ||||
1727 | struct uvm_pmemrange *pmr; | ||||
1728 | |||||
1729 | pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12)); | ||||
1730 | if (pmr == NULL((void *)0)) | ||||
1731 | return 0; | ||||
1732 | r = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg)uvm_pmr_addr_RBT_NFIND(&pmr->addr, pg); | ||||
1733 | if (r == NULL((void *)0)) | ||||
1734 | r = RBT_MAX(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_MAX(&pmr->addr); | ||||
1735 | else if (r != pg) | ||||
1736 | r = RBT_PREV(uvm_pmr_addr, r)uvm_pmr_addr_RBT_PREV(r); | ||||
1737 | if (r == NULL((void *)0)) | ||||
1738 | return 0; /* Empty tree. */ | ||||
1739 | |||||
1740 | KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)))((void)0); | ||||
1741 | return atop(VM_PAGE_TO_PHYS(r))((((r)->phys_addr)) >> 12) + r->fpgsz > | ||||
1742 | atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12); | ||||
1743 | } | ||||
1744 | #endif /* DEBUG */ | ||||
1745 | |||||
1746 | /* | ||||
1747 | * Given a root of a tree, find a range which intersects start, end and | ||||
1748 | * is of the same memtype. | ||||
1749 | * | ||||
1750 | * Page must be in the address tree. | ||||
1751 | */ | ||||
struct vm_page*
uvm_pmr_rootupdate(struct uvm_pmemrange *pmr, struct vm_page *init_root,
    paddr_t start, paddr_t end, int memtype)
{
	int direction;
	struct vm_page *root;
	struct vm_page *high, *high_next;
	struct vm_page *low, *low_next;

	KDASSERT(pmr != NULL && init_root != NULL);
	root = init_root;

	/*
	 * Which direction to use for searching.
	 * start/end of 0 mean "unconstrained" throughout.
	 */
	if (start != 0 && atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz <= start)
		direction = 1;
	else if (end != 0 && atop(VM_PAGE_TO_PHYS(root)) >= end)
		direction = -1;
	else /* nothing to do */
		return root;

	/*
	 * First, update root to fall within the chosen range.
	 *
	 * NOTE(review): the descent uses the uvm_objtree link accessors;
	 * this assumes the addr tree shares the objt RBT entry of
	 * struct vm_page — confirm against the RBT_GENERATE for
	 * uvm_pmr_addr.
	 */
	while (root && !PMR_INTERSECTS_WITH(
	    atop(VM_PAGE_TO_PHYS(root)),
	    atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz,
	    start, end)) {
		if (direction == 1)
			root = RBT_RIGHT(uvm_objtree, root);
		else
			root = RBT_LEFT(uvm_objtree, root);
	}
	if (root == NULL || uvm_pmr_pg_to_memtype(root) == memtype)
		return root;

	/*
	 * Root is valid, but of the wrong memtype.
	 *
	 * Try to find a range that has the given memtype in the subtree
	 * (memtype mismatches are costly, either because the conversion
	 * is expensive, or a later allocation will need to do the opposite
	 * conversion, which will be expensive).
	 *
	 *
	 * First, simply increase address until we hit something we can use.
	 * Cache the upper page, so we can page-walk later.
	 */
	high = root;
	high_next = RBT_RIGHT(uvm_objtree, high);
	while (high_next != NULL && PMR_INTERSECTS_WITH(
	    atop(VM_PAGE_TO_PHYS(high_next)),
	    atop(VM_PAGE_TO_PHYS(high_next)) + high_next->fpgsz,
	    start, end)) {
		high = high_next;
		if (uvm_pmr_pg_to_memtype(high) == memtype)
			return high;
		high_next = RBT_RIGHT(uvm_objtree, high);
	}

	/*
	 * Second, decrease the address until we hit something we can use.
	 * Cache the lower page, so we can page-walk later.
	 */
	low = root;
	low_next = RBT_LEFT(uvm_objtree, low);
	while (low_next != NULL && PMR_INTERSECTS_WITH(
	    atop(VM_PAGE_TO_PHYS(low_next)),
	    atop(VM_PAGE_TO_PHYS(low_next)) + low_next->fpgsz,
	    start, end)) {
		low = low_next;
		if (uvm_pmr_pg_to_memtype(low) == memtype)
			return low;
		low_next = RBT_LEFT(uvm_objtree, low);
	}

	/* low == high means the subtree walk covered a single chain;
	 * everything in range was already inspected above. */
	if (low == high)
		return NULL;

	/* No hits. Walk the address tree until we find something usable. */
	for (low = RBT_NEXT(uvm_pmr_addr, low);
	    low != high;
	    low = RBT_NEXT(uvm_pmr_addr, low)) {
		KDASSERT(PMR_IS_SUBRANGE_OF(atop(VM_PAGE_TO_PHYS(low)),
		    atop(VM_PAGE_TO_PHYS(low)) + low->fpgsz,
		    start, end));
		if (uvm_pmr_pg_to_memtype(low) == memtype)
			return low;
	}

	/* Nothing found. */
	return NULL;
}
1842 | |||||
1843 | /* | ||||
1844 | * Allocate any page, the fastest way. Page number constraints only. | ||||
1845 | */ | ||||
1846 | psize_t | ||||
1847 | uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result, | ||||
1848 | paddr_t start, paddr_t end, int memtype_only) | ||||
1849 | { | ||||
1850 | struct uvm_pmemrange *pmr; | ||||
1851 | struct vm_page *found, *splitpg; | ||||
1852 | psize_t fcount; | ||||
1853 | int memtype; | ||||
1854 | |||||
1855 | fcount = 0; | ||||
1856 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||
1857 | /* We're done. */ | ||||
1858 | if (fcount == count) | ||||
1859 | break; | ||||
1860 | |||||
1861 | /* Outside requested range. */ | ||||
1862 | if (!(start == 0 && end == 0) && | ||||
1863 | !PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)(((start) == 0 || (start) < (pmr->high)) && ((end ) == 0 || (pmr->low) < (end)))) | ||||
1864 | continue; | ||||
1865 | |||||
1866 | /* Range is empty. */ | ||||
1867 | if (pmr->nsegs == 0) | ||||
1868 | continue; | ||||
1869 | |||||
1870 | /* Loop over all memtypes, starting at memtype_init. */ | ||||
1871 | memtype = memtype_init; | ||||
1872 | while (fcount != count) { | ||||
1873 | found = TAILQ_FIRST(&pmr->single[memtype])((&pmr->single[memtype])->tqh_first); | ||||
1874 | /* | ||||
1875 | * If found is outside the range, walk the list | ||||
1876 | * until we find something that intersects with | ||||
1877 | * boundaries. | ||||
1878 | */ | ||||
1879 | while (found && !PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||
1880 | atop(VM_PAGE_TO_PHYS(found)),(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||
1881 | atop(VM_PAGE_TO_PHYS(found)) + 1,(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end))) | ||||
1882 | start, end)(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + 1)) && ((end) == 0 || (((((found)->phys_addr )) >> 12)) < (end)))) | ||||
1883 | found = TAILQ_NEXT(found, pageq)((found)->pageq.tqe_next); | ||||
1884 | |||||
1885 | if (found == NULL((void *)0)) { | ||||
1886 | /* | ||||
1887 | * Check if the size tree contains a range | ||||
1888 | * that intersects with the boundaries. As the | ||||
1889 | * allocation is for any page, try the smallest | ||||
1890 | * range so that large ranges are preserved for | ||||
1891 | * more constrained cases. Only one entry is | ||||
1892 | * checked here, to avoid a brute-force search. | ||||
1893 | * | ||||
1894 | * Note that a size tree gives pg[1] instead of | ||||
1895 | * pg[0]. | ||||
1896 | */ | ||||
1897 | found = RBT_MIN(uvm_pmr_size,uvm_pmr_size_RBT_MIN(&pmr->size[memtype]) | ||||
1898 | &pmr->size[memtype])uvm_pmr_size_RBT_MIN(&pmr->size[memtype]); | ||||
1899 | if (found != NULL((void *)0)) { | ||||
1900 | found--; | ||||
1901 | if (!PMR_INTERSECTS_WITH((((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||
1902 | atop(VM_PAGE_TO_PHYS(found)),(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||
1903 | atop(VM_PAGE_TO_PHYS(found)) +(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end))) | ||||
1904 | found->fpgsz, start, end)(((start) == 0 || (start) < (((((found)->phys_addr)) >> 12) + found->fpgsz)) && ((end) == 0 || (((((found )->phys_addr)) >> 12)) < (end)))) | ||||
1905 | found = NULL((void *)0); | ||||
1906 | } | ||||
1907 | } | ||||
1908 | if (found == NULL((void *)0)) { | ||||
1909 | /* | ||||
1910 | * Try address-guided search to meet the page | ||||
1911 | * number constraints. | ||||
1912 | */ | ||||
1913 | found = RBT_ROOT(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_ROOT(&pmr->addr); | ||||
1914 | if (found != NULL((void *)0)) { | ||||
1915 | found = uvm_pmr_rootupdate(pmr, found, | ||||
1916 | start, end, memtype); | ||||
1917 | } | ||||
1918 | } | ||||
1919 | if (found != NULL((void *)0)) { | ||||
1920 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
1921 | uvm_pmr_remove_size(pmr, found); | ||||
1922 | |||||
1923 | /* | ||||
1924 | * If the page intersects the end, then it'll | ||||
1925 | * need splitting. | ||||
1926 | * | ||||
1927 | * Note that we don't need to split if the page | ||||
1928 | * intersects start: the drain function will | ||||
1929 | * simply stop on hitting start. | ||||
1930 | */ | ||||
1931 | if (end != 0 && atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||
1932 | found->fpgsz > end) { | ||||
1933 | psize_t splitsz = | ||||
1934 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||
1935 | found->fpgsz - end; | ||||
1936 | |||||
1937 | uvm_pmr_remove_addr(pmr, found); | ||||
1938 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
1939 | found->fpgsz -= splitsz; | ||||
1940 | splitpg = found + found->fpgsz; | ||||
1941 | splitpg->fpgsz = splitsz; | ||||
1942 | uvm_pmr_insert(pmr, splitpg, 1); | ||||
1943 | |||||
1944 | /* | ||||
1945 | * At this point, splitpg and found | ||||
1946 | * actually should be joined. | ||||
1947 | * But we explicitly disable that, | ||||
1948 | * because we will start subtracting | ||||
1949 | * from found. | ||||
1950 | */ | ||||
1951 | KASSERT(start == 0 ||((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )) | ||||
1952 | atop(VM_PAGE_TO_PHYS(found)) +((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )) | ||||
1953 | found->fpgsz > start)((start == 0 || ((((found)->phys_addr)) >> 12) + found ->fpgsz > start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c" , 1953, "start == 0 || atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz > start" )); | ||||
1954 | uvm_pmr_insert_addr(pmr, found, 1); | ||||
1955 | } | ||||
1956 | |||||
1957 | /* | ||||
1958 | * Fetch pages from the end. | ||||
1959 | * If the range is larger than the requested | ||||
1960 | * number of pages, this saves us an addr-tree | ||||
1961 | * update. | ||||
1962 | * | ||||
1963 | * Since we take from the end and insert at | ||||
1964 | * the head, any ranges keep preserved. | ||||
1965 | */ | ||||
1966 | while (found->fpgsz > 0 && fcount < count && | ||||
1967 | (start == 0 || | ||||
1968 | atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + | ||||
1969 | found->fpgsz > start)) { | ||||
1970 | found->fpgsz--; | ||||
1971 | fcount++; | ||||
1972 | TAILQ_INSERT_HEAD(result,do { if (((&found[found->fpgsz])->pageq.tqe_next = ( result)->tqh_first) != ((void *)0)) (result)->tqh_first ->pageq.tqe_prev = &(&found[found->fpgsz])-> pageq.tqe_next; else (result)->tqh_last = &(&found [found->fpgsz])->pageq.tqe_next; (result)->tqh_first = (&found[found->fpgsz]); (&found[found->fpgsz ])->pageq.tqe_prev = &(result)->tqh_first; } while ( 0) | ||||
1973 | &found[found->fpgsz], pageq)do { if (((&found[found->fpgsz])->pageq.tqe_next = ( result)->tqh_first) != ((void *)0)) (result)->tqh_first ->pageq.tqe_prev = &(&found[found->fpgsz])-> pageq.tqe_next; else (result)->tqh_last = &(&found [found->fpgsz])->pageq.tqe_next; (result)->tqh_first = (&found[found->fpgsz]); (&found[found->fpgsz ])->pageq.tqe_prev = &(result)->tqh_first; } while ( 0); | ||||
1974 | } | ||||
1975 | if (found->fpgsz > 0) { | ||||
1976 | uvm_pmr_insert_size(pmr, found); | ||||
1977 | KDASSERT(fcount == count)((void)0); | ||||
1978 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
1979 | return fcount; | ||||
1980 | } | ||||
1981 | |||||
1982 | /* | ||||
1983 | * Delayed addr-tree removal. | ||||
1984 | */ | ||||
1985 | uvm_pmr_remove_addr(pmr, found); | ||||
1986 | uvm_pmr_assertvalid(pmr)do {} while (0); | ||||
1987 | } else { | ||||
1988 | if (memtype_only) | ||||
1989 | break; | ||||
1990 | /* | ||||
1991 | * Skip to the next memtype. | ||||
1992 | */ | ||||
1993 | memtype += 1; | ||||
1994 | if (memtype == UVM_PMR_MEMTYPE_MAX2) | ||||
1995 | memtype = 0; | ||||
1996 | if (memtype == memtype_init) | ||||
1997 | break; | ||||
1998 | } | ||||
1999 | } | ||||
2000 | } | ||||
2001 | |||||
2002 | /* | ||||
2003 | * Search finished. | ||||
2004 | * | ||||
2005 | * Ran out of ranges before enough pages were gathered, or we hit the | ||||
2006 | * case where found->fpgsz == count - fcount, in which case the | ||||
2007 | * above exit condition didn't trigger. | ||||
2008 | * | ||||
2009 | * On failure, caller will free the pages. | ||||
2010 | */ | ||||
2011 | return fcount; | ||||
2012 | } | ||||
2013 | |||||
2014 | #ifdef DDB1 | ||||
2015 | /* | ||||
2016 | * Print information about pmemrange. | ||||
2017 | * Does not do locking (so either call it from DDB or acquire fpageq lock | ||||
2018 | * before invoking. | ||||
2019 | */ | ||||
2020 | void | ||||
2021 | uvm_pmr_print(void) | ||||
2022 | { | ||||
2023 | struct uvm_pmemrange *pmr; | ||||
2024 | struct vm_page *pg; | ||||
2025 | psize_t size[UVM_PMR_MEMTYPE_MAX2]; | ||||
2026 | psize_t free; | ||||
2027 | int useq_len; | ||||
2028 | int mt; | ||||
2029 | |||||
2030 | printf("Ranges, use queue:\n"); | ||||
2031 | useq_len = 0; | ||||
2032 | TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr) != ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next)) { | ||||
2033 | useq_len++; | ||||
2034 | free = 0; | ||||
2035 | for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX2; mt++) { | ||||
2036 | pg = RBT_MAX(uvm_pmr_size, &pmr->size[mt])uvm_pmr_size_RBT_MAX(&pmr->size[mt]); | ||||
2037 | if (pg != NULL((void *)0)) | ||||
2038 | pg--; | ||||
2039 | else | ||||
2040 | pg = TAILQ_FIRST(&pmr->single[mt])((&pmr->single[mt])->tqh_first); | ||||
2041 | size[mt] = (pg == NULL((void *)0) ? 0 : pg->fpgsz); | ||||
2042 | |||||
2043 | RBT_FOREACH(pg, uvm_pmr_addr, &pmr->addr)for ((pg) = uvm_pmr_addr_RBT_MIN((&pmr->addr)); (pg) != ((void *)0); (pg) = uvm_pmr_addr_RBT_NEXT((pg))) | ||||
2044 | free += pg->fpgsz; | ||||
2045 | } | ||||
2046 | |||||
2047 | printf("* [0x%lx-0x%lx] use=%d nsegs=%ld", | ||||
2048 | (unsigned long)pmr->low, (unsigned long)pmr->high, | ||||
2049 | pmr->use, (unsigned long)pmr->nsegs); | ||||
2050 | for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX2; mt++) { | ||||
2051 | printf(" maxsegsz[%d]=0x%lx", mt, | ||||
2052 | (unsigned long)size[mt]); | ||||
2053 | } | ||||
2054 | printf(" free=0x%lx\n", (unsigned long)free); | ||||
2055 | } | ||||
2056 | printf("#ranges = %d\n", useq_len); | ||||
2057 | } | ||||
2058 | #endif | ||||
2059 | |||||
2060 | /* | ||||
2061 | * uvm_wait_pla: wait (sleep) for the page daemon to free some pages | ||||
2062 | * in a specific physmem area. | ||||
2063 | * | ||||
2064 | * Returns ENOMEM if the pagedaemon failed to free any pages. | ||||
2065 | * If not failok, failure will lead to panic. | ||||
2066 | * | ||||
2067 | * Must be called with fpageq locked. | ||||
2068 | */ | ||||
2069 | int | ||||
2070 | uvm_wait_pla(paddr_t low, paddr_t high, paddr_t size, int failok) | ||||
2071 | { | ||||
2072 | struct uvm_pmalloc pma; | ||||
2073 | const char *wmsg = "pmrwait"; | ||||
2074 | |||||
2075 | if (curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc == uvm.pagedaemon_proc) { | ||||
2076 | /* | ||||
2077 | * This is not that uncommon when the pagedaemon is trying | ||||
2078 | * to flush out a large mmapped file. VOP_WRITE will circle | ||||
2079 | * back through the buffer cache and try to get more memory. | ||||
2080 | * The pagedaemon starts by calling bufbackoff, but we can | ||||
2081 | * easily use up that reserve in a single scan iteration. | ||||
2082 | */ | ||||
2083 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
2084 | if (bufbackoff(NULL((void *)0), atop(size)((size) >> 12)) == 0) { | ||||
2085 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
2086 | return 0; | ||||
2087 | } | ||||
2088 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
2089 | |||||
2090 | /* | ||||
2091 | * XXX detect pagedaemon deadlock - see comment in | ||||
2092 | * uvm_wait(), as this is exactly the same issue. | ||||
2093 | */ | ||||
2094 | printf("pagedaemon: wait_pla deadlock detected!\n"); | ||||
2095 | msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM4, wmsg, | ||||
2096 | MSEC_TO_NSEC(125)); | ||||
2097 | #if defined(DEBUG) | ||||
2098 | /* DEBUG: panic so we can debug it */ | ||||
2099 | panic("wait_pla pagedaemon deadlock"); | ||||
2100 | #endif | ||||
2101 | return 0; | ||||
2102 | } | ||||
2103 | |||||
2104 | for (;;) { | ||||
2105 | pma.pm_constraint.ucr_low = low; | ||||
2106 | pma.pm_constraint.ucr_high = high; | ||||
2107 | pma.pm_size = size; | ||||
2108 | pma.pm_flags = UVM_PMA_LINKED0x01; | ||||
2109 | TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, &pma, pmq)do { (&pma)->pmq.tqe_next = ((void *)0); (&pma)-> pmq.tqe_prev = (&uvm.pmr_control.allocs)->tqh_last; *( &uvm.pmr_control.allocs)->tqh_last = (&pma); (& uvm.pmr_control.allocs)->tqh_last = &(&pma)->pmq .tqe_next; } while (0); | ||||
2110 | |||||
2111 | wakeup(&uvm.pagedaemon); /* wake the daemon! */ | ||||
2112 | while (pma.pm_flags & (UVM_PMA_LINKED0x01 | UVM_PMA_BUSY0x02)) | ||||
2113 | msleep_nsec(&pma, &uvm.fpageqlock, PVM4, wmsg, INFSLP0xffffffffffffffffULL); | ||||
2114 | |||||
2115 | if (!(pma.pm_flags & UVM_PMA_FREED0x20) && | ||||
2116 | pma.pm_flags & UVM_PMA_FAIL0x10) { | ||||
2117 | if (failok) | ||||
2118 | return ENOMEM12; | ||||
| |||||
2119 | printf("uvm_wait: failed to free %ld pages between " | ||||
2120 | "0x%lx-0x%lx\n", atop(size)((size) >> 12), low, high); | ||||
2121 | } else | ||||
2122 | return 0; | ||||
2123 | } | ||||
2124 | /* UNREACHABLE */ | ||||
2125 | } | ||||
2126 | |||||
2127 | /* | ||||
2128 | * Wake up uvm_pmalloc sleepers. | ||||
2129 | */ | ||||
2130 | void | ||||
2131 | uvm_wakeup_pla(paddr_t low, psize_t len) | ||||
2132 | { | ||||
2133 | struct uvm_pmalloc *pma, *pma_next; | ||||
2134 | paddr_t high; | ||||
2135 | |||||
2136 | high = low + len; | ||||
2137 | |||||
2138 | /* Wake specific allocations waiting for this memory. */ | ||||
2139 | for (pma = TAILQ_FIRST(&uvm.pmr_control.allocs)((&uvm.pmr_control.allocs)->tqh_first); pma != NULL((void *)0); | ||||
2140 | pma = pma_next) { | ||||
2141 | pma_next = TAILQ_NEXT(pma, pmq)((pma)->pmq.tqe_next); | ||||
2142 | |||||
2143 | if (low < pma->pm_constraint.ucr_high && | ||||
2144 | high > pma->pm_constraint.ucr_low) { | ||||
2145 | pma->pm_flags |= UVM_PMA_FREED0x20; | ||||
2146 | if (!(pma->pm_flags & UVM_PMA_BUSY0x02)) { | ||||
2147 | pma->pm_flags &= ~UVM_PMA_LINKED0x01; | ||||
2148 | TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,do { if (((pma)->pmq.tqe_next) != ((void *)0)) (pma)->pmq .tqe_next->pmq.tqe_prev = (pma)->pmq.tqe_prev; else (& uvm.pmr_control.allocs)->tqh_last = (pma)->pmq.tqe_prev ; *(pma)->pmq.tqe_prev = (pma)->pmq.tqe_next; ((pma)-> pmq.tqe_prev) = ((void *)-1); ((pma)->pmq.tqe_next) = ((void *)-1); } while (0) | ||||
2149 | pmq)do { if (((pma)->pmq.tqe_next) != ((void *)0)) (pma)->pmq .tqe_next->pmq.tqe_prev = (pma)->pmq.tqe_prev; else (& uvm.pmr_control.allocs)->tqh_last = (pma)->pmq.tqe_prev ; *(pma)->pmq.tqe_prev = (pma)->pmq.tqe_next; ((pma)-> pmq.tqe_prev) = ((void *)-1); ((pma)->pmq.tqe_next) = ((void *)-1); } while (0); | ||||
2150 | wakeup(pma); | ||||
2151 | } | ||||
2152 | } | ||||
2153 | } | ||||
2154 | } | ||||
2155 | |||||
2156 | void | ||||
2157 | uvm_pagezero_thread(void *arg) | ||||
2158 | { | ||||
2159 | struct pglist pgl; | ||||
2160 | struct vm_page *pg; | ||||
2161 | int count; | ||||
2162 | |||||
2163 | /* Run at the lowest possible priority. */ | ||||
2164 | curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_nice = NZERO20 + PRIO_MAX20; | ||||
2165 | |||||
2166 | KERNEL_UNLOCK()_kernel_unlock(); | ||||
2167 | |||||
2168 | TAILQ_INIT(&pgl)do { (&pgl)->tqh_first = ((void *)0); (&pgl)->tqh_last = &(&pgl)->tqh_first; } while (0); | ||||
2169 | for (;;) { | ||||
2170 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
2171 | while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET(uvmexp.free / 8) || | ||||
2172 | (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY0, | ||||
2173 | &pgl, 0, 0, 1)) == 0) { | ||||
2174 | msleep_nsec(&uvmexp.zeropages, &uvm.fpageqlock, | ||||
2175 | MAXPRI127, "pgzero", INFSLP0xffffffffffffffffULL); | ||||
2176 | } | ||||
2177 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
2178 | |||||
2179 | TAILQ_FOREACH(pg, &pgl, pageq)for((pg) = ((&pgl)->tqh_first); (pg) != ((void *)0); ( pg) = ((pg)->pageq.tqe_next)) { | ||||
2180 | uvm_pagezero(pg); | ||||
2181 | atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PG_ZERO0x00000100); | ||||
2182 | } | ||||
2183 | |||||
2184 | uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock); | ||||
2185 | while (!TAILQ_EMPTY(&pgl)(((&pgl)->tqh_first) == ((void *)0))) | ||||
2186 | uvm_pmr_remove_1strange(&pgl, 0, NULL((void *)0), 0); | ||||
2187 | uvmexp.zeropages += count; | ||||
2188 | uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock); | ||||
2189 | |||||
2190 | yield(); | ||||
2191 | } | ||||
2192 | } |