Bug Summary

File: uvm/uvm_pmemrange.c
Warning: line 2122, column 4
Address of stack memory associated with local variable 'pma' is still referred to by the global variable 'uvm' upon returning to the caller. This will be a dangling reference

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name uvm_pmemrange.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc 
-fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/uvm/uvm_pmemrange.c
1/* $OpenBSD: uvm_pmemrange.c,v 1.61 2021/03/12 14:15:49 jsg Exp $ */
2
3/*
4 * Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>
20#include <sys/systm.h>
21#include <uvm/uvm.h>
22#include <sys/malloc.h>
23#include <sys/kernel.h>
24#include <sys/proc.h>
25#include <sys/mount.h>
26
27/*
28 * 2 trees: addr tree and size tree.
29 *
30 * The allocator keeps chunks of free pages (called a range).
31 * Two pages are part of the same range if:
32 * - all pages in between are part of that range,
33 * - they are of the same memory type (zeroed or non-zeroed),
34 * - they are part of the same pmemrange.
35 * A pmemrange is a range of memory which is part of the same vm_physseg
36 * and has a use-count.
37 *
38 * addr tree is vm_page[0].objt
39 * size tree is vm_page[1].objt
40 *
41 * The size tree is not used for memory ranges of 1 page, instead,
42 * single queue is vm_page[0].pageq
43 *
44 * vm_page[0].fpgsz describes the length of a free range. Two adjacent ranges
45 * are joined, unless:
46 * - they have pages in between them which are not free
47 * - they belong to different memtypes (zeroed vs dirty memory)
48 * - they are in different pmemrange areas (ISA vs non-ISA memory for instance)
49 * - they are not a continuation of the same array
50 * The latter issue is caused by vm_physseg ordering and splitting from the
51 * MD initialization machinery. The MD code is dependent on freelists and
52 * happens to split ISA memory from non-ISA memory.
53 * (Note: freelists die die die!)
54 *
55 * uvm_page_init guarantees that every vm_physseg contains an array of
56 * struct vm_page. Also, uvm_page_physload allocates an array of struct
57 * vm_page. This code depends on that array. The array may break across
58 * vm_physsegs boundaries.
59 */
60
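To make the slot convention above concrete, the following note is an illustrative sketch, not part of uvm_pmemrange.c; the range length is made up:

/*
 * Illustrative sketch (not in the original file): a free range of 5 pages
 * whose first vm_page array slot is pg keeps its bookkeeping like this:
 *
 *   pg[0].fpgsz = 5    length of the free range, in pages
 *   pg[0].objt         linkage in the address tree (uvm_pmr_addr)
 *   pg[1].objt         linkage in the size tree (uvm_pmr_size)
 *   pg[0].pageq        used instead of the size tree when fpgsz == 1
 *
 * This is why the size-tree code below inserts/removes pg + 1 and why
 * uvm_pmr_size_cmp() looks at (pg - 1)->fpgsz.
 */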
61/*
62 * Validate the flags of the page. (Used in asserts.)
63 * Any free page must have the PQ_FREE flag set.
64 * Free pages may be zeroed.
65 * Pmap flags are left untouched.
66 *
67 * The PQ_FREE flag is not checked here: by not checking, we can easily use
68 * this check in pages which are freed.
69 */
70#define VALID_FLAGS(pg_flags)(((pg_flags) & ~(0x00010000|0x00000100|0x3f000000)) == 0x0
)
\
71 (((pg_flags) & ~(PQ_FREE0x00010000|PG_ZERO0x00000100|PG_PMAPMASK0x3f000000)) == 0x0)
72
73/* Tree comparators. */
74int uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *,
75 const struct uvm_pmemrange *);
76int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
77int uvm_pmr_pg_to_memtype(struct vm_page *);
78
79#ifdef DDB1
80void uvm_pmr_print(void);
81#endif
82
83/*
84 * Memory types. The page flags are used to derive what the current memory
85 * type of a page is.
86 */
87int
88uvm_pmr_pg_to_memtype(struct vm_page *pg)
89{
90 if (pg->pg_flags & PG_ZERO0x00000100)
91 return UVM_PMR_MEMTYPE_ZERO1;
92 /* Default: dirty memory. */
93 return UVM_PMR_MEMTYPE_DIRTY0;
94}
95
96/* Trees. */
97RBT_GENERATE(uvm_pmr_addr, vm_page, objt, uvm_pmr_addr_cmp)static int uvm_pmr_addr_RBT_COMPARE(const void *lptr, const void
*rptr) { const struct vm_page *l = lptr, *r = rptr; return uvm_pmr_addr_cmp
(l, r); } static const struct rb_type uvm_pmr_addr_RBT_INFO =
{ uvm_pmr_addr_RBT_COMPARE, ((void *)0), __builtin_offsetof(
struct vm_page, objt), }; const struct rb_type *const uvm_pmr_addr_RBT_TYPE
= &uvm_pmr_addr_RBT_INFO
;
98RBT_GENERATE(uvm_pmr_size, vm_page, objt, uvm_pmr_size_cmp)static int uvm_pmr_size_RBT_COMPARE(const void *lptr, const void
*rptr) { const struct vm_page *l = lptr, *r = rptr; return uvm_pmr_size_cmp
(l, r); } static const struct rb_type uvm_pmr_size_RBT_INFO =
{ uvm_pmr_size_RBT_COMPARE, ((void *)0), __builtin_offsetof(
struct vm_page, objt), }; const struct rb_type *const uvm_pmr_size_RBT_TYPE
= &uvm_pmr_size_RBT_INFO
;
99RBT_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,static int uvm_pmemrange_addr_RBT_COMPARE(const void *lptr, const
void *rptr) { const struct uvm_pmemrange *l = lptr, *r = rptr
; return uvm_pmemrange_addr_cmp(l, r); } static const struct rb_type
uvm_pmemrange_addr_RBT_INFO = { uvm_pmemrange_addr_RBT_COMPARE
, ((void *)0), __builtin_offsetof(struct uvm_pmemrange, pmr_addr
), }; const struct rb_type *const uvm_pmemrange_addr_RBT_TYPE
= &uvm_pmemrange_addr_RBT_INFO
100 uvm_pmemrange_addr_cmp)static int uvm_pmemrange_addr_RBT_COMPARE(const void *lptr, const
void *rptr) { const struct uvm_pmemrange *l = lptr, *r = rptr
; return uvm_pmemrange_addr_cmp(l, r); } static const struct rb_type
uvm_pmemrange_addr_RBT_INFO = { uvm_pmemrange_addr_RBT_COMPARE
, ((void *)0), __builtin_offsetof(struct uvm_pmemrange, pmr_addr
), }; const struct rb_type *const uvm_pmemrange_addr_RBT_TYPE
= &uvm_pmemrange_addr_RBT_INFO
;
101
102/* Validation. */
103#ifdef DEBUG
104void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)do {} while (0);
105#else
106#define uvm_pmr_assertvalid(pmr)do {} while (0) do {} while (0)
107#endif
108
109psize_t uvm_pmr_get1page(psize_t, int, struct pglist *,
110 paddr_t, paddr_t, int);
111
112struct uvm_pmemrange *uvm_pmr_allocpmr(void);
113struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
114struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *,
115 struct vm_page *, int);
116void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr,
117 struct vm_page *pg, struct vm_page **pg_prev,
118 struct vm_page **pg_next);
119struct vm_page *uvm_pmr_findnextsegment(struct uvm_pmemrange *,
120 struct vm_page *, paddr_t);
121struct vm_page *uvm_pmr_findprevsegment(struct uvm_pmemrange *,
122 struct vm_page *, paddr_t);
123psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t,
124 struct vm_page **, int);
125psize_t uvm_pmr_remove_1strange_reverse(struct pglist *,
126 paddr_t *);
127void uvm_pmr_split(paddr_t);
128struct uvm_pmemrange *uvm_pmemrange_find(paddr_t);
129struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *,
130 struct uvm_pmemrange *);
131psize_t pow2divide(psize_t, psize_t);
132struct vm_page *uvm_pmr_rootupdate(struct uvm_pmemrange *,
133 struct vm_page *, paddr_t, paddr_t, int);
134
135/*
136 * Computes num/denom and rounds it up to the next power-of-2.
137 *
138 * This is a division function which calculates an approximation of
139 * num/denom, with result =~ num/denom. It is meant to be fast and doesn't
140 * have to be accurate.
141 *
142 * Providing too large a value makes the allocator slightly faster, at the
143 * risk of hitting the failure case more often. Providing too small a value
144 * makes the allocator a bit slower, but less likely to hit a failure case.
145 */
146psize_t
147pow2divide(psize_t num, psize_t denom)
148{
149 int rshift;
150
151 for (rshift = 0; num > denom; rshift++, denom <<= 1)
152 ;
153 return (paddr_t)1 << rshift;
154}
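As a quick check of the rounding behaviour described above, here is a stand-alone user-space sketch (pow2divide_demo and psize_demo_t are illustrative names, not kernel code):

/* Sketch only: a user-space copy of the loop above. */
#include <stdio.h>

typedef unsigned long psize_demo_t;	/* stand-in for the kernel's psize_t */

static psize_demo_t
pow2divide_demo(psize_demo_t num, psize_demo_t denom)
{
	int rshift;

	for (rshift = 0; num > denom; rshift++, denom <<= 1)
		;
	return (psize_demo_t)1 << rshift;
}

int
main(void)
{
	printf("%lu\n", pow2divide_demo(10, 3));	/* 10/3 =~ 3.3 -> rounded up to 4 */
	printf("%lu\n", pow2divide_demo(64, 8));	/* exact power-of-2 ratio stays 8 */
	printf("%lu\n", pow2divide_demo(3, 10));	/* num <= denom: loop never runs, yields 1 */
	return 0;
}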
155
156/*
157 * Predicate: lhs is a subrange of rhs.
158 *
159 * If rhs_low == 0: don't care about lower bound.
160 * If rhs_high == 0: don't care about upper bound.
161 */
162#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high)(((rhs_low) == 0 || (lhs_low) >= (rhs_low)) && ((rhs_high
) == 0 || (lhs_high) <= (rhs_high)))
\
163 (((rhs_low) == 0 || (lhs_low) >= (rhs_low)) && \
164 ((rhs_high) == 0 || (lhs_high) <= (rhs_high)))
165
166/*
167 * Predicate: lhs intersects with rhs.
168 *
169 * If rhs_low == 0: don't care about lower bound.
170 * If rhs_high == 0: don't care about upper bound.
171 * Ranges don't intersect if they don't have any page in common, array
172 * semantics mean that < instead of <= should be used here.
173 */
174#define PMR_INTERSECTS_WITH(lhs_low, lhs_high, rhs_low, rhs_high)(((rhs_low) == 0 || (rhs_low) < (lhs_high)) && ((rhs_high
) == 0 || (lhs_low) < (rhs_high)))
\
175 (((rhs_low) == 0 || (rhs_low) < (lhs_high)) && \
176 ((rhs_high) == 0 || (lhs_low) < (rhs_high)))
177
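The half-open ("< instead of <=") semantics noted above can be exercised in isolation; the _DEMO macros below are user-space copies made only for this sketch:

/* Sketch only: the two predicates above, applied to page-number ranges. */
#include <assert.h>

#define PMR_IS_SUBRANGE_OF_DEMO(lhs_low, lhs_high, rhs_low, rhs_high)	\
	(((rhs_low) == 0 || (lhs_low) >= (rhs_low)) &&			\
	((rhs_high) == 0 || (lhs_high) <= (rhs_high)))
#define PMR_INTERSECTS_WITH_DEMO(lhs_low, lhs_high, rhs_low, rhs_high)	\
	(((rhs_low) == 0 || (rhs_low) < (lhs_high)) &&			\
	((rhs_high) == 0 || (lhs_low) < (rhs_high)))

int
main(void)
{
	assert(!PMR_INTERSECTS_WITH_DEMO(10, 20, 20, 30));	/* touching ranges share no page */
	assert(PMR_INTERSECTS_WITH_DEMO(10, 20, 19, 30));	/* page 19 is in both */
	assert(PMR_IS_SUBRANGE_OF_DEMO(12, 18, 10, 20));	/* fully contained */
	assert(PMR_IS_SUBRANGE_OF_DEMO(12, 18, 10, 0));		/* rhs_high == 0: no upper bound */
	return 0;
}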
178/*
179 * Align to power-of-2 alignment.
180 */
181#define PMR_ALIGN(pgno, align)(((pgno) + ((align) - 1)) & ~((align) - 1)) \
182 (((pgno) + ((align) - 1)) & ~((align) - 1))
183#define PMR_ALIGN_DOWN(pgno, align)((pgno) & ~((align) - 1)) \
184 ((pgno) & ~((align) - 1))
185
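Likewise for the rounding macros, assuming align is a power of two as every caller here guarantees; again a user-space sketch with _DEMO names:

/* Sketch only: the alignment macros above, applied to small page numbers. */
#include <assert.h>

#define PMR_ALIGN_DEMO(pgno, align)		(((pgno) + ((align) - 1)) & ~((align) - 1))
#define PMR_ALIGN_DOWN_DEMO(pgno, align)	((pgno) & ~((align) - 1))

int
main(void)
{
	assert(PMR_ALIGN_DEMO(5UL, 4UL) == 8);		/* round up to the next multiple of 4 */
	assert(PMR_ALIGN_DEMO(8UL, 4UL) == 8);		/* already aligned: unchanged */
	assert(PMR_ALIGN_DOWN_DEMO(5UL, 4UL) == 4);	/* round down */
	return 0;
}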
186
187/*
188 * Comparator: sort by address ascending.
189 */
190int
191uvm_pmemrange_addr_cmp(const struct uvm_pmemrange *lhs,
192 const struct uvm_pmemrange *rhs)
193{
194 return lhs->low < rhs->low ? -1 : lhs->low > rhs->low;
195}
196
197/*
198 * Comparator: sort by use ascending.
199 *
200 * The higher the use value of a range, the more devices need memory in
201 * this range. Therefore allocate from the range with the lowest use first.
202 */
203int
204uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
205{
206 int result;
207
208 result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use;
209 if (result == 0)
210 result = uvm_pmemrange_addr_cmp(lhs, rhs);
211 return result;
212}
213
214int
215uvm_pmr_addr_cmp(const struct vm_page *lhs, const struct vm_page *rhs)
216{
217 paddr_t lhs_addr, rhs_addr;
218
219 lhs_addr = VM_PAGE_TO_PHYS(lhs)((lhs)->phys_addr);
220 rhs_addr = VM_PAGE_TO_PHYS(rhs)((rhs)->phys_addr);
221
222 return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr);
223}
224
225int
226uvm_pmr_size_cmp(const struct vm_page *lhs, const struct vm_page *rhs)
227{
228 psize_t lhs_size, rhs_size;
229 int cmp;
230
231 /* Using second tree, so we receive pg[1] instead of pg[0]. */
232 lhs_size = (lhs - 1)->fpgsz;
233 rhs_size = (rhs - 1)->fpgsz;
234
235 cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size);
236 if (cmp == 0)
237 cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1);
238 return cmp;
239}
240
241/*
242 * Find the first range of free pages that is at least sz pages long.
243 */
244struct vm_page *
245uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
246{
247 struct vm_page *node, *best;
248
249 KASSERT(sz >= 1)((sz >= 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 249, "sz >= 1"))
;
250
251 if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti])(((&pmr->single[mti])->tqh_first) == ((void *)0)))
252 return TAILQ_FIRST(&pmr->single[mti])((&pmr->single[mti])->tqh_first);
253
254 node = RBT_ROOT(uvm_pmr_size, &pmr->size[mti])uvm_pmr_size_RBT_ROOT(&pmr->size[mti]);
255 best = NULL((void *)0);
256 while (node != NULL((void *)0)) {
257 if ((node - 1)->fpgsz >= sz) {
258 best = (node - 1);
259 node = RBT_LEFT(uvm_objtree, node)uvm_objtree_RBT_LEFT(node);
260 } else
261 node = RBT_RIGHT(uvm_objtree, node)uvm_objtree_RBT_RIGHT(node);
262 }
263 return best;
264}
265
266/*
267 * Finds the next range. The next range has a size >= pg->fpgsz.
268 * Returns NULL if no more ranges are available.
269 */
270struct vm_page *
271uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
272{
273 struct vm_page *npg;
274
275 KASSERT(pmr != NULL && pg != NULL)((pmr != ((void *)0) && pg != ((void *)0)) ? (void)0 :
__assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 275
, "pmr != NULL && pg != NULL"))
;
276 if (pg->fpgsz == 1) {
277 if (TAILQ_NEXT(pg, pageq)((pg)->pageq.tqe_next) != NULL((void *)0))
278 return TAILQ_NEXT(pg, pageq)((pg)->pageq.tqe_next);
279 else
280 npg = RBT_MIN(uvm_pmr_size, &pmr->size[mt])uvm_pmr_size_RBT_MIN(&pmr->size[mt]);
281 } else
282 npg = RBT_NEXT(uvm_pmr_size, pg + 1)uvm_pmr_size_RBT_NEXT(pg + 1);
283
284 return npg == NULL((void *)0) ? NULL((void *)0) : npg - 1;
285}
286
287/*
288 * Finds the previous and next ranges relative to the (uninserted) pg range.
289 *
290 * *pg_prev == NULL if no previous range is available, that can join with
291 * pg.
292 * *pg_next == NULL if no next range is available, that can join with
293 * pg.
294 */
295void
296uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
297 struct vm_page **pg_prev, struct vm_page **pg_next)
298{
299 KASSERT(pg_prev != NULL && pg_next != NULL)((pg_prev != ((void *)0) && pg_next != ((void *)0)) ?
(void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 299, "pg_prev != NULL && pg_next != NULL"))
;
300
301 *pg_next = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg)uvm_pmr_addr_RBT_NFIND(&pmr->addr, pg);
302 if (*pg_next == NULL((void *)0))
303 *pg_prev = RBT_MAX(uvm_pmr_addr, &pmr->addr)uvm_pmr_addr_RBT_MAX(&pmr->addr);
304 else
305 *pg_prev = RBT_PREV(uvm_pmr_addr, *pg_next)uvm_pmr_addr_RBT_PREV(*pg_next);
306
307 KDASSERT(*pg_next == NULL ||((void)0)
308 VM_PAGE_TO_PHYS(*pg_next) > VM_PAGE_TO_PHYS(pg))((void)0);
309 KDASSERT(*pg_prev == NULL ||((void)0)
310 VM_PAGE_TO_PHYS(*pg_prev) < VM_PAGE_TO_PHYS(pg))((void)0);
311
312 /* Reset if not contig. */
313 if (*pg_prev != NULL((void *)0) &&
314 (atop(VM_PAGE_TO_PHYS(*pg_prev))((((*pg_prev)->phys_addr)) >> 12) + (*pg_prev)->fpgsz
315 != atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12) ||
316 *pg_prev + (*pg_prev)->fpgsz != pg || /* Array broke. */
317 uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
318 *pg_prev = NULL((void *)0);
319 if (*pg_next != NULL((void *)0) &&
320 (atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12) + pg->fpgsz
321 != atop(VM_PAGE_TO_PHYS(*pg_next))((((*pg_next)->phys_addr)) >> 12) ||
322 pg + pg->fpgsz != *pg_next || /* Array broke. */
323 uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
324 *pg_next = NULL((void *)0);
325 return;
326}
327
328/*
329 * Remove a range from the address tree.
330 * Address tree maintains pmr counters.
331 */
332void
333uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
334{
335 KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg)((void)0);
336 KDASSERT(pg->pg_flags & PQ_FREE)((void)0);
337 RBT_REMOVE(uvm_pmr_addr, &pmr->addr, pg)uvm_pmr_addr_RBT_REMOVE(&pmr->addr, pg);
338
339 pmr->nsegs--;
340}
341/*
342 * Remove a range from the size tree.
343 */
344void
345uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
346{
347 int memtype;
348#ifdef DEBUG
349 struct vm_page *i;
350#endif
351
352 KDASSERT(pg->fpgsz >= 1)((void)0);
353 KDASSERT(pg->pg_flags & PQ_FREE)((void)0);
354 memtype = uvm_pmr_pg_to_memtype(pg);
355
356 if (pg->fpgsz == 1) {
357#ifdef DEBUG
358 TAILQ_FOREACH(i, &pmr->single[memtype], pageq)for((i) = ((&pmr->single[memtype])->tqh_first); (i)
!= ((void *)0); (i) = ((i)->pageq.tqe_next))
{
359 if (i == pg)
360 break;
361 }
362 KDASSERT(i == pg)((void)0);
363#endif
364 TAILQ_REMOVE(&pmr->single[memtype], pg, pageq)do { if (((pg)->pageq.tqe_next) != ((void *)0)) (pg)->pageq
.tqe_next->pageq.tqe_prev = (pg)->pageq.tqe_prev; else (
&pmr->single[memtype])->tqh_last = (pg)->pageq.tqe_prev
; *(pg)->pageq.tqe_prev = (pg)->pageq.tqe_next; ((pg)->
pageq.tqe_prev) = ((void *)-1); ((pg)->pageq.tqe_next) = (
(void *)-1); } while (0)
;
365 } else {
366 KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[memtype],((void)0)
367 pg + 1) == pg + 1)((void)0);
368 RBT_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1)uvm_pmr_size_RBT_REMOVE(&pmr->size[memtype], pg + 1);
369 }
370}
371/* Remove from both trees. */
372void
373uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
374{
375 uvm_pmr_assertvalid(pmr)do {} while (0);
376 uvm_pmr_remove_size(pmr, pg);
377 uvm_pmr_remove_addr(pmr, pg);
378 uvm_pmr_assertvalid(pmr)do {} while (0);
379}
380
381/*
382 * Insert the range described in pg.
383 * Returns the range thus created (which may be joined with the previous and
384 * next ranges).
385 * If no_join, the caller guarantees that the range cannot possibly join
386 * with adjacent ranges.
387 */
388struct vm_page *
389uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
390{
391 struct vm_page *prev, *next;
392
393#ifdef DEBUG
394 struct vm_page *i;
395 int mt;
396#endif
397
398 KDASSERT(pg->pg_flags & PQ_FREE)((void)0);
399 KDASSERT(pg->fpgsz >= 1)((void)0);
400
401#ifdef DEBUG
402 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX2; mt++) {
403 TAILQ_FOREACH(i, &pmr->single[mt], pageq)for((i) = ((&pmr->single[mt])->tqh_first); (i) != (
(void *)0); (i) = ((i)->pageq.tqe_next))
404 KDASSERT(i != pg)((void)0);
405 if (pg->fpgsz > 1) {
406 KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mt],((void)0)
407 pg + 1) == NULL)((void)0);
408 }
409 KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL)((void)0);
410 }
411#endif
412
413 if (!no_join) {
414 uvm_pmr_pnaddr(pmr, pg, &prev, &next);
415 if (next != NULL((void *)0)) {
416 uvm_pmr_remove_size(pmr, next);
417 uvm_pmr_remove_addr(pmr, next);
418 pg->fpgsz += next->fpgsz;
419 next->fpgsz = 0;
420 }
421 if (prev != NULL((void *)0)) {
422 uvm_pmr_remove_size(pmr, prev);
423 prev->fpgsz += pg->fpgsz;
424 pg->fpgsz = 0;
425 return prev;
426 }
427 }
428
429 RBT_INSERT(uvm_pmr_addr, &pmr->addr, pg)uvm_pmr_addr_RBT_INSERT(&pmr->addr, pg);
430
431 pmr->nsegs++;
432
433 return pg;
434}
435/*
436 * Insert the range described in pg.
437 * Returns the range thus created (which may be joined with the previous and
438 * next ranges).
439 * Page must already be in the address tree.
440 */
441void
442uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
443{
444 int memtype;
445#ifdef DEBUG
446 struct vm_page *i;
447 int mti;
448#endif
449
450 KDASSERT(pg->fpgsz >= 1)((void)0);
451 KDASSERT(pg->pg_flags & PQ_FREE)((void)0);
452
453 memtype = uvm_pmr_pg_to_memtype(pg);
454#ifdef DEBUG
455 for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX2; mti++) {
456 TAILQ_FOREACH(i, &pmr->single[mti], pageq)for((i) = ((&pmr->single[mti])->tqh_first); (i) != (
(void *)0); (i) = ((i)->pageq.tqe_next))
457 KDASSERT(i != pg)((void)0);
458 if (pg->fpgsz > 1) {
459 KDASSERT(RBT_FIND(uvm_pmr_size, &pmr->size[mti],((void)0)
460 pg + 1) == NULL)((void)0);
461 }
462 KDASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg)((void)0);
463 }
464 for (i = pg; i < pg + pg->fpgsz; i++)
465 KASSERT(uvm_pmr_pg_to_memtype(i) == memtype)((uvm_pmr_pg_to_memtype(i) == memtype) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_pmemrange.c", 465, "uvm_pmr_pg_to_memtype(i) == memtype"
))
;
466#endif
467
468 if (pg->fpgsz == 1)
469 TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq)do { (pg)->pageq.tqe_next = ((void *)0); (pg)->pageq.tqe_prev
= (&pmr->single[memtype])->tqh_last; *(&pmr->
single[memtype])->tqh_last = (pg); (&pmr->single[memtype
])->tqh_last = &(pg)->pageq.tqe_next; } while (0)
;
470 else
471 RBT_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1)uvm_pmr_size_RBT_INSERT(&pmr->size[memtype], pg + 1);
472}
473/* Insert in both trees. */
474struct vm_page *
475uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
476{
477 uvm_pmr_assertvalid(pmr)do {} while (0);
478 pg = uvm_pmr_insert_addr(pmr, pg, no_join);
479 uvm_pmr_insert_size(pmr, pg);
480 uvm_pmr_assertvalid(pmr)do {} while (0);
481 return pg;
482}
483
484/*
485 * Find the last page that is part of this segment.
486 * => pg: the range at which to start the search.
487 * => boundary: the page number boundary specification (0 = no boundary).
488 * => pmr: the pmemrange of the page.
489 *
490 * This function returns 1 before the next range, so if you want to have the
491 * next range, you need to run TAILQ_NEXT(result, pageq) after calling.
492 * The reason is that this way, the length of the segment is easily
493 * calculated using: atop(result) - atop(pg) + 1.
494 * Hence this function also never returns NULL.
495 */
496struct vm_page *
497uvm_pmr_findnextsegment(struct uvm_pmemrange *pmr,
498 struct vm_page *pg, paddr_t boundary)
499{
500 paddr_t first_boundary;
501 struct vm_page *next;
502 struct vm_page *prev;
503
504 KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&((void)0)
505 pmr->high > atop(VM_PAGE_TO_PHYS(pg)))((void)0);
506 if (boundary != 0) {
507 first_boundary =
508 PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary)(((((((pg)->phys_addr)) >> 12) + 1) + ((boundary) - 1
)) & ~((boundary) - 1))
;
509 } else
510 first_boundary = 0;
511
512 /*
513 * Increase next until it hits the first page of the next segment.
514 *
515 * While loop checks the following:
516 * - next != NULL we have not reached the end of pgl
517 * - boundary == 0 || next < first_boundary
518 * we do not cross a boundary
519 * - atop(prev) + 1 == atop(next)
520 * still in the same segment
521 * - low <= last
522 * - high > last still in the same memory range
523 * - memtype is equal allocator is unable to view different memtypes
524 * as part of the same segment
525 * - prev + 1 == next no array breakage occurs
526 */
527 prev = pg;
528 next = TAILQ_NEXT(prev, pageq)((prev)->pageq.tqe_next);
529 while (next != NULL((void *)0) &&
530 (boundary == 0 || atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) < first_boundary) &&
531 atop(VM_PAGE_TO_PHYS(prev))((((prev)->phys_addr)) >> 12) + 1 == atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
532 pmr->low <= atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
533 pmr->high > atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
534 uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
535 prev + 1 == next) {
536 prev = next;
537 next = TAILQ_NEXT(prev, pageq)((prev)->pageq.tqe_next);
538 }
539
540 /*
541 * End of this segment.
542 */
543 return prev;
544}
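A short worked example of the length arithmetic promised in the comment above the function (page numbers are made up):

/*
 * Worked example (illustrative numbers, not from the original file): if pg
 * is the vm_page for page 10 and pages 10..13 form one segment, the loop
 * stops with prev at page 13, so the caller computes the segment length as
 * atop(result) - atop(pg) + 1 = 13 - 10 + 1 = 4, and TAILQ_NEXT(result,
 * pageq) is the first page of the following segment, if any.
 */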
545
546/*
547 * Find the first page that is part of this segment.
548 * => pg: the range at which to start the search.
549 * => boundary: the page number boundary specification (0 = no boundary).
550 * => pmr: the pmemrange of the page.
551 *
552 * This function returns 1 after the previous range, so if you want to have the
553 * previous range, you need to run TAILQ_NEXT(result, pageq) after calling.
554 * The reason is that this way, the length of the segment is easily
555 * calculated using: atop(pg) - atop(result) + 1.
556 * Hence this function also never returns NULL.
557 */
558struct vm_page *
559uvm_pmr_findprevsegment(struct uvm_pmemrange *pmr,
560 struct vm_page *pg, paddr_t boundary)
561{
562 paddr_t first_boundary;
563 struct vm_page *next;
564 struct vm_page *prev;
565
566 KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) &&((void)0)
567 pmr->high > atop(VM_PAGE_TO_PHYS(pg)))((void)0);
568 if (boundary != 0) {
569 first_boundary =
570 PMR_ALIGN_DOWN(atop(VM_PAGE_TO_PHYS(pg)), boundary)((((((pg)->phys_addr)) >> 12)) & ~((boundary) - 1
))
;
571 } else
572 first_boundary = 0;
573
574 /*
575 * Increase next until it hits the first page of the previous segment.
576 *
577 * While loop checks the following:
578 * - next != NULL we have not reached the end of pgl
579 * - boundary == 0 || next >= first_boundary
580 * we do not cross a boundary
581 * - atop(prev) - 1 == atop(next)
582 * still in the same segment
583 * - low <= last
584 * - high > last still in the same memory range
585 * - memtype is equal allocator is unable to view different memtypes
586 * as part of the same segment
587 * - prev - 1 == next no array breakage occurs
588 */
589 prev = pg;
590 next = TAILQ_NEXT(prev, pageq)((prev)->pageq.tqe_next);
591 while (next != NULL((void *)0) &&
592 (boundary == 0 || atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) >= first_boundary) &&
593 atop(VM_PAGE_TO_PHYS(prev))((((prev)->phys_addr)) >> 12) - 1 == atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
594 pmr->low <= atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
595 pmr->high > atop(VM_PAGE_TO_PHYS(next))((((next)->phys_addr)) >> 12) &&
596 uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) &&
597 prev - 1 == next) {
598 prev = next;
599 next = TAILQ_NEXT(prev, pageq)((prev)->pageq.tqe_next);
600 }
601
602 /*
603 * Start of this segment.
604 */
605 return prev;
606}
607
608/*
609 * Remove the first segment of contiguous pages from pgl.
610 * A segment ends if it crosses boundary (unless boundary = 0) or
611 * if it would enter a different uvm_pmemrange.
612 *
613 * Work: the page range that the caller is currently working with.
614 * May be null.
615 *
616 * If is_desperate is non-zero, the smallest segment is erased. Otherwise,
617 * the first segment is erased (which, if called by uvm_pmr_getpages(),
618 * probably is the smallest or very close to it).
619 */
620psize_t
621uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary,
622 struct vm_page **work, int is_desperate)
623{
624 struct vm_page *start, *end, *iter, *iter_end, *inserted, *lowest;
625 psize_t count;
626 struct uvm_pmemrange *pmr, *pmr_iter;
627
628 KASSERT(!TAILQ_EMPTY(pgl))((!(((pgl)->tqh_first) == ((void *)0))) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 628, "!TAILQ_EMPTY(pgl)"
))
;
629
630 /*
631 * Initialize to first page.
632 * Unless desperate scan finds a better candidate, this is what'll be
633 * erased.
634 */
635 start = TAILQ_FIRST(pgl)((pgl)->tqh_first);
636 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start))((((start)->phys_addr)) >> 12));
637 end = uvm_pmr_findnextsegment(pmr, start, boundary);
638
639 /*
640 * If we are desperate, we _really_ want to get rid of the smallest
641 * element (rather than a close match to the smallest element).
642 */
643 if (is_desperate) {
644 /* Linear search for smallest segment. */
645 pmr_iter = pmr;
646 for (iter = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next);
647 iter != NULL((void *)0) && start != end;
648 iter = TAILQ_NEXT(iter_end, pageq)((iter_end)->pageq.tqe_next)) {
649 /*
650 * Only update pmr if it doesn't match current
651 * iteration.
652 */
653 if (pmr->low > atop(VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12) ||
654 pmr->high <= atop(VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12)) {
655 pmr_iter = uvm_pmemrange_find(atop(((((iter)->phys_addr)) >> 12)
656 VM_PAGE_TO_PHYS(iter))((((iter)->phys_addr)) >> 12));
657 }
658
659 iter_end = uvm_pmr_findnextsegment(pmr_iter, iter,
660 boundary);
661
662 /*
663 * Current iteration is smaller than best match so
664 * far; update.
665 */
666 if (VM_PAGE_TO_PHYS(iter_end)((iter_end)->phys_addr) - VM_PAGE_TO_PHYS(iter)((iter)->phys_addr) <
667 VM_PAGE_TO_PHYS(end)((end)->phys_addr) - VM_PAGE_TO_PHYS(start)((start)->phys_addr)) {
668 start = iter;
669 end = iter_end;
670 pmr = pmr_iter;
671 }
672 }
673 }
674
675 /*
676 * Calculate count and end of the list.
677 */
678 count = atop(VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start))((((end)->phys_addr) - ((start)->phys_addr)) >> 12
)
+ 1;
679 lowest = start;
680 end = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next);
681
682 /*
683 * Actually remove the range of pages.
684 *
685 * Sadly, this cannot be done using pointer iteration:
686 * vm_physseg is not guaranteed to be sorted on address, hence
687 * uvm_page_init() may not have initialized its array sorted by
688 * page number.
689 */
690 for (iter = start; iter != end; iter = iter_end) {
691 iter_end = TAILQ_NEXT(iter, pageq)((iter)->pageq.tqe_next);
692 TAILQ_REMOVE(pgl, iter, pageq)do { if (((iter)->pageq.tqe_next) != ((void *)0)) (iter)->
pageq.tqe_next->pageq.tqe_prev = (iter)->pageq.tqe_prev
; else (pgl)->tqh_last = (iter)->pageq.tqe_prev; *(iter
)->pageq.tqe_prev = (iter)->pageq.tqe_next; ((iter)->
pageq.tqe_prev) = ((void *)-1); ((iter)->pageq.tqe_next) =
((void *)-1); } while (0)
;
693 }
694
695 lowest->fpgsz = count;
696 inserted = uvm_pmr_insert(pmr, lowest, 0);
697
698 /*
699 * If the caller was working on a range and this function modified
700 * that range, update the pointer.
701 */
702 if (work != NULL((void *)0) && *work != NULL((void *)0) &&
703 atop(VM_PAGE_TO_PHYS(inserted))((((inserted)->phys_addr)) >> 12) <= atop(VM_PAGE_TO_PHYS(*work))((((*work)->phys_addr)) >> 12) &&
704 atop(VM_PAGE_TO_PHYS(inserted))((((inserted)->phys_addr)) >> 12) + inserted->fpgsz >
705 atop(VM_PAGE_TO_PHYS(*work))((((*work)->phys_addr)) >> 12))
706 *work = inserted;
707 return count;
708}
709
710/*
711 * Remove the first segment of contiguous pages from a pgl
712 * with the list elements in reverse order of physaddr.
713 *
714 * A segment ends if it would enter a different uvm_pmemrange.
715 *
716 * Stores starting physical address of the segment in pstart.
717 */
718psize_t
719uvm_pmr_remove_1strange_reverse(struct pglist *pgl, paddr_t *pstart)
720{
721 struct vm_page *start, *end, *iter, *iter_end, *lowest;
722 psize_t count;
723 struct uvm_pmemrange *pmr;
724
725 KASSERT(!TAILQ_EMPTY(pgl))((!(((pgl)->tqh_first) == ((void *)0))) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 725, "!TAILQ_EMPTY(pgl)"
))
;
726
727 start = TAILQ_FIRST(pgl)((pgl)->tqh_first);
728 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start))((((start)->phys_addr)) >> 12));
729 end = uvm_pmr_findprevsegment(pmr, start, 0);
730
731 KASSERT(end <= start)((end <= start) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 731, "end <= start"))
;
732
733 /*
734 * Calculate count and end of the list.
735 */
736 count = atop(VM_PAGE_TO_PHYS(start) - VM_PAGE_TO_PHYS(end))((((start)->phys_addr) - ((end)->phys_addr)) >> 12
)
+ 1;
737 lowest = end;
738 end = TAILQ_NEXT(end, pageq)((end)->pageq.tqe_next);
739
740 /*
741 * Actually remove the range of pages.
742 *
743 * Sadly, this cannot be done using pointer iteration:
744 * vm_physseg is not guaranteed to be sorted on address, hence
745 * uvm_page_init() may not have initialized its array sorted by
746 * page number.
747 */
748 for (iter = start; iter != end; iter = iter_end) {
749 iter_end = TAILQ_NEXT(iter, pageq)((iter)->pageq.tqe_next);
750 TAILQ_REMOVE(pgl, iter, pageq)do { if (((iter)->pageq.tqe_next) != ((void *)0)) (iter)->
pageq.tqe_next->pageq.tqe_prev = (iter)->pageq.tqe_prev
; else (pgl)->tqh_last = (iter)->pageq.tqe_prev; *(iter
)->pageq.tqe_prev = (iter)->pageq.tqe_next; ((iter)->
pageq.tqe_prev) = ((void *)-1); ((iter)->pageq.tqe_next) =
((void *)-1); } while (0)
;
751 }
752
753 lowest->fpgsz = count;
754 (void) uvm_pmr_insert(pmr, lowest, 0);
755
756 *pstart = VM_PAGE_TO_PHYS(lowest)((lowest)->phys_addr);
757 return count;
758}
759
760/*
761 * Extract a number of pages from a segment of free pages.
762 * Called by uvm_pmr_getpages.
763 *
764 * Returns the segment that was created from pages left over at the tail
765 * of the remove set of pages, or NULL if no pages were left at the tail.
766 */
767struct vm_page *
768uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg,
769 paddr_t start, paddr_t end, struct pglist *result)
770{
771 struct vm_page *after, *pg_i;
772 psize_t before_sz, after_sz;
773#ifdef DEBUG
774 psize_t i;
775#endif
776
777 KDASSERT(end > start)((void)0);
778 KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)))((void)0);
779 KDASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz)((void)0);
780 KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start)((void)0);
781 KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz >= end)((void)0);
782
783 before_sz = start - atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12);
784 after_sz = atop(VM_PAGE_TO_PHYS(pg))((((pg)->phys_addr)) >> 12) + pg->fpgsz - end;
785 KDASSERT(before_sz + after_sz + (end - start) == pg->fpgsz)((void)0);
786 uvm_pmr_assertvalid(pmr)do {} while (0);
787
788 uvm_pmr_remove_size(pmr, pg);
789 if (before_sz == 0)
790 uvm_pmr_remove_addr(pmr, pg);
791 after = pg + before_sz + (end - start);
792
793 /* Add selected pages to result. */
794 for (pg_i = pg + before_sz; pg_i != after; pg_i++) {
795 KASSERT(pg_i->pg_flags & PQ_FREE)((pg_i->pg_flags & 0x00010000) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_pmemrange.c", 795, "pg_i->pg_flags & PQ_FREE"
))
;
796 pg_i->fpgsz = 0;
797 TAILQ_INSERT_TAIL(result, pg_i, pageq)do { (pg_i)->pageq.tqe_next = ((void *)0); (pg_i)->pageq
.tqe_prev = (result)->tqh_last; *(result)->tqh_last = (
pg_i); (result)->tqh_last = &(pg_i)->pageq.tqe_next
; } while (0)
;
798 }
799
800 /* Before handling. */
801 if (before_sz > 0) {
802 pg->fpgsz = before_sz;
803 uvm_pmr_insert_size(pmr, pg);
804 }
805
806 /* After handling. */
807 if (after_sz > 0) {
808#ifdef DEBUG
809 for (i = 0; i < after_sz; i++) {
810 KASSERT(!uvm_pmr_isfree(after + i))((!uvm_pmr_isfree(after + i)) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_pmemrange.c", 810, "!uvm_pmr_isfree(after + i)"
))
;
811 }
812#endif
813 KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end)((void)0);
814 after->fpgsz = after_sz;
815 after = uvm_pmr_insert_addr(pmr, after, 1);
816 uvm_pmr_insert_size(pmr, after);
817 }
818
819 uvm_pmr_assertvalid(pmr)do {} while (0);
820 return (after_sz > 0 ? after : NULL((void *)0));
821}
822
823/*
824 * Indicate to the page daemon that a nowait call failed and it should
825 * recover at least some memory in the most restricted region (assumed
826 * to be dma_constraint).
827 */
828extern volatile int uvm_nowait_failed;
829
830/*
831 * Acquire a number of pages.
832 *
833 * count: the number of pages returned
834 * start: lowest page number
835 * end: highest page number +1
836 * (start = end = 0: no limitation)
837 * align: power-of-2 alignment constraint (align = 1: no alignment)
838 * boundary: power-of-2 boundary (boundary = 0: no boundary)
839 * maxseg: maximum number of segments to return
840 * flags: UVM_PLA_* flags
841 * result: returned pages storage (uses pageq)
842 */
843int
844uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
845 paddr_t boundary, int maxseg, int flags, struct pglist *result)
846{
847 struct uvm_pmemrange *pmr; /* Iterate memory ranges. */
848 struct vm_page *found, *f_next; /* Iterate chunks. */
849 psize_t fcount; /* Current found pages. */
850 int fnsegs; /* Current segment counter. */
851 int try, start_try;
852 psize_t search[3];
853 paddr_t fstart, fend; /* Pages to be taken from found. */
854 int memtype; /* Requested memtype. */
855 int memtype_init; /* Best memtype. */
856 int desperate; /* True if allocation failed. */
857#ifdef DIAGNOSTIC1
858 struct vm_page *diag_prev; /* Used during validation. */
859#endif /* DIAGNOSTIC */
860
861 /*
862 * Validate arguments.
863 */
864 KASSERT(count > 0)((count > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 864, "count > 0"))
;
1
Assuming 'count' is > 0
2
'?' condition is true
865 KASSERT(start == 0 || end == 0 || start < end)((start == 0 || end == 0 || start < end) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 865, "start == 0 || end == 0 || start < end"
))
;
3
Assuming 'start' is not equal to 0
4
Assuming 'end' is not equal to 0
5
Assuming 'start' is < 'end'
6
'?' condition is true
866 KASSERT(align >= 1)((align >= 1) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 866, "align >= 1"))
;
7
Assuming 'align' is >= 1
8
'?' condition is true
867 KASSERT(powerof2(align))((((((align)-1)&(align))==0)) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_pmemrange.c", 867, "powerof2(align)")
)
;
9
Assuming the condition is true
10
'?' condition is true
868 KASSERT(maxseg > 0)((maxseg > 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 868, "maxseg > 0"))
;
11
Assuming 'maxseg' is > 0
12
'?' condition is true
869 KASSERT(boundary == 0 || powerof2(boundary))((boundary == 0 || ((((boundary)-1)&(boundary))==0)) ? (void
)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 869, "boundary == 0 || powerof2(boundary)"))
;
13
Assuming 'boundary' is not equal to 0
14
Assuming the condition is true
15
'?' condition is true
870 KASSERT(boundary == 0 || maxseg * boundary >= count)((boundary == 0 || maxseg * boundary >= count) ? (void)0 :
__assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 870
, "boundary == 0 || maxseg * boundary >= count"))
;
16
Assuming the condition is true
17
'?' condition is true
871 KASSERT(TAILQ_EMPTY(result))(((((result)->tqh_first) == ((void *)0))) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 871, "TAILQ_EMPTY(result)"
))
;
18
Assuming field 'tqh_first' is equal to null
19
'?' condition is true
872 KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT))((!(flags & 0x0001) ^ !(flags & 0x0002)) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c", 872, "!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT)"
))
;
20
Assuming the condition is false
21
Assuming the condition is true
22
'?' condition is true
873
874 /*
875 * TRYCONTIG is a noop if you only want a single segment.
876 * Remove it if that's the case: otherwise it'll deny the fast
877 * allocation.
878 */
879 if (maxseg == 1 || count == 1)
23
Assuming 'maxseg' is not equal to 1
24
Assuming 'count' is not equal to 1
25
Taking false branch
880 flags &= ~UVM_PLA_TRYCONTIG0x0008;
881
882 /*
883 * Configure search.
884 *
885 * search[0] is one segment, only used in UVM_PLA_TRYCONTIG case.
886 * search[1] is multiple segments, chosen to fulfill the search in
887 * approximately even-sized segments.
888 * This is a good trade-off between slightly reduced allocation speed
889 * and less fragmentation.
890 * search[2] is the worst case, in which all segments are evaluated.
891 * This provides the least fragmentation, but makes the search
892 * possibly longer (although in the case it is selected, that no
893 * longer matters most).
894 *
895 * The exception is when maxseg == 1: since we can only fulfill that
896 * with one segment of size pages, only a single search type has to
897 * be attempted.
898 */
899 if (maxseg
25.1
'maxseg' is not equal to 1
== 1 || count
25.2
'count' is not equal to 1
== 1) {
26
Taking false branch
900 start_try = 2;
901 search[2] = count;
902 } else if (maxseg >= count && (flags & UVM_PLA_TRYCONTIG0x0008) == 0) {
27
Assuming 'maxseg' is >= 'count'
28
Assuming the condition is true
29
Taking true branch
903 start_try = 2;
904 search[2] = 1;
905 } else {
906 start_try = 0;
907 search[0] = count;
908 search[1] = pow2divide(count, maxseg);
909 search[2] = 1;
910 if ((flags & UVM_PLA_TRYCONTIG0x0008) == 0)
911 start_try = 1;
912 if (search[1] >= search[0]) {
913 search[1] = search[0];
914 start_try = 1;
915 }
916 if (search[2] >= search[start_try]) {
917 start_try = 2;
918 }
919 }
920
921 /*
922 * Memory type: if zeroed memory is requested, traverse the zero set.
923 * Otherwise, traverse the dirty set.
924 *
925 * The memtype iterator is reinitialized to memtype_init on entrance
926 * of a pmemrange.
927 */
928 if (flags & UVM_PLA_ZERO0x0004)
30
Assuming the condition is false
31
Taking false branch
929 memtype_init = UVM_PMR_MEMTYPE_ZERO1;
930 else
931 memtype_init = UVM_PMR_MEMTYPE_DIRTY0;
932
933 /*
934 * Initially, we're not desperate.
935 *
936 * Note that if we return from a sleep, we are still desperate.
937 * Chances are that memory pressure is still high, so resetting
938 * seems over-optimistic to me.
939 */
940 desperate = 0;
941
942again:
943 uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock);
944
945 /*
946 * check to see if we need to generate some free pages waking
947 * the pagedaemon.
948 */
949 if ((uvmexp.free - BUFPAGES_DEFICIT(((buflowpages - bcstats.numbufpages) < 0) ? 0 : buflowpages
- bcstats.numbufpages)
) < uvmexp.freemin
||
32
Assuming the condition is false
33
'?' condition is false
34
Assuming the condition is false
950 ((uvmexp.free - BUFPAGES_DEFICIT(((buflowpages - bcstats.numbufpages) < 0) ? 0 : buflowpages
- bcstats.numbufpages)
) < uvmexp.freetarg
&&
35
'?' condition is false
36
Assuming the condition is false
951 (uvmexp.inactive + BUFPAGES_INACT(((bcstats.numcleanpages - buflowpages) < 0) ? 0 : bcstats
.numcleanpages - buflowpages)
) < uvmexp.inactarg))
952 wakeup(&uvm.pagedaemon);
953
954 /*
955 * fail if any of these conditions is true:
956 * [1] there really are no free pages, or
957 * [2] only kernel "reserved" pages remain and
958 * the UVM_PLA_USERESERVE flag wasn't used.
959 * [3] only pagedaemon "reserved" pages remain and
960 * the requestor isn't the pagedaemon nor the syncer.
961 */
962 if ((uvmexp.free <= uvmexp.reserve_kernel) &&
37
Assuming field 'free' is > field 'reserve_kernel'
963 !(flags & UVM_PLA_USERESERVE0x0040)) {
964 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
965 return ENOMEM12;
966 }
967
968 if ((uvmexp.free <= (uvmexp.reserve_pagedaemon + count)) &&
38
Assuming the condition is false
969 (curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
!= uvm.pagedaemon_proc) && (curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
!= syncerproc)) {
970 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
971 if (flags & UVM_PLA_WAITOK0x0001) {
972 uvm_wait("uvm_pmr_getpages");
973 goto again;
974 }
975 return ENOMEM12;
976 }
977
978retry: /* Return point after sleeping. */
979 fcount = 0;
980 fnsegs = 0;
981
982retry_desperate:
983 /*
984 * If we just want any page(s), go for the really fast option.
985 */
986 if (count
38.1
'count' is <= 'maxseg'
44.1
'count' is <= 'maxseg'
<= maxseg && align
44.2
'align' is not equal to 1
== 1
&& boundary == 0 &&
39
Assuming 'align' is not equal to 1
987 (flags & UVM_PLA_TRYCONTIG0x0008) == 0) {
988 fcount += uvm_pmr_get1page(count - fcount, memtype_init,
989 result, start, end, 0);
990
991 /*
992 * If we found sufficient pages, go to the success exit code.
993 *
994 * Otherwise, go immediately to fail, since we collected
995 * all we could anyway.
996 */
997 if (fcount == count)
998 goto out;
999 else
1000 goto fail;
1001 }
1002
1003 /*
1004 * The heart of the contig case.
1005 *
1006 * The code actually looks like this:
1007 *
1008 * foreach (struct pmemrange) {
1009 * foreach (memtype) {
1010 * foreach(try) {
1011 * foreach (free range of memtype in pmemrange,
1012 * starting at search[try]) {
1013 * while (range has space left)
1014 * take from range
1015 * }
1016 * }
1017 * }
1018 *
1019 * if next pmemrange has higher usecount than current:
1020 * enter desperate case (which will drain the pmemranges
1021 * until empty prior to moving to the next one)
1022 * }
1023 *
1024 * When desperate is activated, try always starts at the highest
1025 * value. The memtype loop is using a goto ReScanMemtype.
1026 * The try loop is using a goto ReScan.
1027 * The 'range has space left' loop uses label DrainFound.
1028 *
1029 * Writing them all as loops would take up a lot of screen space in
1030 * the form of indentation and some parts are easier to express
1031 * using the labels.
1032 */
1033
1034 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use)for((pmr) = ((&uvm.pmr_control.use)->tqh_first); (pmr)
!= ((void *)0); (pmr) = ((pmr)->pmr_use.tqe_next))
{
40
Assuming 'pmr' is equal to null
41
Loop condition is false. Execution continues on line 1154
45
Loop condition is false. Execution continues on line 1154
1035 /* Empty range. */
1036 if (pmr->nsegs == 0)
1037 continue;
1038
1039 /* Outside requested range. */
1040 if (!PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)(((start) == 0 || (start) < (pmr->high)) && ((end
) == 0 || (pmr->low) < (end)))
)
1041 continue;
1042
1043 memtype = memtype_init;
1044
1045rescan_memtype: /* Return point at memtype++. */
1046 try = start_try;
1047
1048rescan: /* Return point at try++. */
1049 for (found = uvm_pmr_nfindsz(pmr, search[try], memtype);
1050 found != NULL((void *)0);
1051 found = f_next) {
1052 f_next = uvm_pmr_nextsz(pmr, found, memtype);
1053
1054 fstart = atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12);
1055 if (start != 0)
1056 fstart = MAX(start, fstart)(((start)>(fstart))?(start):(fstart));
1057drain_found:
1058 /*
1059 * Throw away the first segment if fnsegs == maxseg
1060 *
1061 * Note that f_next is still valid after this call,
1062 * since we only allocated from entries before f_next.
1063 * We don't revisit the entries we already extracted
1064 * from unless we entered the desperate case.
1065 */
1066 if (fnsegs == maxseg) {
1067 fnsegs--;
1068 fcount -=
1069 uvm_pmr_remove_1strange(result, boundary,
1070 &found, desperate);
1071 }
1072
1073 fstart = PMR_ALIGN(fstart, align)(((fstart) + ((align) - 1)) & ~((align) - 1));
1074 fend = atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) + found->fpgsz;
1075 if (end != 0)
1076 fend = MIN(end, fend)(((end)<(fend))?(end):(fend));
1077 if (boundary != 0) {
1078 fend =
1079 MIN(fend, PMR_ALIGN(fstart + 1, boundary))(((fend)<((((fstart + 1) + ((boundary) - 1)) & ~((boundary
) - 1))))?(fend):((((fstart + 1) + ((boundary) - 1)) & ~(
(boundary) - 1))))
;
1080 }
1081 if (fstart >= fend)
1082 continue;
1083 if (fend - fstart > count - fcount)
1084 fend = fstart + (count - fcount);
1085
1086 fcount += fend - fstart;
1087 fnsegs++;
1088 found = uvm_pmr_extract_range(pmr, found,
1089 fstart, fend, result);
1090
1091 if (fcount == count)
1092 goto out;
1093
1094 /*
1095 * If there's still space left in found, try to
1096 * fully drain it prior to continuing.
1097 */
1098 if (found != NULL((void *)0)) {
1099 fstart = fend;
1100 goto drain_found;
1101 }
1102 }
1103
1104 /* Try a smaller search now. */
1105 if (++try < nitems(search)(sizeof((search)) / sizeof((search)[0])))
1106 goto rescan;
1107
1108 /*
1109 * Exhaust all memory types prior to going to the next memory
1110 * segment.
1111 * This means that zero-vs-dirty are eaten prior to moving
1112 * to a pmemrange with a higher use-count.
1113 *
1114 * Code is basically a difficult way of writing:
1115 * memtype = memtype_init;
1116 * do {
1117 * ...;
1118 * memtype += 1;
1119 * memtype %= MEMTYPE_MAX;
1120 * } while (memtype != memtype_init);
1121 */
1122 memtype += 1;
1123 if (memtype == UVM_PMR_MEMTYPE_MAX2)
1124 memtype = 0;
1125 if (memtype != memtype_init)
1126 goto rescan_memtype;
1127
1128 /*
1129 * If not desperate, enter desperate case prior to eating all
1130 * the good stuff in the next range.
1131 */
1132 if (!desperate && TAILQ_NEXT(pmr, pmr_use)((pmr)->pmr_use.tqe_next) != NULL((void *)0) &&
1133 TAILQ_NEXT(pmr, pmr_use)((pmr)->pmr_use.tqe_next)->use != pmr->use)
1134 break;
1135 }
1136
1137 /*
1138 * Not enough memory of the requested type available. Fall back to
1139 * less good memory that we'll clean up better later.
1140 *
1141 * This algorithm is not very smart though, it just starts scanning
1142 * a different typed range, but the nicer ranges of the previous
1143 * iteration may fall out. Hence there is a small chance of a false
1144 * negative.
1145 *
1146 * When desperate: scan all sizes starting at the smallest
1147 * (start_try = 1) and do not consider UVM_PLA_TRYCONTIG (which may
1148 * allow us to hit the fast path now).
1149 *
1150 * Also, because we will revisit entries we scanned before, we need
1151 * to reset the page queue, or we may end up releasing entries in
1152 * such a way as to invalidate f_next.
1153 */
1154 if (!desperate
41.1
'desperate' is 0
45.1
'desperate' is 1
) {
42
Taking true branch
46
Taking false branch
1155 desperate = 1;
1156 start_try = nitems(search)(sizeof((search)) / sizeof((search)[0])) - 1;
1157 flags &= ~UVM_PLA_TRYCONTIG0x0008;
1158
1159 while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0)))
43
Loop condition is false. Execution continues on line 1161
1160 uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0);
1161 fnsegs = 0;
1162 fcount = 0;
1163 goto retry_desperate;
44
Control jumps to line 986
1164 }
1165
1166fail:
1167 /* Allocation failed. */
1168 /* XXX: claim from memory reserve here */
1169
1170 while (!TAILQ_EMPTY(result)(((result)->tqh_first) == ((void *)0)))
47
Loop condition is false. Execution continues on line 1173
1171 uvm_pmr_remove_1strange(result, 0, NULL((void *)0), 0);
1172
1173 if (flags & UVM_PLA_WAITOK0x0001) {
48
Assuming the condition is true
49
Taking true branch
1174 if (uvm_wait_pla(ptoa(start)((paddr_t)(start) << 12), ptoa(end)((paddr_t)(end) << 12) - 1, ptoa(count)((paddr_t)(count) << 12),
50
Calling 'uvm_wait_pla'
1175 flags & UVM_PLA_FAILOK0x0010) == 0)
1176 goto retry;
1177 KASSERT(flags & UVM_PLA_FAILOK)((flags & 0x0010) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pmemrange.c"
, 1177, "flags & UVM_PLA_FAILOK"))
;
1178 } else {
1179 if (!(flags & UVM_PLA_NOWAKE0x0020)) {
1180 uvm_nowait_failed = 1;
1181 wakeup(&uvm.pagedaemon);
1182 }
1183 }
1184 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
1185
1186 return ENOMEM12;
1187
1188out:
1189 /* Allocation successful. */
1190 uvmexp.free -= fcount;
1191
1192 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
1193
1194 /* Update statistics and zero pages if UVM_PLA_ZERO. */
1195#ifdef DIAGNOSTIC1
1196 fnsegs = 0;
1197 fcount = 0;
1198 diag_prev = NULL((void *)0);
1199#endif /* DIAGNOSTIC */
1200 TAILQ_FOREACH(found, result, pageq)for((found) = ((result)->tqh_first); (found) != ((void *)0
); (found) = ((found)->pageq.tqe_next))
{
1201 atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_PMAPMASK0x3f000000);
1202
1203 if (found->pg_flags & PG_ZERO0x00000100) {
1204 uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock);
1205 uvmexp.zeropages--;
1206 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET(uvmexp.free / 8))
1207 wakeup(&uvmexp.zeropages);
1208 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
1209 }
1210 if (flags & UVM_PLA_ZERO0x0004) {
1211 if (found->pg_flags & PG_ZERO0x00000100)
1212 uvmexp.pga_zerohit++;
1213 else {
1214 uvmexp.pga_zeromiss++;
1215 uvm_pagezero(found);
1216 }
1217 }
1218 atomic_clearbits_intx86_atomic_clearbits_u32(&found->pg_flags, PG_ZERO0x00000100|PQ_FREE0x00010000);
1219
1220 found->uobject = NULL((void *)0);
1221 found->uanon = NULL((void *)0);
1222 found->pg_version++;
1223
1224 /*
1225 * Validate that the page matches range criterium.
1226 */
1227 KDASSERT(start == 0 || atop(VM_PAGE_TO_PHYS(found)) >= start)((void)0);
1228 KDASSERT(end == 0 || atop(VM_PAGE_TO_PHYS(found)) < end)((void)0);
1229
1230#ifdef DIAGNOSTIC1
1231 /*
1232 * Update fcount (# found pages) and
1233 * fnsegs (# found segments) counters.
1234 */
1235 if (diag_prev == NULL((void *)0) ||
1236 /* new segment if it contains a hole */
1237 atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) + 1 !=
1238 atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) ||
1239 /* new segment if it crosses boundary */
1240 (atop(VM_PAGE_TO_PHYS(diag_prev))((((diag_prev)->phys_addr)) >> 12) & ~(boundary - 1)) !=
1241 (atop(VM_PAGE_TO_PHYS(found))((((found)->phys_addr)) >> 12) & ~(boundary - 1)))
1242 fnsegs++;
1243 fcount++;
1244
1245 diag_prev = found;
1246#endif /* DIAGNOSTIC */
1247 }
1248
1249#ifdef DIAGNOSTIC1
1250 /*
1251 * Panic on algorithm failure.
1252 */
1253 if (fcount != count || fnsegs > maxseg) {
1254 panic("pmemrange allocation error: "
1255 "allocated %ld pages in %d segments, "
1256 "but request was %ld pages in %d segments",
1257 fcount, fnsegs, count, maxseg);
1258 }
1259#endif /* DIAGNOSTIC */
1260
1261 return 0;
1262}
1263
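For orientation, here is a hedged sketch of how a caller might drive uvm_pmr_getpages(); the constraint values are invented for the example, and real in-tree callers (uvm_pglistalloc() and friends) add their own bookkeeping around this:

	/*
	 * Sketch only: request 16 zeroed pages below 16 MB, each segment
	 * starting on a 16-page boundary, in at most 2 segments, sleeping
	 * for memory if needed.
	 */
	struct pglist pgl;
	struct vm_page *pg;
	int error;

	TAILQ_INIT(&pgl);
	error = uvm_pmr_getpages(16,			/* count */
	    atop(0), atop(16 * 1024 * 1024),		/* start, end (page numbers) */
	    16,						/* align: power of 2, in pages */
	    0,						/* boundary: none */
	    2,						/* maxseg */
	    UVM_PLA_WAITOK | UVM_PLA_ZERO,		/* flags */
	    &pgl);
	if (error == 0) {
		TAILQ_FOREACH(pg, &pgl, pageq) {
			/* each pg satisfies the range/alignment constraints */
		}
	}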
1264/*
1265 * Free a number of contig pages (invoked by uvm_page_init).
1266 */
1267void
1268uvm_pmr_freepages(struct vm_page *pg, psize_t count)
1269{
1270 struct uvm_pmemrange *pmr;
1271 psize_t i, pmr_count;
1272 struct vm_page *firstpg = pg;
1273
1274 for (i = 0; i < count; i++) {
1275 KASSERT(atop(VM_PAGE_TO_PHYS(&pg[i])) ==
1276 atop(VM_PAGE_TO_PHYS(pg)) + i);
1277
1278 if (!((pg[i].pg_flags & PQ_FREE) == 0 &&
1279 VALID_FLAGS(pg[i].pg_flags))) {
1280 printf("Flags: 0x%x, will panic now.\n",
1281 pg[i].pg_flags);
1282 }
1283 KASSERT((pg[i].pg_flags & PQ_FREE) == 0 &&
1284 VALID_FLAGS(pg[i].pg_flags));
1285 atomic_setbits_int(&pg[i].pg_flags, PQ_FREE);
1286 atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
1287 }
1288
1289 uvm_lock_fpageq();
1290
1291 for (i = count; i > 0; i -= pmr_count) {
1292 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
1293 KASSERT(pmr != NULL);
1294
1295 pmr_count = MIN(i, pmr->high - atop(VM_PAGE_TO_PHYS(pg)));
1296 pg->fpgsz = pmr_count;
1297 uvm_pmr_insert(pmr, pg, 0);
1298
1299 uvmexp.free += pmr_count;
1300 pg += pmr_count;
1301 }
1302 wakeup(&uvmexp.free);
1303 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1304 wakeup(&uvmexp.zeropages);
1305
1306 uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count));
1307
1308 uvm_unlock_fpageq();
1309}
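
The second loop above splits the freed run at pmemrange boundaries: each pass inserts at most pmr->high - atop(VM_PAGE_TO_PHYS(pg)) pages into the current range and advances pg by that amount. A stand-alone sketch of just that arithmetic, with hypothetical range bounds; the kernel re-resolves the next range through uvm_pmemrange_find() rather than stepping an index:

    #include <stdio.h>

    #define MIN(a, b)       ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
            /* hypothetical ranges [0,104) and [104,1024); free 10 pages at page 100 */
            unsigned long range_high[] = { 104, 1024 };
            unsigned long pageno = 100, left = 10;
            int r = 0;

            while (left > 0) {
                    unsigned long chunk = MIN(left, range_high[r] - pageno);

                    printf("insert %lu pages at page %lu into range %d\n",
                        chunk, pageno, r);
                    pageno += chunk;        /* 100 -> 104 */
                    left -= chunk;          /* 10 -> 6 -> 0 */
                    r++;
            }
            return 0;
    }

The run is cut into a 4-page piece for the first range and a 6-page piece for the second, mirroring how pmr_count bounds each insert.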
1310
1311/*
1312 * Free all pages in the queue.
1313 */
1314void
1315uvm_pmr_freepageq(struct pglist *pgl)
1316{
1317 struct vm_page *pg;
1318 paddr_t pstart;
1319 psize_t plen;
1320
1321 TAILQ_FOREACH(pg, pgl, pageq) {
1322 if (!((pg->pg_flags & PQ_FREE) == 0 &&
1323 VALID_FLAGS(pg->pg_flags))) {
1324 printf("Flags: 0x%x, will panic now.\n",
1325 pg->pg_flags);
1326 }
1327 KASSERT((pg->pg_flags & PQ_FREE) == 0 &&
1328 VALID_FLAGS(pg->pg_flags));
1329 atomic_setbits_int(&pg->pg_flags, PQ_FREE);
1330 atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
1331 }
1332
1333 uvm_lock_fpageq();
1334 while (!TAILQ_EMPTY(pgl)) {
1335 pg = TAILQ_FIRST(pgl);
1336 if (pg == TAILQ_NEXT(pg, pageq) + 1) {
1337 /*
1338 * If pg is one behind the position of the
1339 * next page in the list in the page array,
1340 * try going backwards instead of forward.
1341 */
1342 plen = uvm_pmr_remove_1strange_reverse(pgl, &pstart);
1343 } else {
1344 pstart = VM_PAGE_TO_PHYS(TAILQ_FIRST(pgl));
1345 plen = uvm_pmr_remove_1strange(pgl, 0, NULL, 0);
1346 }
1347 uvmexp.free += plen;
1348
1349 uvm_wakeup_pla(pstart, ptoa(plen));
1350 }
1351 wakeup(&uvmexp.free);
1352 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1353 wakeup(&uvmexp.zeropages);
1354 uvm_unlock_fpageq();
1355
1356 return;
1357}
1358
1359/*
1360 * Store a pmemrange in the list.
1361 *
1362 * The list is sorted by use.
1363 */
1364struct uvm_pmemrange *
1365uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq,
1366 struct uvm_pmemrange *pmr)
1367{
1368 struct uvm_pmemrange *iter;
1369 int cmp = 1;
1370
1371 TAILQ_FOREACH(iter, useq, pmr_use) {
1372 cmp = uvm_pmemrange_use_cmp(pmr, iter);
1373 if (cmp == 0)
1374 return iter;
1375 if (cmp == -1)
1376 break;
1377 }
1378
1379 if (iter == NULL)
1380 TAILQ_INSERT_TAIL(useq, pmr, pmr_use);
1381 else
1382 TAILQ_INSERT_BEFORE(iter, pmr, pmr_use);
1383 return NULL;
1384}
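
uvm_pmemrange_use_insert() keeps the use queue ordered by walking it and inserting in front of the first element that compares greater, returning the existing element on an exact match. The same pattern reduced to a stand-alone <sys/queue.h> program with a hypothetical integer key (illustrative only):

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            int key;
            TAILQ_ENTRY(node) link;
    };
    TAILQ_HEAD(nodeq, node);

    /* Insert n in ascending key order; return an existing equal node, else NULL. */
    static struct node *
    sorted_insert(struct nodeq *q, struct node *n)
    {
            struct node *iter;

            TAILQ_FOREACH(iter, q, link) {
                    if (iter->key == n->key)
                            return iter;
                    if (iter->key > n->key)
                            break;
            }
            if (iter == NULL)
                    TAILQ_INSERT_TAIL(q, n, link);
            else
                    TAILQ_INSERT_BEFORE(iter, n, link);
            return NULL;
    }

    int
    main(void)
    {
            struct nodeq q = TAILQ_HEAD_INITIALIZER(q);
            int keys[] = { 3, 1, 2 };
            struct node *n, *iter;

            for (int i = 0; i < 3; i++) {
                    n = calloc(1, sizeof(*n));
                    n->key = keys[i];
                    sorted_insert(&q, n);
            }
            TAILQ_FOREACH(iter, &q, link)
                    printf("%d ", iter->key);       /* prints: 1 2 3 */
            printf("\n");
            return 0;
    }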
1385
1386#ifdef DEBUG
1387/*
1388 * Validation of the whole pmemrange.
1389 * Called with fpageq locked.
1390 */
1391void
1392uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)
1393{
1394 struct vm_page *prev, *next, *i, *xref;
1395 int lcv, mti;
1396
1397 /* Empty range */
1398 if (pmr->nsegs == 0)
1399 return;
1400
1401 /* Validate address tree. */
1402 RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr) {
1403 /* Validate the range. */
1404 KASSERT(i->fpgsz > 0);
1405 KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low);
1406 KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz
1407 <= pmr->high);
1408
1409 /* Validate each page in this range. */
1410 for (lcv = 0; lcv < i->fpgsz; lcv++) {
1411 /*
1412 * Only the first page has a size specification.
1413 * Rest is size 0.
1414 */
1415 KASSERT(lcv == 0 || i[lcv].fpgsz == 0);
1416 /*
1417 * Flag check.
1418 */
1419 KASSERT(VALID_FLAGS(i[lcv].pg_flags) &&
1420 (i[lcv].pg_flags & PQ_FREE) == PQ_FREE);
1421 /*
1422 * Free pages are:
1423 * - not wired
1424 * - have no vm_anon
1425 * - have no uvm_object
1426 */
1427 KASSERT(i[lcv].wire_count == 0);
1428 KASSERT(i[lcv].uanon == (void*)0xdeadbeef ||
1429 i[lcv].uanon == NULL);
1430 KASSERT(i[lcv].uobject == (void*)0xdeadbeef ||
1431 i[lcv].uobject == NULL);
1432 /*
1433 * Pages in a single range always have the same
1434 * memtype.
1435 */
1436 KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==
1437 uvm_pmr_pg_to_memtype(&i[lcv]));
1438 }
1439
1440 /* Check that it shouldn't be joined with its predecessor. */
1441 prev = RBT_PREV(uvm_pmr_addr, i);
1442 if (prev != NULL) {
1443 KASSERT(uvm_pmr_pg_to_memtype(i) !=
1444 uvm_pmr_pg_to_memtype(prev) ||
1445 atop(VM_PAGE_TO_PHYS(i)) >
1446 atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz ||
1447 prev + prev->fpgsz != i);
1448 }
1449
1450 /* Assert i is in the size tree as well. */
1451 if (i->fpgsz == 1) {
1452 TAILQ_FOREACH(xref,
1453 &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) {
1454 if (xref == i)
1455 break;
1456 }
1457 KASSERT(xref == i);
1458 } else {
1459 KASSERT(RBT_FIND(uvm_pmr_size,
1460 &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==
1461 i + 1);
1462 }
1463 }
1464
1465 /* Validate size tree. */
1466 for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
1467 for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) {
1468 next = uvm_pmr_nextsz(pmr, i, mti);
1469 if (next != NULL) {
1470 KASSERT(i->fpgsz <=
1471 next->fpgsz);
1472 }
1473
1474 /* Assert i is in the addr tree as well. */
1475 KASSERT(RBT_FIND(uvm_pmr_addr, &pmr->addr, i) == i);
1476
1477 /* Assert i is of the correct memory type. */
1478 KASSERT(uvm_pmr_pg_to_memtype(i) == mti);
1479 }
1480 }
1481
1482 /* Validate nsegs statistic. */
1483 lcv = 0;
1484 RBT_FOREACH(i, uvm_pmr_addr, &pmr->addr)
1485 lcv++;
1486 KASSERT(pmr->nsegs == lcv);
1487}
1488#endif /* DEBUG */
1489
1490/*
1491 * Split pmr at split point pageno.
1492 * Called with fpageq unlocked.
1493 *
1494 * Split is only applied if a pmemrange spans pageno.
1495 */
1496void
1497uvm_pmr_split(paddr_t pageno)
1498{
1499 struct uvm_pmemrange *pmr, *drain;
1500 struct vm_page *rebuild, *prev, *next;
1501 psize_t prev_sz;
1502
1503 uvm_lock_fpageq();
1504 pmr = uvm_pmemrange_find(pageno);
1505 if (pmr == NULL || !(pmr->low < pageno)) {
1506 /* No split required. */
1507 uvm_unlock_fpageq();
1508 return;
1509 }
1510
1511 KASSERT(pmr->low < pageno);
1512 KASSERT(pmr->high > pageno);
1513
1514 /*
1515 * uvm_pmr_allocpmr() calls into malloc() which in turn calls into
1516 * uvm_kmemalloc which calls into pmemrange, making the locking
1517 * a bit hard, so we just race!
1518 */
1519 uvm_unlock_fpageq();
1520 drain = uvm_pmr_allocpmr();
1521 uvm_lock_fpageq();
1522 pmr = uvm_pmemrange_find(pageno);
1523 if (pmr == NULL || !(pmr->low < pageno)) {
1524 /*
1525 * We lost the race since someone else ran this or a related
1526 * function, however this should be triggered very rarely so
1527 * we just leak the pmr.
1528 */
1529 printf("uvm_pmr_split: lost one pmr\n");
1530 uvm_unlock_fpageq();
1531 return;
1532 }
1533
1534 drain->low = pageno;
1535 drain->high = pmr->high;
1536 drain->use = pmr->use;
1537
1538 uvm_pmr_assertvalid(pmr);
1539 uvm_pmr_assertvalid(drain);
1540 KASSERT(drain->nsegs == 0);
1541
1542 RBT_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) {
1543 if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno)
1544 break;
1545 }
1546 if (rebuild == NULL)
1547 prev = RBT_MAX(uvm_pmr_addr, &pmr->addr);
1548 else
1549 prev = RBT_PREV(uvm_pmr_addr, rebuild);
1550 KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno);
1551
1552 /*
1553 * Handle free chunk that spans the split point.
1554 */
1555 if (prev != NULL &&
1556 atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz > pageno) {
1557 psize_t before, after;
1558
1559 KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno);
1560
1561 uvm_pmr_remove(pmr, prev);
1562 prev_sz = prev->fpgsz;
1563 before = pageno - atop(VM_PAGE_TO_PHYS(prev));
1564 after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno;
1565
1566 KASSERT(before > 0);
1567 KASSERT(after > 0);
1568
1569 prev->fpgsz = before;
1570 uvm_pmr_insert(pmr, prev, 1);
1571 (prev + before)->fpgsz = after;
1572 uvm_pmr_insert(drain, prev + before, 1);
1573 }
1574
1575 /* Move free chunks that no longer fall in the range. */
1576 for (; rebuild != NULL; rebuild = next) {
1577 next = RBT_NEXT(uvm_pmr_addr, rebuild);
1578
1579 uvm_pmr_remove(pmr, rebuild);
1580 uvm_pmr_insert(drain, rebuild, 1);
1581 }
1582
1583 pmr->high = pageno;
1584 uvm_pmr_assertvalid(pmr);
1585 uvm_pmr_assertvalid(drain);
1586
1587 RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain);
1588 uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain);
1589 uvm_unlock_fpageq();
1590}
1591
1592/*
1593 * Increase the usage counter for the given range of memory.
1594 *
1595 * The more usage counters a given range of memory has, the harder
1596 * the allocator will try to avoid allocating from it.
1597 *
1598 * Addresses here are in paddr_t, not page-numbers.
1599 * The lowest and highest allowed address are specified.
1600 */
1601void
1602uvm_pmr_use_inc(paddr_t low, paddr_t high)
1603{
1604 struct uvm_pmemrange *pmr;
1605 paddr_t sz;
1606
1607 /* pmr uses page numbers, translate low and high. */
1608 high++;
1609 high = atop(trunc_page(high));
1610 low = atop(round_page(low));
1611 uvm_pmr_split(low);
1612 uvm_pmr_split(high);
1613
1614 sz = 0;
1615 uvm_lock_fpageq();
1616 /* Increase use count on segments in range. */
1617 RBT_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) {
1618 if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) {
1619 TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use);
1620 pmr->use++;
1621 sz += pmr->high - pmr->low;
1622 uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr);
1623 }
1624 uvm_pmr_assertvalid(pmr);
1625 }
1626 uvm_unlock_fpageq();
1627
1628 KASSERT(sz >= high - low);
1629}
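
The byte-address to page-number translation above produces a half-open page range: the inclusive high address is bumped by one, truncated down to a page boundary and converted with atop(), while low is rounded up. A stand-alone sketch with 4 KiB pages (the macros are redefined locally to mirror the kernel ones, so this is an illustration, not kernel code):

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_MASK       (PAGE_SIZE - 1)
    #define atop(x)         ((x) >> PAGE_SHIFT)
    #define trunc_page(x)   ((x) & ~PAGE_MASK)
    #define round_page(x)   (((x) + PAGE_MASK) & ~PAGE_MASK)

    int
    main(void)
    {
            unsigned long low = 0x1800, high = 0x4fff;

            high++;                         /* make the upper bound exclusive */
            high = atop(trunc_page(high));  /* 0x5000 -> page 5 */
            low = atop(round_page(low));    /* 0x2000 -> page 2 */
            printf("pages [%lu, %lu)\n", low, high);  /* pages 2, 3, 4 */
            return 0;
    }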
1630
1631/*
1632 * Allocate a pmemrange.
1633 *
1634 * If called from uvm_page_init, the uvm_pageboot_alloc is used.
1635 * If called after uvm_init, malloc is used.
1636 * (And if called in between, you're dead.)
1637 */
1638struct uvm_pmemrange *
1639uvm_pmr_allocpmr(void)
1640{
1641 struct uvm_pmemrange *nw;
1642 int i;
1643
1644 /* We're only ever hitting the !uvm.page_init_done case for now. */
1645 if (!uvm.page_init_done) {
1646 nw = (struct uvm_pmemrange *)
1647 uvm_pageboot_alloc(sizeof(struct uvm_pmemrange));
1648 } else {
1649 nw = malloc(sizeof(struct uvm_pmemrange),
1650 M_VMMAP, M_NOWAIT);
1651 }
1652 KASSERT(nw != NULL);
1653 memset(nw, 0, sizeof(struct uvm_pmemrange));
1654 RBT_INIT(uvm_pmr_addr, &nw->addr);
1655 for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) {
1656 RBT_INIT(uvm_pmr_size, &nw->size[i]);
1657 TAILQ_INIT(&nw->single[i]);
1658 }
1659 return nw;
1660}
1661
1662/*
1663 * Initialization of pmr.
1664 * Called by uvm_page_init.
1665 *
1666 * Sets up pmemranges.
1667 */
1668void
1669uvm_pmr_init(void)
1670{
1671 struct uvm_pmemrange *new_pmr;
1672 int i;
1673
1674 TAILQ_INIT(&uvm.pmr_control.use);
1675 RBT_INIT(uvm_pmemrange_addr, &uvm.pmr_control.addr);
1676 TAILQ_INIT(&uvm.pmr_control.allocs);
1677
1678 /* By default, one range for the entire address space. */
1679 new_pmr = uvm_pmr_allocpmr();
1680 new_pmr->low = 0;
1681 new_pmr->high = atop((paddr_t)-1) + 1;
1682
1683 RBT_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr);
1684 uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr);
1685
1686 for (i = 0; uvm_md_constraints[i] != NULL; i++) {
1687 uvm_pmr_use_inc(uvm_md_constraints[i]->ucr_low,
1688 uvm_md_constraints[i]->ucr_high);
1689 }
1690}
1691
1692/*
1693 * Find the pmemrange that contains the given page number.
1694 *
1695 * (Manually traverses the binary tree, because that is cheaper on stack
1696 * usage.)
1697 */
1698struct uvm_pmemrange *
1699uvm_pmemrange_find(paddr_t pageno)
1700{
1701 struct uvm_pmemrange *pmr;
1702
1703 pmr = RBT_ROOT(uvm_pmemrange_addr, &uvm.pmr_control.addr);
1704 while (pmr != NULL) {
1705 if (pmr->low > pageno)
1706 pmr = RBT_LEFT(uvm_pmemrange_addr, pmr);
1707 else if (pmr->high <= pageno)
1708 pmr = RBT_RIGHT(uvm_pmemrange_addr, pmr);
1709 else
1710 break;
1711 }
1712
1713 return pmr;
1714}
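
uvm_pmemrange_find() is an iterative stabbing query over non-overlapping [low, high) ranges: descend left while the page number is below the node's range, right while it is at or past the node's end, and stop once it falls inside. The same loop on a hypothetical toy node type (a sketch, not the kernel's red-black tree):

    #include <stddef.h>
    #include <stdio.h>

    struct range_node {
            unsigned long low, high;                /* [low, high) in page numbers */
            struct range_node *left, *right;
    };

    /* Return the node whose [low, high) contains pageno, or NULL. */
    static struct range_node *
    range_find(struct range_node *root, unsigned long pageno)
    {
            while (root != NULL) {
                    if (root->low > pageno)
                            root = root->left;
                    else if (root->high <= pageno)
                            root = root->right;
                    else
                            break;                  /* low <= pageno < high */
            }
            return root;
    }

    int
    main(void)
    {
            struct range_node a = { 0, 256, NULL, NULL };
            struct range_node c = { 512, 1024, NULL, NULL };
            struct range_node b = { 256, 512, &a, &c };     /* root */
            struct range_node *hit = range_find(&b, 300);

            printf("%lu-%lu\n", hit->low, hit->high);       /* 256-512 */
            return 0;
    }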
1715
1716#if defined(DDB) || defined(DEBUG)
1717/*
1718 * Return true if the given page is in any of the free lists.
1719 * Used by uvm_page_printit.
1720 * This function is safe, even if the page is not on the freeq.
1721 * Note: does not apply locking, only called from ddb.
1722 */
1723int
1724uvm_pmr_isfree(struct vm_page *pg)
1725{
1726 struct vm_page *r;
1727 struct uvm_pmemrange *pmr;
1728
1729 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
1730 if (pmr == NULL)
1731 return 0;
1732 r = RBT_NFIND(uvm_pmr_addr, &pmr->addr, pg);
1733 if (r == NULL)
1734 r = RBT_MAX(uvm_pmr_addr, &pmr->addr);
1735 else if (r != pg)
1736 r = RBT_PREV(uvm_pmr_addr, r);
1737 if (r == NULL)
1738 return 0; /* Empty tree. */
1739
1740 KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)));
1741 return atop(VM_PAGE_TO_PHYS(r)) + r->fpgsz >
1742 atop(VM_PAGE_TO_PHYS(pg));
1743}
1744#endif /* DEBUG */
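
uvm_pmr_isfree() above answers the question as a predecessor query: take the free range with the greatest start at or below the page, then test whether the page is still inside it. The equivalent check over a hypothetical sorted array of ranges (a sketch only; the kernel uses the per-pmemrange address tree instead):

    #include <stdio.h>

    struct free_range { unsigned long start, npages; };

    /* Ranges sorted by start; return 1 if pageno lies inside one of them. */
    static int
    is_free(const struct free_range *r, int n, unsigned long pageno)
    {
            int best = -1;

            for (int i = 0; i < n && r[i].start <= pageno; i++)
                    best = i;               /* greatest start <= pageno */
            if (best < 0)
                    return 0;
            return pageno < r[best].start + r[best].npages;
    }

    int
    main(void)
    {
            struct free_range r[] = { { 10, 4 }, { 20, 8 } };

            /* page 13 is in [10,14), page 15 falls in a hole, page 27 is in [20,28) */
            printf("%d %d %d\n", is_free(r, 2, 13), is_free(r, 2, 15),
                is_free(r, 2, 27));
            return 0;
    }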
1745
1746/*
1747 * Given a root of a tree, find a range which intersects start, end and
1748 * is of the same memtype.
1749 *
1750 * Page must be in the address tree.
1751 */
1752struct vm_page*
1753uvm_pmr_rootupdate(struct uvm_pmemrange *pmr, struct vm_page *init_root,
1754 paddr_t start, paddr_t end, int memtype)
1755{
1756 int direction;
1757 struct vm_page *root;
1758 struct vm_page *high, *high_next;
1759 struct vm_page *low, *low_next;
1760
1761 KDASSERT(pmr != NULL && init_root != NULL);
1762 root = init_root;
1763
1764 /* Which direction to use for searching. */
1765 if (start != 0 && atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz <= start)
1766 direction = 1;
1767 else if (end != 0 && atop(VM_PAGE_TO_PHYS(root)) >= end)
1768 direction = -1;
1769 else /* nothing to do */
1770 return root;
1771
1772 /* First, update root to fall within the chosen range. */
1773 while (root && !PMR_INTERSECTS_WITH(
1774 atop(VM_PAGE_TO_PHYS(root)),
1775 atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz,
1776 start, end)) {
1777 if (direction == 1)
1778 root = RBT_RIGHT(uvm_objtree, root);
1779 else
1780 root = RBT_LEFT(uvm_objtree, root);
1781 }
1782 if (root == NULL || uvm_pmr_pg_to_memtype(root) == memtype)
1783 return root;
1784
1785 /*
1786 * Root is valid, but of the wrong memtype.
1787 *
1788 * Try to find a range that has the given memtype in the subtree
1789 * (memtype mismatches are costly, either because the conversion
1790 * is expensive, or a later allocation will need to do the opposite
1791 * conversion, which will be expensive).
1792 *
1793 *
1794 * First, simply increase address until we hit something we can use.
1795 * Cache the upper page, so we can page-walk later.
1796 */
1797 high = root;
1798 high_next = RBT_RIGHT(uvm_objtree, high);
1799 while (high_next != NULL && PMR_INTERSECTS_WITH(
1800 atop(VM_PAGE_TO_PHYS(high_next)),
1801 atop(VM_PAGE_TO_PHYS(high_next)) + high_next->fpgsz,
1802 start, end)) {
1803 high = high_next;
1804 if (uvm_pmr_pg_to_memtype(high) == memtype)
1805 return high;
1806 high_next = RBT_RIGHT(uvm_objtree, high);
1807 }
1808
1809 /*
1810 * Second, decrease the address until we hit something we can use.
1811 * Cache the lower page, so we can page-walk later.
1812 */
1813 low = root;
1814 low_next = RBT_LEFT(uvm_objtree, low);
1815 while (low_next != NULL && PMR_INTERSECTS_WITH(
1816 atop(VM_PAGE_TO_PHYS(low_next)),
1817 atop(VM_PAGE_TO_PHYS(low_next)) + low_next->fpgsz,
1818 start, end)) {
1819 low = low_next;
1820 if (uvm_pmr_pg_to_memtype(low) == memtype)
1821 return low;
1822 low_next = RBT_LEFT(uvm_objtree, low);
1823 }
1824
1825 if (low == high)
1826 return NULL;
1827
1828 /* No hits. Walk the address tree until we find something usable. */
1829 for (low = RBT_NEXT(uvm_pmr_addr, low);
1830 low != high;
1831 low = RBT_NEXT(uvm_pmr_addr, low)) {
1832 KDASSERT(PMR_IS_SUBRANGE_OF(atop(VM_PAGE_TO_PHYS(low)),
1833 atop(VM_PAGE_TO_PHYS(low)) + low->fpgsz,
1834 start, end));
1835 if (uvm_pmr_pg_to_memtype(low) == memtype)
1836 return low;
1837 }
1838
1839 /* Nothing found. */
1840 return NULL;
1841}
1842
1843/*
1844 * Allocate any page, the fastest way. Page number constraints only.
1845 */
1846psize_t
1847uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result,
1848 paddr_t start, paddr_t end, int memtype_only)
1849{
1850 struct uvm_pmemrange *pmr;
1851 struct vm_page *found, *splitpg;
1852 psize_t fcount;
1853 int memtype;
1854
1855 fcount = 0;
1856 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
1857 /* We're done. */
1858 if (fcount == count)
1859 break;
1860
1861 /* Outside requested range. */
1862 if (!(start == 0 && end == 0) &&
1863 !PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end))
1864 continue;
1865
1866 /* Range is empty. */
1867 if (pmr->nsegs == 0)
1868 continue;
1869
1870 /* Loop over all memtypes, starting at memtype_init. */
1871 memtype = memtype_init;
1872 while (fcount != count) {
1873 found = TAILQ_FIRST(&pmr->single[memtype]);
1874 /*
1875 * If found is outside the range, walk the list
1876 * until we find something that intersects with
1877 * boundaries.
1878 */
1879 while (found && !PMR_INTERSECTS_WITH(
1880 atop(VM_PAGE_TO_PHYS(found)),
1881 atop(VM_PAGE_TO_PHYS(found)) + 1,
1882 start, end))
1883 found = TAILQ_NEXT(found, pageq);
1884
1885 if (found == NULL) {
1886 /*
1887 * Check if the size tree contains a range
1888 * that intersects with the boundaries. As the
1889 * allocation is for any page, try the smallest
1890 * range so that large ranges are preserved for
1891 * more constrained cases. Only one entry is
1892 * checked here, to avoid a brute-force search.
1893 *
1894 * Note that a size tree gives pg[1] instead of
1895 * pg[0].
1896 */
1897 found = RBT_MIN(uvm_pmr_size,
1898 &pmr->size[memtype]);
1899 if (found != NULL) {
1900 found--;
1901 if (!PMR_INTERSECTS_WITH(
1902 atop(VM_PAGE_TO_PHYS(found)),
1903 atop(VM_PAGE_TO_PHYS(found)) +
1904 found->fpgsz, start, end))
1905 found = NULL;
1906 }
1907 }
1908 if (found == NULL) {
1909 /*
1910 * Try address-guided search to meet the page
1911 * number constraints.
1912 */
1913 found = RBT_ROOT(uvm_pmr_addr, &pmr->addr);
1914 if (found != NULL) {
1915 found = uvm_pmr_rootupdate(pmr, found,
1916 start, end, memtype);
1917 }
1918 }
1919 if (found != NULL) {
1920 uvm_pmr_assertvalid(pmr);
1921 uvm_pmr_remove_size(pmr, found);
1922
1923 /*
1924 * If the page intersects the end, then it'll
1925 * need splitting.
1926 *
1927 * Note that we don't need to split if the page
1928 * intersects start: the drain function will
1929 * simply stop on hitting start.
1930 */
1931 if (end != 0 && atop(VM_PAGE_TO_PHYS(found)) +
1932 found->fpgsz > end) {
1933 psize_t splitsz =
1934 atop(VM_PAGE_TO_PHYS(found)) +
1935 found->fpgsz - end;
1936
1937 uvm_pmr_remove_addr(pmr, found);
1938 uvm_pmr_assertvalid(pmr);
1939 found->fpgsz -= splitsz;
1940 splitpg = found + found->fpgsz;
1941 splitpg->fpgsz = splitsz;
1942 uvm_pmr_insert(pmr, splitpg, 1);
1943
1944 /*
1945 * At this point, splitpg and found
1946 * actually should be joined.
1947 * But we explicitly disable that,
1948 * because we will start subtracting
1949 * from found.
1950 */
1951 KASSERT(start == 0 ||
1952 atop(VM_PAGE_TO_PHYS(found)) +
1953 found->fpgsz > start);
1954 uvm_pmr_insert_addr(pmr, found, 1);
1955 }
1956
1957 /*
1958 * Fetch pages from the end.
1959 * If the range is larger than the requested
1960 * number of pages, this saves us an addr-tree
1961 * update.
1962 *
1963 * Since we take from the end and insert at
1964 * the head, any ranges stay preserved.
1965 */
1966 while (found->fpgsz > 0 && fcount < count &&
1967 (start == 0 ||
1968 atop(VM_PAGE_TO_PHYS(found)) +
1969 found->fpgsz > start)) {
1970 found->fpgsz--;
1971 fcount++;
1972 TAILQ_INSERT_HEAD(result,
1973 &found[found->fpgsz], pageq);
1974 }
1975 if (found->fpgsz > 0) {
1976 uvm_pmr_insert_size(pmr, found);
1977 KDASSERT(fcount == count);
1978 uvm_pmr_assertvalid(pmr);
1979 return fcount;
1980 }
1981
1982 /*
1983 * Delayed addr-tree removal.
1984 */
1985 uvm_pmr_remove_addr(pmr, found);
1986 uvm_pmr_assertvalid(pmr);
1987 } else {
1988 if (memtype_only)
1989 break;
1990 /*
1991 * Skip to the next memtype.
1992 */
1993 memtype += 1;
1994 if (memtype == UVM_PMR_MEMTYPE_MAX)
1995 memtype = 0;
1996 if (memtype == memtype_init)
1997 break;
1998 }
1999 }
2000 }
2001
2002 /*
2003 * Search finished.
2004 *
2005 * Ran out of ranges before enough pages were gathered, or we hit the
2006 * case where found->fpgsz == count - fcount, in which case the
2007 * above exit condition didn't trigger.
2008 *
2009 * On failure, caller will free the pages.
2010 */
2011 return fcount;
2012}
2013
2014#ifdef DDB
2015/*
2016 * Print information about pmemrange.
2017 * Does not do locking (so either call it from DDB or acquire fpageq lock
2018 * before invoking).
2019 */
2020void
2021uvm_pmr_print(void)
2022{
2023 struct uvm_pmemrange *pmr;
2024 struct vm_page *pg;
2025 psize_t size[UVM_PMR_MEMTYPE_MAX];
2026 psize_t free;
2027 int useq_len;
2028 int mt;
2029
2030 printf("Ranges, use queue:\n");
2031 useq_len = 0;
2032 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
2033 useq_len++;
2034 free = 0;
2035 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
2036 pg = RBT_MAX(uvm_pmr_size, &pmr->size[mt]);
2037 if (pg != NULL)
2038 pg--;
2039 else
2040 pg = TAILQ_FIRST(&pmr->single[mt]);
2041 size[mt] = (pg == NULL ? 0 : pg->fpgsz);
2042
2043 RBT_FOREACH(pg, uvm_pmr_addr, &pmr->addr)
2044 free += pg->fpgsz;
2045 }
2046
2047 printf("* [0x%lx-0x%lx] use=%d nsegs=%ld",
2048 (unsigned long)pmr->low, (unsigned long)pmr->high,
2049 pmr->use, (unsigned long)pmr->nsegs);
2050 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
2051 printf(" maxsegsz[%d]=0x%lx", mt,
2052 (unsigned long)size[mt]);
2053 }
2054 printf(" free=0x%lx\n", (unsigned long)free);
2055 }
2056 printf("#ranges = %d\n", useq_len);
2057}
2058#endif
2059
2060/*
2061 * uvm_wait_pla: wait (sleep) for the page daemon to free some pages
2062 * in a specific physmem area.
2063 *
2064 * Returns ENOMEM if the pagedaemon failed to free any pages.
2065 * If not failok, failure will lead to panic.
2066 *
2067 * Must be called with fpageq locked.
2068 */
2069int
2070uvm_wait_pla(paddr_t low, paddr_t high, paddr_t size, int failok)
2071{
2072 struct uvm_pmalloc pma;
2073 const char *wmsg = "pmrwait";
2074
2075 if (curproc == uvm.pagedaemon_proc) {
    51: Assuming field 'ci_curproc' is not equal to field 'pagedaemon_proc'
    52: Taking false branch
2076 /*
2077 * This is not that uncommon when the pagedaemon is trying
2078 * to flush out a large mmapped file. VOP_WRITE will circle
2079 * back through the buffer cache and try to get more memory.
2080 * The pagedaemon starts by calling bufbackoff, but we can
2081 * easily use up that reserve in a single scan iteration.
2082 */
2083 uvm_unlock_fpageq();
2084 if (bufbackoff(NULL, atop(size)) == 0) {
2085 uvm_lock_fpageq();
2086 return 0;
2087 }
2088 uvm_lock_fpageq();
2089
2090 /*
2091 * XXX detect pagedaemon deadlock - see comment in
2092 * uvm_wait(), as this is exactly the same issue.
2093 */
2094 printf("pagedaemon: wait_pla deadlock detected!\n");
2095 msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM, wmsg,
2096 MSEC_TO_NSEC(125));
2097#if defined(DEBUG)
2098 /* DEBUG: panic so we can debug it */
2099 panic("wait_pla pagedaemon deadlock");
2100#endif
2101 return 0;
2102 }
2103
2104 for (;;) {
    53: Loop condition is true. Entering loop body
2105 pma.pm_constraint.ucr_low = low;
2106 pma.pm_constraint.ucr_high = high;
2107 pma.pm_size = size;
2108 pma.pm_flags = UVM_PMA_LINKED;
2109 TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, &pma, pmq);
    54: Loop condition is false. Exiting loop
2110
2111 wakeup(&uvm.pagedaemon); /* wake the daemon! */
2112 while (pma.pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY))
    55: Loop condition is false. Execution continues on line 2115
2113 msleep_nsec(&pma, &uvm.fpageqlock, PVM, wmsg, INFSLP);
2114
2115 if (!(pma.pm_flags & UVM_PMA_FREED) &&
    56: Assuming the condition is false
2116 pma.pm_flags & UVM_PMA_FAIL) {
2117 if (failok)
2118 return ENOMEM;
2119 printf("uvm_wait: failed to free %ld pages between "
2120 "0x%lx-0x%lx\n", atop(size), low, high);
2121 } else
2122 return 0;
    57: Address of stack memory associated with local variable 'pma' is still referred to by the global variable 'uvm' upon returning to the caller. This will be a dangling reference
2123 }
2124 /* UNREACHABLE */
2125}
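
This function is where the reported warning fires. pma lives on the caller's stack, yet TAILQ_INSERT_TAIL() links it into the global uvm.pmr_control.allocs queue, so for a while the global uvm structure points at stack memory. The code depends on the wakeup side, uvm_wakeup_pla() below, setting UVM_PMA_FREED and unlinking the entry (clearing UVM_PMA_LINKED) before the msleep_nsec() loop can exit; under that protocol the entry is already off the list when the function returns at line 2122. The analyzer, however, explored a path (events 53-57) on which the sleep-loop condition is assumed false immediately, leaving the entry apparently still linked at the return, hence the dangling-reference report. A minimal user-space sketch of the underlying pattern, a stack node in a global queue that must be unlinked before its frame dies (illustrative only, not the kernel code):

    #include <sys/queue.h>
    #include <stdio.h>

    struct waiter {
            int flags;                      /* 1 = still linked in the global list */
            TAILQ_ENTRY(waiter) link;
    };
    TAILQ_HEAD(waitq, waiter);

    /* Global list that outlives any single caller, like uvm.pmr_control.allocs. */
    static struct waitq all_waiters = TAILQ_HEAD_INITIALIZER(all_waiters);

    /* Producer side: unlink and clear the flag, like uvm_wakeup_pla(). */
    static void
    service_one(void)
    {
            struct waiter *w = TAILQ_FIRST(&all_waiters);

            if (w != NULL) {
                    w->flags = 0;
                    TAILQ_REMOVE(&all_waiters, w, link);
            }
    }

    /* Consumer side: the stack node must be unlinked before this returns. */
    static void
    wait_for_service(void)
    {
            struct waiter self = { .flags = 1 };

            TAILQ_INSERT_TAIL(&all_waiters, &self, link);
            while (self.flags)              /* in the kernel: sleep until unlinked */
                    service_one();          /* stand-in for the wakeup path */
            /* self is no longer reachable from all_waiters here. */
    }

    int
    main(void)
    {
            wait_for_service();
            printf("list empty: %d\n", TAILQ_EMPTY(&all_waiters));  /* 1 */
            return 0;
    }

If the unlink step were ever skipped on some path, the global list really would keep a pointer into a dead stack frame, which is exactly the situation the checker describes.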
2126
2127/*
2128 * Wake up uvm_pmalloc sleepers.
2129 */
2130void
2131uvm_wakeup_pla(paddr_t low, psize_t len)
2132{
2133 struct uvm_pmalloc *pma, *pma_next;
2134 paddr_t high;
2135
2136 high = low + len;
2137
2138 /* Wake specific allocations waiting for this memory. */
2139 for (pma = TAILQ_FIRST(&uvm.pmr_control.allocs); pma != NULL;
2140 pma = pma_next) {
2141 pma_next = TAILQ_NEXT(pma, pmq);
2142
2143 if (low < pma->pm_constraint.ucr_high &&
2144 high > pma->pm_constraint.ucr_low) {
2145 pma->pm_flags |= UVM_PMA_FREED;
2146 if (!(pma->pm_flags & UVM_PMA_BUSY)) {
2147 pma->pm_flags &= ~UVM_PMA_LINKED;
2148 TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
2149 pmq);
2150 wakeup(pma);
2151 }
2152 }
2153 }
2154}
2155
2156void
2157uvm_pagezero_thread(void *arg)
2158{
2159 struct pglist pgl;
2160 struct vm_page *pg;
2161 int count;
2162
2163 /* Run at the lowest possible priority. */
2164 curproc->p_p->ps_nice = NZERO + PRIO_MAX;
2165
2166 KERNEL_UNLOCK();
2167
2168 TAILQ_INIT(&pgl);
2169 for (;;) {
2170 uvm_lock_fpageq();
2171 while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET ||
2172 (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY,
2173 &pgl, 0, 0, 1)) == 0) {
2174 msleep_nsec(&uvmexp.zeropages, &uvm.fpageqlock,
2175 MAXPRI, "pgzero", INFSLP);
2176 }
2177 uvm_unlock_fpageq();
2178
2179 TAILQ_FOREACH(pg, &pgl, pageq) {
2180 uvm_pagezero(pg);
2181 atomic_setbits_int(&pg->pg_flags, PG_ZERO);
2182 }
2183
2184 uvm_lock_fpageq();
2185 while (!TAILQ_EMPTY(&pgl))
2186 uvm_pmr_remove_1strange(&pgl, 0, NULL, 0);
2187 uvmexp.zeropages += count;
2188 uvm_unlock_fpageq();
2189
2190 yield();
2191 }
2192}