Bug Summary

File: uvm/uvm_fault.c
Warning: line 771, column 11
Although the value stored to 'nforw' is used in the enclosing expression, the value is never actually read from 'nforw'
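
This is the clang static analyzer's deadcode.DeadStores "nested assignment" finding: the value written to 'nforw' at line 771 only serves to feed the enclosing expression (nback = nforw = 0), and no statement reachable from that point reads 'nforw' again. The stand-alone snippet below illustrates the same pattern; it is not code from uvm_fault.c, and the function name is hypothetical even though the variable names mirror the report. Running clang --analyze on it should reproduce the same diagnostic for 'nforw'.

/*
 * Illustrative only: a chained assignment where one of the targets is
 * never read afterwards, which deadcode.DeadStores reports as
 * "Although the value stored to ... is used in the enclosing
 * expression, the value is never actually read".
 */
#include <stdio.h>

static int
range_of_interest(int narrow)
{
	int nback, nforw, npages;

	if (narrow) {
		/* dead store: 'nforw' is not read again on this path */
		nback = nforw = 0;
		npages = 1;
	} else {
		nback = 3;
		nforw = 4;
		npages = nback + nforw + 1;	/* 'nforw' is read here */
	}

	/* after the branch only 'nback' and 'npages' are consulted */
	return npages - nback;
}

int
main(void)
{
	printf("%d %d\n", range_of_interest(1), range_of_interest(0));
	return 0;
}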

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name uvm_fault.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/uvm/uvm_fault.c
1/* $OpenBSD: uvm_fault.c,v 1.135 2023/09/05 05:08:26 guenther Exp $ */
2/* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */
3
4/*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
29 */
30
31/*
32 * uvm_fault.c: fault handler
33 */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/percpu.h>
39#include <sys/proc.h>
40#include <sys/malloc.h>
41#include <sys/mman.h>
42#include <sys/tracepoint.h>
43
44#include <uvm/uvm.h>
45
46/*
47 *
48 * a word on page faults:
49 *
50 * types of page faults we handle:
51 *
52 * CASE 1: upper layer faults CASE 2: lower layer faults
53 *
54 * CASE 1A CASE 1B CASE 2A CASE 2B
55 * read/write1 write>1 read/write +-cow_write/zero
56 * | | | |
57 * +--|--+ +--|--+ +-----+ + | + | +-----+
58 * amap | V | | ---------> new | | | | ^ |
59 * +-----+ +-----+ +-----+ + | + | +--|--+
60 * | | |
61 * +-----+ +-----+ +--|--+ | +--|--+
62 * uobj | d/c | | d/c | | V | +----+ |
63 * +-----+ +-----+ +-----+ +-----+
64 *
65 * d/c = don't care
66 *
67 * case [0]: layerless fault
68 * no amap or uobj is present. this is an error.
69 *
70 * case [1]: upper layer fault [anon active]
71 * 1A: [read] or [write with anon->an_ref == 1]
72 * I/O takes place in upper level anon and uobj is not touched.
73 * 1B: [write with anon->an_ref > 1]
74 * new anon is alloc'd and data is copied off ["COW"]
75 *
76 * case [2]: lower layer fault [uobj]
77 * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
78 * I/O takes place directly in object.
79 * 2B: [write to copy_on_write] or [read on NULL uobj]
80 * data is "promoted" from uobj to a new anon.
81 * if uobj is null, then we zero fill.
82 *
83 * we follow the standard UVM locking protocol ordering:
84 *
85 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
86 * we hold a PG_BUSY page if we unlock for I/O
87 *
88 *
89 * the code is structured as follows:
90 *
91 * - init the "IN" params in the ufi structure
92 * ReFault: (ERESTART returned to the loop in uvm_fault)
93 * - do lookups [locks maps], check protection, handle needs_copy
94 * - check for case 0 fault (error)
95 * - establish "range" of fault
96 * - if we have an amap lock it and extract the anons
97 * - if sequential advice deactivate pages behind us
98 * - at the same time check pmap for unmapped areas and anon for pages
99 * that we could map in (and do map it if found)
100 * - check object for resident pages that we could map in
101 * - if (case 2) goto Case2
102 * - >>> handle case 1
103 * - ensure source anon is resident in RAM
104 * - if case 1B alloc new anon and copy from source
105 * - map the correct page in
106 * Case2:
107 * - >>> handle case 2
108 * - ensure source page is resident (if uobj)
109 * - if case 2B alloc new anon and copy from source (could be zero
110 * fill if uobj == NULL)
111 * - map the correct page in
112 * - done!
113 *
114 * note on paging:
115 * if we have to do I/O we place a PG_BUSY page in the correct object,
116 * unlock everything, and do the I/O. when I/O is done we must reverify
117 * the state of the world before assuming that our data structures are
118 * valid. [because mappings could change while the map is unlocked]
119 *
120 * alternative 1: unbusy the page in question and restart the page fault
121 * from the top (ReFault). this is easy but does not take advantage
122 * of the information that we already have from our previous lookup,
123 * although it is possible that the "hints" in the vm_map will help here.
124 *
125 * alternative 2: the system already keeps track of a "version" number of
126 * a map. [i.e. every time you write-lock a map (e.g. to change a
127 * mapping) you bump the version number up by one...] so, we can save
128 * the version number of the map before we release the lock and start I/O.
129 * then when I/O is done we can relock and check the version numbers
130 * to see if anything changed. this might save us some over 1 because
131 * we don't have to unbusy the page and may be less compares(?).
132 *
133 * alternative 3: put in backpointers or a way to "hold" part of a map
134 * in place while I/O is in progress. this could be complex to
135 * implement (especially with structures like amap that can be referenced
136 * by multiple map entries, and figuring out what should wait could be
137 * complex as well...).
138 *
139 * we use alternative 2. given that we are multi-threaded now we may want
140 * to reconsider the choice.
141 */
142
143/*
144 * local data structures
145 */
146struct uvm_advice {
147 int nback;
148 int nforw;
149};
150
151/*
152 * page range array: set up in uvmfault_init().
153 */
154static struct uvm_advice uvmadvice[MADV_MASK0x7 + 1];
155
156#define UVM_MAXRANGE16 16 /* must be max() of nback+nforw+1 */
157
158/*
159 * private prototypes
160 */
161static void uvmfault_amapcopy(struct uvm_faultinfo *);
162static inline void uvmfault_anonflush(struct vm_anon **, int);
163void uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
164void uvmfault_update_stats(struct uvm_faultinfo *);
165
166/*
167 * inline functions
168 */
169/*
170 * uvmfault_anonflush: try and deactivate pages in specified anons
171 *
172 * => does not have to deactivate page if it is busy
173 */
174static inline void
175uvmfault_anonflush(struct vm_anon **anons, int n)
176{
177 int lcv;
178 struct vm_page *pg;
179
180 for (lcv = 0; lcv < n; lcv++) {
181 if (anons[lcv] == NULL((void *)0))
182 continue;
183 KASSERT(rw_lock_held(anons[lcv]->an_lock))((rw_lock_held(anons[lcv]->an_lock)) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 183, "rw_lock_held(anons[lcv]->an_lock)"
))
;
184 pg = anons[lcv]->an_page;
185 if (pg && (pg->pg_flags & PG_BUSY0x00000001) == 0) {
186 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
187 if (pg->wire_count == 0) {
188 pmap_page_protect(pg, PROT_NONE0x00);
189 uvm_pagedeactivate(pg);
190 }
191 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
192 }
193 }
194}
195
196/*
197 * normal functions
198 */
199/*
200 * uvmfault_init: compute proper values for the uvmadvice[] array.
201 */
202void
203uvmfault_init(void)
204{
205 int npages;
206
207 npages = atop(16384)((16384) >> 12);
208 if (npages > 0) {
209 KASSERT(npages <= UVM_MAXRANGE / 2)((npages <= 16 / 2) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 209, "npages <= UVM_MAXRANGE / 2"))
;
210 uvmadvice[MADV_NORMAL0].nforw = npages;
211 uvmadvice[MADV_NORMAL0].nback = npages - 1;
212 }
213
214 npages = atop(32768)((32768) >> 12);
215 if (npages > 0) {
216 KASSERT(npages <= UVM_MAXRANGE / 2)((npages <= 16 / 2) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 216, "npages <= UVM_MAXRANGE / 2"))
;
217 uvmadvice[MADV_SEQUENTIAL2].nforw = npages - 1;
218 uvmadvice[MADV_SEQUENTIAL2].nback = npages;
219 }
220}
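/*
 * [editor's note, not part of the original source] Worked out with the
 * constants above: atop(16384) = 16384 >> 12 = 4 pages, so MADV_NORMAL
 * gets nback = 3, nforw = 4 (range 3 + 4 + 1 = 8); atop(32768) = 8, so
 * MADV_SEQUENTIAL gets nback = 8, nforw = 7 (range 8 + 7 + 1 = 16).
 * Both satisfy the KASSERTs (npages <= UVM_MAXRANGE / 2 = 8) and the
 * UVM_MAXRANGE = 16 comment ("must be max() of nback+nforw+1").
 */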
221
222/*
223 * uvmfault_amapcopy: clear "needs_copy" in a map.
224 *
225 * => called with VM data structures unlocked (usually, see below)
226 * => we get a write lock on the maps and clear needs_copy for a VA
227 * => if we are out of RAM we sleep (waiting for more)
228 */
229static void
230uvmfault_amapcopy(struct uvm_faultinfo *ufi)
231{
232 for (;;) {
233 /*
234 * no mapping? give up.
235 */
236 if (uvmfault_lookup(ufi, TRUE1) == FALSE0)
237 return;
238
239 /*
240 * copy if needed.
241 */
242 if (UVM_ET_ISNEEDSCOPY(ufi->entry)(((ufi->entry)->etype & 0x0008) != 0))
243 amap_copy(ufi->map, ufi->entry, M_NOWAIT0x0002,
244 UVM_ET_ISSTACK(ufi->entry)(((ufi->entry)->etype & 0x0040) != 0) ? FALSE0 : TRUE1,
245 ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
246
247 /*
248 * didn't work? must be out of RAM. unlock and sleep.
249 */
250 if (UVM_ET_ISNEEDSCOPY(ufi->entry)(((ufi->entry)->etype & 0x0008) != 0)) {
251 uvmfault_unlockmaps(ufi, TRUE1);
252 uvm_wait("fltamapcopy");
253 continue;
254 }
255
256 /*
257 * got it! unlock and return.
258 */
259 uvmfault_unlockmaps(ufi, TRUE1);
260 return;
261 }
262 /*NOTREACHED*/
263}
264
265/*
266 * uvmfault_anonget: get data in an anon into a non-busy, non-released
267 * page in that anon.
268 *
269 * => Map, amap and thus anon should be locked by caller.
270 * => If we fail, we unlock everything and error is returned.
271 * => If we are successful, return with everything still locked.
272 * => We do not move the page on the queues [gets moved later]. If we
273 * allocate a new page [we_own], it gets put on the queues. Either way,
274 * the result is that the page is on the queues at return time
275 */
276int
277uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
278 struct vm_anon *anon)
279{
280 struct vm_page *pg;
281 int error;
282
283 KASSERT(rw_lock_held(anon->an_lock))((rw_lock_held(anon->an_lock)) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_fault.c", 283, "rw_lock_held(anon->an_lock)"
))
;
284 KASSERT(anon->an_lock == amap->am_lock)((anon->an_lock == amap->am_lock) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 284, "anon->an_lock == amap->am_lock"
))
;
285
286 /* Increment the counters.*/
287 counters_inc(uvmexp_counters, flt_anget);
288 if (anon->an_page) {
289 curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_ru.ru_minflt++;
290 } else {
291 curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_ru.ru_majflt++;
292 }
293 error = 0;
294
295 /*
296 * Loop until we get the anon data, or fail.
297 */
298 for (;;) {
299 boolean_t we_own, locked;
300 /*
301 * Note: 'we_own' will become true if we set PG_BUSY on a page.
302 */
303 we_own = FALSE0;
304 pg = anon->an_page;
305
306 /*
307 * Is page resident? Make sure it is not busy/released.
308 */
309 if (pg) {
310 KASSERT(pg->pg_flags & PQ_ANON)((pg->pg_flags & 0x00100000) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_fault.c", 310, "pg->pg_flags & PQ_ANON"
))
;
311 KASSERT(pg->uanon == anon)((pg->uanon == anon) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 311, "pg->uanon == anon"))
;
312
313 /*
314 * if the page is busy, we drop all the locks and
315 * try again.
316 */
317 if ((pg->pg_flags & (PG_BUSY0x00000001|PG_RELEASED0x00000020)) == 0)
318 return (VM_PAGER_OK0);
319 atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PG_WANTED0x00000002);
320 counters_inc(uvmexp_counters, flt_pgwait);
321
322 /*
323 * The last unlock must be an atomic unlock and wait
324 * on the owner of page.
325 */
326 if (pg->uobject) {
327 /* Owner of page is UVM object. */
328 uvmfault_unlockall(ufi, amap, NULL((void *)0));
329 rwsleep_nsec(pg, pg->uobject->vmobjlock,
330 PVM4 | PNORELOCK0x200, "anonget1", INFSLP0xffffffffffffffffULL);
331 } else {
332 /* Owner of page is anon. */
333 uvmfault_unlockall(ufi, NULL((void *)0), NULL((void *)0));
334 rwsleep_nsec(pg, anon->an_lock, PVM4 | PNORELOCK0x200,
335 "anonget2", INFSLP0xffffffffffffffffULL);
336 }
337 } else {
338 /*
339 * No page, therefore allocate one.
340 */
341 pg = uvm_pagealloc(NULL((void *)0), 0, anon, 0);
342 if (pg == NULL((void *)0)) {
343 /* Out of memory. Wait a little. */
344 uvmfault_unlockall(ufi, amap, NULL((void *)0));
345 counters_inc(uvmexp_counters, flt_noram);
346 uvm_wait("flt_noram1");
347 } else {
348 /* PG_BUSY bit is set. */
349 we_own = TRUE1;
350 uvmfault_unlockall(ufi, amap, NULL((void *)0));
351
352 /*
353 * Pass a PG_BUSY+PG_FAKE+PG_CLEAN page into
354 * the uvm_swap_get() function with all data
355 * structures unlocked. Note that it is OK
356 * to read an_swslot here, because we hold
357 * PG_BUSY on the page.
358 */
359 counters_inc(uvmexp_counters, pageins);
360 error = uvm_swap_get(pg, anon->an_swslot,
361 PGO_SYNCIO0x002);
362
363 /*
364 * We clean up after the I/O below in the
365 * 'we_own' case.
366 */
367 }
368 }
369
370 /*
371 * Re-lock the map and anon.
372 */
373 locked = uvmfault_relock(ufi);
374 if (locked || we_own) {
375 rw_enter(anon->an_lock, RW_WRITE0x0001UL);
376 }
377
378 /*
379 * If we own the page (i.e. we set PG_BUSY), then we need
380 * to clean up after the I/O. There are three cases to
381 * consider:
382 *
383 * 1) Page was released during I/O: free anon and ReFault.
384 * 2) I/O not OK. Free the page and cause the fault to fail.
385 * 3) I/O OK! Activate the page and sync with the non-we_own
386 * case (i.e. drop anon lock if not locked).
387 */
388 if (we_own) {
389 if (pg->pg_flags & PG_WANTED0x00000002) {
390 wakeup(pg);
391 }
392
393 /*
394 * if we were RELEASED during I/O, then our anon is
395 * no longer part of an amap. we need to free the
396 * anon and try again.
397 */
398 if (pg->pg_flags & PG_RELEASED0x00000020) {
399 KASSERT(anon->an_ref == 0)((anon->an_ref == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 399, "anon->an_ref == 0"))
;
400 /*
401 * Released while we had unlocked amap.
402 */
403 if (locked)
404 uvmfault_unlockall(ufi, NULL((void *)0), NULL((void *)0));
405 uvm_anon_release(anon); /* frees page for us */
406 counters_inc(uvmexp_counters, flt_pgrele);
407 return (VM_PAGER_REFAULT7); /* refault! */
408 }
409
410 if (error != VM_PAGER_OK0) {
411 KASSERT(error != VM_PAGER_PEND)((error != 3) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 411, "error != VM_PAGER_PEND"))
;
412
413 /* remove page from anon */
414 anon->an_page = NULL((void *)0);
415
416 /*
417 * Remove the swap slot from the anon and
418 * mark the anon as having no real slot.
419 * Do not free the swap slot, thus preventing
420 * it from being used again.
421 */
422 uvm_swap_markbad(anon->an_swslot, 1);
423 anon->an_swslot = SWSLOT_BAD(-1);
424
425 /*
426 * Note: page was never !PG_BUSY, so it
427 * cannot be mapped and thus no need to
428 * pmap_page_protect() it.
429 */
430 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
431 uvm_pagefree(pg);
432 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
433
434 if (locked) {
435 uvmfault_unlockall(ufi, NULL((void *)0), NULL((void *)0));
436 }
437 rw_exit(anon->an_lock);
438 return (VM_PAGER_ERROR4);
439 }
440
441 /*
442 * We have successfully read the page, activate it.
443 */
444 pmap_clear_modify(pg)pmap_clear_attrs(pg, 0x0000000000000040UL);
445 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
446 uvm_pageactivate(pg);
447 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
448 atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags,
449 PG_WANTED0x00000002|PG_BUSY0x00000001|PG_FAKE0x00000040);
450 UVM_PAGE_OWN(pg, NULL);
451 }
452
453 /*
454 * We were not able to re-lock the map - restart the fault.
455 */
456 if (!locked) {
457 if (we_own) {
458 rw_exit(anon->an_lock);
459 }
460 return (VM_PAGER_REFAULT7);
461 }
462
463 /*
464 * Verify that no one has touched the amap and moved
465 * the anon on us.
466 */
467 if (ufi != NULL((void *)0) && amap_lookup(&ufi->entry->aref,
468 ufi->orig_rvaddr - ufi->entry->start) != anon) {
469
470 uvmfault_unlockall(ufi, amap, NULL((void *)0));
471 return (VM_PAGER_REFAULT7);
472 }
473
474 /*
475 * Retry..
476 */
477 counters_inc(uvmexp_counters, flt_anretry);
478 continue;
479
480 }
481 /*NOTREACHED*/
482}
483
484/*
485 * Update statistics after fault resolution.
486 * - maxrss
487 */
488void
489uvmfault_update_stats(struct uvm_faultinfo *ufi)
490{
491 struct vm_map *map;
492 struct proc *p;
493 vsize_t res;
494
495 map = ufi->orig_map;
496
497 /*
498 * If this is a nested pmap (eg, a virtual machine pmap managed
499 * by vmm(4) on amd64/i386), don't do any updating, just return.
500 *
501 * pmap_nested() on other archs is #defined to 0, so this is a
502 * no-op.
503 */
504 if (pmap_nested(map->pmap)((map->pmap)->pm_type != 1))
505 return;
506
507 /* Update the maxrss for the process. */
508 if (map->flags & VM_MAP_ISVMSPACE0x40) {
509 p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
;
510 KASSERT(p != NULL && &p->p_vmspace->vm_map == map)((p != ((void *)0) && &p->p_vmspace->vm_map
== map) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 510, "p != NULL && &p->p_vmspace->vm_map == map"
))
;
511
512 res = pmap_resident_count(map->pmap)((map->pmap)->pm_stats.resident_count);
513 /* Convert res from pages to kilobytes. */
514 res <<= (PAGE_SHIFT12 - 10);
515
516 if (p->p_ru.ru_maxrss < res)
517 p->p_ru.ru_maxrss = res;
518 }
519}
520
521/*
522 * F A U L T - m a i n e n t r y p o i n t
523 */
524
525/*
526 * uvm_fault: page fault handler
527 *
528 * => called from MD code to resolve a page fault
529 * => VM data structures usually should be unlocked. however, it is
530 * possible to call here with the main map locked if the caller
531 * gets a write lock, sets it recursive, and then calls us (c.f.
532 * uvm_map_pageable). this should be avoided because it keeps
533 * the map locked off during I/O.
534 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
535 */
536#define MASK(entry)((((entry)->etype & 0x0004) != 0) ? ~0x02 : (0x01 | 0x02
| 0x04))
(UVM_ET_ISCOPYONWRITE(entry)(((entry)->etype & 0x0004) != 0) ? \
537 ~PROT_WRITE0x02 : PROT_MASK(0x01 | 0x02 | 0x04))
538struct uvm_faultctx {
539 /*
540 * the following members are set up by uvm_fault_check() and
541 * read-only after that.
542 */
543 vm_prot_t enter_prot;
544 vm_prot_t access_type;
545 vaddr_t startva;
546 int npages;
547 int centeridx;
548 boolean_t narrow;
549 boolean_t wired;
550 paddr_t pa_flags;
551};
552
553int uvm_fault_check(
554 struct uvm_faultinfo *, struct uvm_faultctx *,
555 struct vm_anon ***);
556
557int uvm_fault_upper(
558 struct uvm_faultinfo *, struct uvm_faultctx *,
559 struct vm_anon **, vm_fault_t);
560boolean_t uvm_fault_upper_lookup(
561 struct uvm_faultinfo *, const struct uvm_faultctx *,
562 struct vm_anon **, struct vm_page **);
563
564int uvm_fault_lower(
565 struct uvm_faultinfo *, struct uvm_faultctx *,
566 struct vm_page **, vm_fault_t);
567
568int
569uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
570 vm_prot_t access_type)
571{
572 struct uvm_faultinfo ufi;
573 struct uvm_faultctx flt;
574 boolean_t shadowed;
575 struct vm_anon *anons_store[UVM_MAXRANGE16], **anons;
576 struct vm_page *pages[UVM_MAXRANGE16];
577 int error;
578
579 counters_inc(uvmexp_counters, faults);
580 TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL)do { extern struct dt_probe (dt_static_uvm_fault); struct dt_probe
*dtp = &(dt_static_uvm_fault); if (__builtin_expect(((dt_tracing
) != 0), 0) && __builtin_expect(((dtp->dtp_recording
) != 0), 0)) { struct dt_provider *dtpv = dtp->dtp_prov; dtpv
->dtpv_enter(dtpv, dtp, vaddr, fault_type, access_type, ((
void *)0)); } } while (0)
;
581
582 /*
583 * init the IN parameters in the ufi
584 */
585 ufi.orig_map = orig_map;
586 ufi.orig_rvaddr = trunc_page(vaddr)((vaddr) & ~((1 << 12) - 1));
587 ufi.orig_size = PAGE_SIZE(1 << 12); /* can't get any smaller than this */
588 if (fault_type == VM_FAULT_WIRE((vm_fault_t) 0x2))
589 flt.narrow = TRUE1; /* don't look for neighborhood
590 * pages on wire */
591 else
592 flt.narrow = FALSE0; /* normal fault */
593 flt.access_type = access_type;
594
595
596 error = ERESTART-1;
597 while (error == ERESTART-1) { /* ReFault: */
598 anons = anons_store;
599
600 error = uvm_fault_check(&ufi, &flt, &anons);
601 if (error != 0)
602 continue;
603
604 /* True if there is an anon at the faulting address */
605 shadowed = uvm_fault_upper_lookup(&ufi, &flt, anons, pages);
606 if (shadowed == TRUE1) {
607 /* case 1: fault on an anon in our amap */
608 error = uvm_fault_upper(&ufi, &flt, anons, fault_type);
609 } else {
610 struct uvm_object *uobj = ufi.entry->object.uvm_obj;
611
612 /*
613 * if the desired page is not shadowed by the amap and
614 * we have a backing object, then we check to see if
615 * the backing object would prefer to handle the fault
616 * itself (rather than letting us do it with the usual
617 * pgo_get hook). the backing object signals this by
618 * providing a pgo_fault routine.
619 */
620 if (uobj != NULL((void *)0) && uobj->pgops->pgo_fault != NULL((void *)0)) {
621 KERNEL_LOCK()_kernel_lock();
622 rw_enter(uobj->vmobjlock, RW_WRITE0x0001UL);
623 error = uobj->pgops->pgo_fault(&ufi,
624 flt.startva, pages, flt.npages,
625 flt.centeridx, fault_type, flt.access_type,
626 PGO_LOCKED0x040);
627 KERNEL_UNLOCK()_kernel_unlock();
628
629 if (error == VM_PAGER_OK0)
630 error = 0;
631 else if (error == VM_PAGER_REFAULT7)
632 error = ERESTART-1;
633 else
634 error = EACCES13;
635 } else {
636 /* case 2: fault on backing obj or zero fill */
637 error = uvm_fault_lower(&ufi, &flt, pages,
638 fault_type);
639 }
640 }
641 }
642
643 return error;
644}
645
646/*
647 * uvm_fault_check: check prot, handle needs-copy, etc.
648 *
649 * 1. lookup entry.
650 * 2. check protection.
651 * 3. adjust fault condition (mainly for simulated fault).
652 * 4. handle needs-copy (lazy amap copy).
653 * 5. establish range of interest for neighbor fault (aka pre-fault).
654 * 6. look up anons (if amap exists).
655 * 7. flush pages (if MADV_SEQUENTIAL)
656 *
657 * => called with nothing locked.
658 * => if we fail (result != 0) we unlock everything.
659 * => initialize/adjust many members of flt.
660 */
661int
662uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
663 struct vm_anon ***ranons)
664{
665 struct vm_amap *amap;
666 struct uvm_object *uobj;
667 int nback, nforw;
668
669 /*
670 * lookup and lock the maps
671 */
672 if (uvmfault_lookup(ufi, FALSE0) == FALSE0) {
673 return EFAULT14;
674 }
675 /* locked: maps(read) */
676
677#ifdef DIAGNOSTIC1
678 if ((ufi->map->flags & VM_MAP_PAGEABLE0x01) == 0)
679 panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)",
680 ufi->map, ufi->orig_rvaddr);
681#endif
682
683 /*
684 * check protection
685 */
686 if ((ufi->entry->protection & flt->access_type) != flt->access_type) {
687 uvmfault_unlockmaps(ufi, FALSE0);
688 return EACCES13;
689 }
690
691 /*
692 * "enter_prot" is the protection we want to enter the page in at.
693 * for certain pages (e.g. copy-on-write pages) this protection can
694 * be more strict than ufi->entry->protection. "wired" means either
695 * the entry is wired or we are fault-wiring the pg.
696 */
697
698 flt->enter_prot = ufi->entry->protection;
699 flt->pa_flags = UVM_ET_ISWC(ufi->entry)(((ufi->entry)->etype & 0x0080) != 0) ? PMAP_WC0x2 : 0;
700 flt->wired = VM_MAPENT_ISWIRED(ufi->entry)((ufi->entry)->wired_count != 0) || (flt->narrow == TRUE1);
701 if (flt->wired)
702 flt->access_type = flt->enter_prot; /* full access for wired */
703
704 /* handle "needs_copy" case. */
705 if (UVM_ET_ISNEEDSCOPY(ufi->entry)(((ufi->entry)->etype & 0x0008) != 0)) {
706 if ((flt->access_type & PROT_WRITE0x02) ||
707 (ufi->entry->object.uvm_obj == NULL((void *)0))) {
708 /* need to clear */
709 uvmfault_unlockmaps(ufi, FALSE0);
710 uvmfault_amapcopy(ufi);
711 counters_inc(uvmexp_counters, flt_amcopy);
712 return ERESTART-1;
713 } else {
714 /*
715 * ensure that we pmap_enter page R/O since
716 * needs_copy is still true
717 */
718 flt->enter_prot &= ~PROT_WRITE0x02;
719 }
720 }
721
722 /*
723 * identify the players
724 */
725 amap = ufi->entry->aref.ar_amap; /* upper layer */
726 uobj = ufi->entry->object.uvm_obj; /* lower layer */
727
728 /*
729 * check for a case 0 fault. if nothing backing the entry then
730 * error now.
731 */
732 if (amap == NULL((void *)0) && uobj == NULL((void *)0)) {
733 uvmfault_unlockmaps(ufi, FALSE0);
734 return EFAULT14;
735 }
736
737 /*
738 * for a case 2B fault waste no time on adjacent pages because
739 * they are likely already entered.
740 */
741 if (uobj != NULL((void *)0) && amap != NULL((void *)0) &&
742 (flt->access_type & PROT_WRITE0x02) != 0) {
743 /* wide fault (!narrow) */
744 flt->narrow = TRUE1;
745 }
746
747 /*
748 * establish range of interest based on advice from mapper
749 * and then clip to fit map entry. note that we only want
750 * to do this the first time through the fault. if we
751 * ReFault we will disable this by setting "narrow" to true.
752 */
753 if (flt->narrow == FALSE0) {
754
755 /* wide fault (!narrow) */
756 nback = min(uvmadvice[ufi->entry->advice].nback,
757 (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT12);
758 flt->startva = ufi->orig_rvaddr - ((vsize_t)nback << PAGE_SHIFT12);
759 nforw = min(uvmadvice[ufi->entry->advice].nforw,
760 ((ufi->entry->end - ufi->orig_rvaddr) >> PAGE_SHIFT12) - 1);
761 /*
762 * note: "-1" because we don't want to count the
763 * faulting page as forw
764 */
765 flt->npages = nback + nforw + 1;
766 flt->centeridx = nback;
767
768 flt->narrow = TRUE1; /* ensure only once per-fault */
769 } else {
770 /* narrow fault! */
771 nback = nforw = 0;
Although the value stored to 'nforw' is used in the enclosing expression, the value is never actually read from 'nforw'
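	/*
	 * [editor's note, not part of the original source] On this narrow
	 * path only 'nback' is read again (the MADV_SEQUENTIAL back-page
	 * flush below tests nback != 0); 'nforw' exists here only to feed
	 * the chained assignment, which is exactly what the checker
	 * reports.  Assuming no later use of 'nforw' is intended on this
	 * path, a minimal cleanup sketch would be:
	 *
	 *	nback = 0;
	 *
	 * leaving 'nforw' assigned only in the wide-fault branch above,
	 * where it is actually consumed.
	 */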
772 flt->startva = ufi->orig_rvaddr;
773 flt->npages = 1;
774 flt->centeridx = 0;
775 }
776
777 /*
778 * if we've got an amap then lock it and extract current anons.
779 */
780 if (amap) {
781 amap_lock(amap)rw_enter_write((amap)->am_lock);
782 amap_lookups(&ufi->entry->aref,
783 flt->startva - ufi->entry->start, *ranons, flt->npages);
784 } else {
785 *ranons = NULL((void *)0); /* to be safe */
786 }
787
788 /*
789 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
790 * now and then forget about them (for the rest of the fault).
791 */
792 if (ufi->entry->advice == MADV_SEQUENTIAL2 && nback != 0) {
793 /* flush back-page anons? */
794 if (amap)
795 uvmfault_anonflush(*ranons, nback);
796
797 /*
798 * flush object?
799 */
800 if (uobj) {
801 voff_t uoff;
802
803 uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset;
804 rw_enter(uobj->vmobjlock, RW_WRITE0x0001UL);
805 (void) uobj->pgops->pgo_flush(uobj, uoff, uoff +
806 ((vsize_t)nback << PAGE_SHIFT12), PGO_DEACTIVATE0x004);
807 rw_exit(uobj->vmobjlock);
808 }
809
810 /* now forget about the backpages */
811 if (amap)
812 *ranons += nback;
813 flt->startva += ((vsize_t)nback << PAGE_SHIFT12);
814 flt->npages -= nback;
815 flt->centeridx = 0;
816 }
817
818 return 0;
819}
820
821/*
822 * uvm_fault_upper_lookup: look up existing h/w mapping and amap.
823 *
824 * iterate range of interest:
825 * 1. check if h/w mapping exists. if yes, we don't care
826 * 2. check if anon exists. if not, page is lower.
827 * 3. if anon exists, enter h/w mapping for neighbors.
828 *
829 * => called with amap locked (if exists).
830 */
831boolean_t
832uvm_fault_upper_lookup(struct uvm_faultinfo *ufi,
833 const struct uvm_faultctx *flt, struct vm_anon **anons,
834 struct vm_page **pages)
835{
836 struct vm_amap *amap = ufi->entry->aref.ar_amap;
837 struct vm_anon *anon;
838 boolean_t shadowed;
839 vaddr_t currva;
840 paddr_t pa;
841 int lcv;
842
843 /* locked: maps(read), amap(if there) */
844 KASSERT(amap == NULL ||((amap == ((void *)0) || rw_write_held(amap->am_lock)) ? (
void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 845, "amap == NULL || rw_write_held(amap->am_lock)"))
845 rw_write_held(amap->am_lock))((amap == ((void *)0) || rw_write_held(amap->am_lock)) ? (
void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 845, "amap == NULL || rw_write_held(amap->am_lock)"))
;
846
847 /*
848 * map in the backpages and frontpages we found in the amap in hopes
849 * of preventing future faults. we also init the pages[] array as
850 * we go.
851 */
852 currva = flt->startva;
853 shadowed = FALSE0;
854 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE(1 << 12)) {
855 /*
856 * dont play with VAs that are already mapped
857 * except for center)
858 */
859 if (lcv != flt->centeridx &&
860 pmap_extract(ufi->orig_map->pmap, currva, &pa)) {
861 pages[lcv] = PGO_DONTCARE((struct vm_page *) -1L);
862 continue;
863 }
864
865 /*
866 * unmapped or center page. check if any anon at this level.
867 */
868 if (amap == NULL((void *)0) || anons[lcv] == NULL((void *)0)) {
869 pages[lcv] = NULL((void *)0);
870 continue;
871 }
872
873 /*
874 * check for present page and map if possible.
875 */
876 pages[lcv] = PGO_DONTCARE((struct vm_page *) -1L);
877 if (lcv == flt->centeridx) { /* save center for later! */
878 shadowed = TRUE1;
879 continue;
880 }
881 anon = anons[lcv];
882 KASSERT(anon->an_lock == amap->am_lock)((anon->an_lock == amap->am_lock) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 882, "anon->an_lock == amap->am_lock"
))
;
883 if (anon->an_page &&
884 (anon->an_page->pg_flags & (PG_RELEASED0x00000020|PG_BUSY0x00000001)) == 0) {
885 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
886 uvm_pageactivate(anon->an_page); /* reactivate */
887 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
888 counters_inc(uvmexp_counters, flt_namap);
889
890 /*
891 * Since this isn't the page that's actually faulting,
892 * ignore pmap_enter() failures; it's not critical
893 * that we enter these right now.
894 */
895 (void) pmap_enter(ufi->orig_map->pmap, currva,
896 VM_PAGE_TO_PHYS(anon->an_page)((anon->an_page)->phys_addr) | flt->pa_flags,
897 (anon->an_ref > 1) ?
898 (flt->enter_prot & ~PROT_WRITE0x02) : flt->enter_prot,
899 PMAP_CANFAIL0x00000020 |
900 (VM_MAPENT_ISWIRED(ufi->entry)((ufi->entry)->wired_count != 0) ? PMAP_WIRED0x00000010 : 0));
901 }
902 }
903 if (flt->npages > 1)
904 pmap_update(ufi->orig_map->pmap);
905
906 return shadowed;
907}
908
909/*
910 * uvm_fault_upper: handle upper fault.
911 *
912 * 1. acquire anon lock.
913 * 2. get anon. let uvmfault_anonget do the dirty work.
914 * 3. if COW, promote data to new anon
915 * 4. enter h/w mapping
916 */
917int
918uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
919 struct vm_anon **anons, vm_fault_t fault_type)
920{
921 struct vm_amap *amap = ufi->entry->aref.ar_amap;
922 struct vm_anon *oanon, *anon = anons[flt->centeridx];
923 struct vm_page *pg = NULL((void *)0);
924 int error, ret;
925
926 /* locked: maps(read), amap, anon */
927 KASSERT(rw_write_held(amap->am_lock))((rw_write_held(amap->am_lock)) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_fault.c", 927, "rw_write_held(amap->am_lock)"
))
;
928 KASSERT(anon->an_lock == amap->am_lock)((anon->an_lock == amap->am_lock) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 928, "anon->an_lock == amap->am_lock"
))
;
929
930 /*
931 * no matter if we have case 1A or case 1B we are going to need to
932 * have the anon's memory resident. ensure that now.
933 */
934 /*
935 * let uvmfault_anonget do the dirty work.
936 * if it fails (!OK) it will unlock everything for us.
937 * if it succeeds, locks are still valid and locked.
938 * also, if it is OK, then the anon's page is on the queues.
939 * if the page is on loan from a uvm_object, then anonget will
940 * lock that object for us if it does not fail.
941 */
942 error = uvmfault_anonget(ufi, amap, anon);
943 switch (error) {
944 case VM_PAGER_OK0:
945 break;
946
947 case VM_PAGER_REFAULT7:
948 return ERESTART-1;
949
950 case VM_PAGER_ERROR4:
951 /*
952 * An error occurred while trying to bring in the
953 * page -- this is the only error we return right
954 * now.
955 */
956 return EACCES13; /* XXX */
957 default:
958#ifdef DIAGNOSTIC1
959 panic("uvm_fault: uvmfault_anonget -> %d", error);
960#else
961 return EACCES13;
962#endif
963 }
964
965 KASSERT(rw_write_held(amap->am_lock))((rw_write_held(amap->am_lock)) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/uvm/uvm_fault.c", 965, "rw_write_held(amap->am_lock)"
))
;
966 KASSERT(anon->an_lock == amap->am_lock)((anon->an_lock == amap->am_lock) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 966, "anon->an_lock == amap->am_lock"
))
;
967
968 /*
969 * if we are case 1B then we will need to allocate a new blank
970 * anon to transfer the data into. note that we have a lock
971 * on anon, so no one can busy or release the page until we are done.
972 * also note that the ref count can't drop to zero here because
973 * it is > 1 and we are only dropping one ref.
974 *
975 * in the (hopefully very rare) case that we are out of RAM we
976 * will unlock, wait for more RAM, and refault.
977 *
978 * if we are out of anon VM we wait for RAM to become available.
979 */
980
981 if ((flt->access_type & PROT_WRITE0x02) != 0 && anon->an_ref > 1) {
982 counters_inc(uvmexp_counters, flt_acow);
983 oanon = anon; /* oanon = old */
984 anon = uvm_analloc();
985 if (anon) {
986 anon->an_lock = amap->am_lock;
987 pg = uvm_pagealloc(NULL((void *)0), 0, anon, 0);
988 }
989
990 /* check for out of RAM */
991 if (anon == NULL((void *)0) || pg == NULL((void *)0)) {
992 uvmfault_unlockall(ufi, amap, NULL((void *)0));
993 if (anon == NULL((void *)0))
994 counters_inc(uvmexp_counters, flt_noanon);
995 else {
996 anon->an_lock = NULL((void *)0);
997 anon->an_ref--;
998 uvm_anfree(anon)uvm_anfree_list((anon), ((void *)0));
999 counters_inc(uvmexp_counters, flt_noram);
1000 }
1001
1002 if (uvm_swapisfull())
1003 return ENOMEM12;
1004
1005 /* out of RAM, wait for more */
1006 if (anon == NULL((void *)0))
1007 uvm_anwait();
1008 else
1009 uvm_wait("flt_noram3");
1010 return ERESTART-1;
1011 }
1012
1013 /* got all resources, replace anon with nanon */
1014 uvm_pagecopy(oanon->an_page, pg); /* pg now !PG_CLEAN */
1015 /* un-busy! new page */
1016 atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags, PG_BUSY0x00000001|PG_FAKE0x00000040);
1017 UVM_PAGE_OWN(pg, NULL);
1018 ret = amap_add(&ufi->entry->aref,
1019 ufi->orig_rvaddr - ufi->entry->start, anon, 1);
1020 KASSERT(ret == 0)((ret == 0) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1020, "ret == 0"))
;
1021
1022 /* deref: can not drop to zero here by defn! */
1023 oanon->an_ref--;
1024
1025#if defined(MULTIPROCESSOR1) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW)
1026 /*
1027 * If there are multiple threads, either uvm or the
1028 * pmap has to make sure no threads see the old RO
1029 * mapping once any have seen the new RW mapping.
1030 * uvm does it by inserting the new mapping RO and
1031 * letting it fault again.
1032 * This is only a problem on MP systems.
1033 */
1034 if (P_HASSIBLING(curproc)((({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc)->p_p->ps_threadcnt > 1)
) {
1035 flt->enter_prot &= ~PROT_WRITE0x02;
1036 flt->access_type &= ~PROT_WRITE0x02;
1037 }
1038#endif
1039
1040 /*
1041 * note: anon is _not_ locked, but we have the sole references
1042 * to in from amap.
1043 * thus, no one can get at it until we are done with it.
1044 */
1045 } else {
1046 counters_inc(uvmexp_counters, flt_anon);
1047 oanon = anon;
1048 pg = anon->an_page;
1049 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */
1050 flt->enter_prot = flt->enter_prot & ~PROT_WRITE0x02;
1051 }
1052
1053 /*
1054 * now map the page in .
1055 */
1056 if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
1057 VM_PAGE_TO_PHYS(pg)((pg)->phys_addr) | flt->pa_flags, flt->enter_prot,
1058 flt->access_type | PMAP_CANFAIL0x00000020 | (flt->wired ? PMAP_WIRED0x00000010 : 0)) != 0) {
1059 /*
1060 * No need to undo what we did; we can simply think of
1061 * this as the pmap throwing away the mapping information.
1062 *
1063 * We do, however, have to go through the ReFault path,
1064 * as the map may change while we're asleep.
1065 */
1066 uvmfault_unlockall(ufi, amap, NULL((void *)0));
1067 if (uvm_swapisfull()) {
1068 /* XXX instrumentation */
1069 return ENOMEM12;
1070 }
1071 /* XXX instrumentation */
1072 uvm_wait("flt_pmfail1");
1073 return ERESTART-1;
1074 }
1075
1076 /*
1077 * ... update the page queues.
1078 */
1079 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
1080
1081 if (fault_type == VM_FAULT_WIRE((vm_fault_t) 0x2)) {
1082 uvm_pagewire(pg);
1083 /*
1084 * since the now-wired page cannot be paged out,
1085 * release its swap resources for others to use.
1086 * since an anon with no swap cannot be PG_CLEAN,
1087 * clear its clean flag now.
1088 */
1089 atomic_clearbits_intx86_atomic_clearbits_u32(&pg->pg_flags, PG_CLEAN0x00000008);
1090 uvm_anon_dropswap(anon);
1091 } else {
1092 /* activate it */
1093 uvm_pageactivate(pg);
1094 }
1095
1096 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
1097
1098 /*
1099 * done case 1! finish up by unlocking everything and returning success
1100 */
1101 uvmfault_unlockall(ufi, amap, NULL((void *)0));
1102 pmap_update(ufi->orig_map->pmap);
1103 return 0;
1104}
1105
1106/*
1107 * uvm_fault_lower_lookup: look up on-memory uobj pages.
1108 *
1109 * 1. get on-memory pages.
1110 * 2. if failed, give up (get only center page later).
1111 * 3. if succeeded, enter h/w mapping of neighbor pages.
1112 */
1113
1114struct vm_page *
1115uvm_fault_lower_lookup(
1116 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1117 struct vm_page **pages)
1118{
1119 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1120 struct vm_page *uobjpage = NULL((void *)0);
1121 int lcv, gotpages;
1122 vaddr_t currva;
1123
1124 rw_enter(uobj->vmobjlock, RW_WRITE0x0001UL);
1125
1126 counters_inc(uvmexp_counters, flt_lget);
1127 gotpages = flt->npages;
1128 (void) uobj->pgops->pgo_get(uobj,
1129 ufi->entry->offset + (flt->startva - ufi->entry->start),
1130 pages, &gotpages, flt->centeridx,
1131 flt->access_type & MASK(ufi->entry)((((ufi->entry)->etype & 0x0004) != 0) ? ~0x02 : (0x01
| 0x02 | 0x04))
, ufi->entry->advice,
1132 PGO_LOCKED0x040);
1133
1134 /*
1135 * check for pages to map, if we got any
1136 */
1137 if (gotpages == 0) {
1138 return NULL((void *)0);
1139 }
1140
1141 currva = flt->startva;
1142 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE(1 << 12)) {
1143 if (pages[lcv] == NULL((void *)0) ||
1144 pages[lcv] == PGO_DONTCARE((struct vm_page *) -1L))
1145 continue;
1146
1147 KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0)(((pages[lcv]->pg_flags & 0x00000020) == 0) ? (void)0 :
__assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 1147
, "(pages[lcv]->pg_flags & PG_RELEASED) == 0"))
;
1148
1149 /*
1150 * if center page is resident and not
1151 * PG_BUSY, then pgo_get made it PG_BUSY
1152 * for us and gave us a handle to it.
1153 * remember this page as "uobjpage."
1154 * (for later use).
1155 */
1156 if (lcv == flt->centeridx) {
1157 uobjpage = pages[lcv];
1158 continue;
1159 }
1160
1161 /*
1162 * note: calling pgo_get with locked data
1163 * structures returns us pages which are
1164 * neither busy nor released, so we don't
1165 * need to check for this. we can just
1166 * directly enter the page (after moving it
1167 * to the head of the active queue [useful?]).
1168 */
1169
1170 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
1171 uvm_pageactivate(pages[lcv]); /* reactivate */
1172 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
1173 counters_inc(uvmexp_counters, flt_nomap);
1174
1175 /*
1176 * Since this page isn't the page that's
1177 * actually faulting, ignore pmap_enter()
1178 * failures; it's not critical that we
1179 * enter these right now.
1180 */
1181 (void) pmap_enter(ufi->orig_map->pmap, currva,
1182 VM_PAGE_TO_PHYS(pages[lcv])((pages[lcv])->phys_addr) | flt->pa_flags,
1183 flt->enter_prot & MASK(ufi->entry)((((ufi->entry)->etype & 0x0004) != 0) ? ~0x02 : (0x01
| 0x02 | 0x04))
,
1184 PMAP_CANFAIL0x00000020 |
1185 (flt->wired ? PMAP_WIRED0x00000010 : 0));
1186
1187 /*
1188 * NOTE: page can't be PG_WANTED because
1189 * we've held the lock the whole time
1190 * we've had the handle.
1191 */
1192 atomic_clearbits_intx86_atomic_clearbits_u32(&pages[lcv]->pg_flags, PG_BUSY0x00000001);
1193 UVM_PAGE_OWN(pages[lcv], NULL);
1194 }
1195 pmap_update(ufi->orig_map->pmap);
1196
1197 return uobjpage;
1198}
1199
1200/*
1201 * uvm_fault_lower: handle lower fault.
1202 *
1203 */
1204int
1205uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1206 struct vm_page **pages, vm_fault_t fault_type)
1207{
1208 struct vm_amap *amap = ufi->entry->aref.ar_amap;
1209 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1210 boolean_t promote, locked;
1211 int result;
1212 struct vm_page *uobjpage, *pg = NULL((void *)0);
1213 struct vm_anon *anon = NULL((void *)0);
1214 voff_t uoff;
1215
1216 /*
1217 * now, if the desired page is not shadowed by the amap and we have
1218 * a backing object that does not have a special fault routine, then
1219 * we ask (with pgo_get) the object for resident pages that we care
1220 * about and attempt to map them in. we do not let pgo_get block
1221 * (PGO_LOCKED).
1222 */
1223 if (uobj == NULL((void *)0)) {
1224 /* zero fill; don't care neighbor pages */
1225 uobjpage = NULL((void *)0);
1226 } else {
1227 uobjpage = uvm_fault_lower_lookup(ufi, flt, pages);
1228 }
1229
1230 /*
1231 * note that at this point we are done with any front or back pages.
1232 * we are now going to focus on the center page (i.e. the one we've
1233 * faulted on). if we have faulted on the bottom (uobj)
1234 * layer [i.e. case 2] and the page was both present and available,
1235 * then we've got a pointer to it as "uobjpage" and we've already
1236 * made it BUSY.
1237 */
1238
1239 /*
1240 * locked:
1241 */
1242 KASSERT(amap == NULL ||((amap == ((void *)0) || rw_write_held(amap->am_lock)) ? (
void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1243, "amap == NULL || rw_write_held(amap->am_lock)"))
1243 rw_write_held(amap->am_lock))((amap == ((void *)0) || rw_write_held(amap->am_lock)) ? (
void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1243, "amap == NULL || rw_write_held(amap->am_lock)"))
;
1244 KASSERT(uobj == NULL ||((uobj == ((void *)0) || rw_write_held(uobj->vmobjlock)) ?
(void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1245, "uobj == NULL || rw_write_held(uobj->vmobjlock)"))
1245 rw_write_held(uobj->vmobjlock))((uobj == ((void *)0) || rw_write_held(uobj->vmobjlock)) ?
(void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1245, "uobj == NULL || rw_write_held(uobj->vmobjlock)"))
;
1246
1247 /*
1248 * note that uobjpage can not be PGO_DONTCARE at this point. we now
1249 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we
1250 * have a backing object, check and see if we are going to promote
1251 * the data up to an anon during the fault.
1252 */
1253 if (uobj == NULL((void *)0)) {
1254 uobjpage = PGO_DONTCARE((struct vm_page *) -1L);
1255 promote = TRUE1; /* always need anon here */
1256 } else {
1257 KASSERT(uobjpage != PGO_DONTCARE)((uobjpage != ((struct vm_page *) -1L)) ? (void)0 : __assert(
"diagnostic ", "/usr/src/sys/uvm/uvm_fault.c", 1257, "uobjpage != PGO_DONTCARE"
))
;
1258 promote = (flt->access_type & PROT_WRITE0x02) &&
1259 UVM_ET_ISCOPYONWRITE(ufi->entry)(((ufi->entry)->etype & 0x0004) != 0);
1260 }
1261
1262 /*
1263 * if uobjpage is not null then we do not need to do I/O to get the
1264 * uobjpage.
1265 *
1266 * if uobjpage is null, then we need to ask the pager to
1267 * get the data for us. once we have the data, we need to reverify
1268 * the state the world. we are currently not holding any resources.
1269 */
1270 if (uobjpage) {
1271 /* update rusage counters */
1272 curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_ru.ru_minflt++;
1273 } else {
1274 int gotpages;
1275
1276 /* update rusage counters */
1277 curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_ru.ru_majflt++;
1278
1279 uvmfault_unlockall(ufi, amap, NULL((void *)0));
1280
1281 counters_inc(uvmexp_counters, flt_get);
1282 gotpages = 1;
1283 uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
1284 result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
1285 0, flt->access_type & MASK(ufi->entry)((((ufi->entry)->etype & 0x0004) != 0) ? ~0x02 : (0x01
| 0x02 | 0x04))
, ufi->entry->advice,
1286 PGO_SYNCIO0x002);
1287
1288 /*
1289 * recover from I/O
1290 */
1291 if (result != VM_PAGER_OK0) {
1292 KASSERT(result != VM_PAGER_PEND)((result != 3) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_fault.c"
, 1292, "result != VM_PAGER_PEND"))
;
1293
1294 if (result == VM_PAGER_AGAIN5) {
1295 tsleep_nsec(&nowake, PVM4, "fltagain2",
1296 MSEC_TO_NSEC(5));
1297 return ERESTART-1;
1298 }
1299
1300 if (!UVM_ET_ISNOFAULT(ufi->entry)(((ufi->entry)->etype & 0x0020) != 0))
1301 return (EIO5);
1302
1303 uobjpage = PGO_DONTCARE((struct vm_page *) -1L);
1304 uobj = NULL((void *)0);
1305 promote = TRUE1;
1306 }
1307
1308 /* re-verify the state of the world. */
1309 locked = uvmfault_relock(ufi);
1310 if (locked && amap != NULL((void *)0))
1311 amap_lock(amap)rw_enter_write((amap)->am_lock);
1312
1313 /* might be changed */
1314 if (uobjpage != PGO_DONTCARE((struct vm_page *) -1L)) {
1315 uobj = uobjpage->uobject;
1316 rw_enter(uobj->vmobjlock, RW_WRITE0x0001UL);
1317 }
1318
1319 /*
1320 * Re-verify that amap slot is still free. if there is
1321 * a problem, we clean up.
1322 */
1323 if (locked && amap && amap_lookup(&ufi->entry->aref,
1324 ufi->orig_rvaddr - ufi->entry->start)) {
1325 if (locked)
1326 uvmfault_unlockall(ufi, amap, NULL((void *)0));
1327 locked = FALSE0;
1328 }
1329
1330 /* didn't get the lock? release the page and retry. */
1331 if (locked == FALSE0 && uobjpage != PGO_DONTCARE((struct vm_page *) -1L)) {
1332 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
1333 /* make sure it is in queues */
1334 uvm_pageactivate(uobjpage);
1335 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
1336
1337 if (uobjpage->pg_flags & PG_WANTED0x00000002)
1338 /* still holding object lock */
1339 wakeup(uobjpage);
1340 atomic_clearbits_intx86_atomic_clearbits_u32(&uobjpage->pg_flags,
1341 PG_BUSY0x00000001|PG_WANTED0x00000002);
1342 UVM_PAGE_OWN(uobjpage, NULL);
1343 }
1344
1345 if (locked == FALSE0) {
1346 if (uobjpage != PGO_DONTCARE((struct vm_page *) -1L))
1347 rw_exit(uobj->vmobjlock);
1348 return ERESTART-1;
1349 }
1350
1351 /*
1352 * we have the data in uobjpage which is PG_BUSY
1353 */
1354 }
1355
1356 /*
1357 * notes:
1358 * - at this point uobjpage can not be NULL
1359 * - at this point uobjpage could be PG_WANTED (handle later)
1360 */
1361 if (promote == FALSE0) {
1362 /*
1363 * we are not promoting. if the mapping is COW ensure that we
1364 * don't give more access than we should (e.g. when doing a read
1365 * fault on a COPYONWRITE mapping we want to map the COW page in
1366 * R/O even though the entry protection could be R/W).
1367 *
1368 * set "pg" to the page we want to map in (uobjpage, usually)
1369 */
1370 counters_inc(uvmexp_counters, flt_obj);
1371 if (UVM_ET_ISCOPYONWRITE(ufi->entry)(((ufi->entry)->etype & 0x0004) != 0))
1372 flt->enter_prot &= ~PROT_WRITE0x02;
1373 pg = uobjpage; /* map in the actual object */
1374
1375 /* assert(uobjpage != PGO_DONTCARE) */
1376
1377 /*
1378 * we are faulting directly on the page.
1379 */
1380 } else {
1381 /*
1382 * if we are going to promote the data to an anon we
1383 * allocate a blank anon here and plug it into our amap.
1384 */
1385#ifdef DIAGNOSTIC1
1386 if (amap == NULL((void *)0))
1387 panic("uvm_fault: want to promote data, but no anon");
1388#endif
1389
1390 anon = uvm_analloc();
1391 if (anon) {
1392 /*
1393 * In `Fill in data...' below, if
1394 * uobjpage == PGO_DONTCARE, we want
1395 * a zero'd, dirty page, so have
1396 * uvm_pagealloc() do that for us.
1397 */
1398 anon->an_lock = amap->am_lock;
1399 pg = uvm_pagealloc(NULL((void *)0), 0, anon,
1400 (uobjpage == PGO_DONTCARE((struct vm_page *) -1L)) ? UVM_PGA_ZERO0x0002 : 0);
1401 }
1402
1403 /*
1404 * out of memory resources?
1405 */
1406 if (anon == NULL((void *)0) || pg == NULL((void *)0)) {
1407 /*
1408 * arg! must unbusy our page and fail or sleep.
1409 */
1410 if (uobjpage != PGO_DONTCARE((struct vm_page *) -1L)) {
1411 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
1412 uvm_pageactivate(uobjpage);
1413 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
1414
1415 if (uobjpage->pg_flags & PG_WANTED0x00000002)
1416 wakeup(uobjpage);
1417 atomic_clearbits_intx86_atomic_clearbits_u32(&uobjpage->pg_flags,
1418 PG_BUSY0x00000001|PG_WANTED0x00000002);
1419 UVM_PAGE_OWN(uobjpage, NULL);
1420 }
1421
1422 /* unlock and fail ... */
1423 uvmfault_unlockall(ufi, amap, uobj);
1424 if (anon == NULL((void *)0))
1425 counters_inc(uvmexp_counters, flt_noanon);
1426 else {
1427 anon->an_lock = NULL((void *)0);
1428 anon->an_ref--;
1429 uvm_anfree(anon)uvm_anfree_list((anon), ((void *)0));
1430 counters_inc(uvmexp_counters, flt_noram);
1431 }
1432
1433 if (uvm_swapisfull())
1434 return (ENOMEM12);
1435
1436 /* out of RAM, wait for more */
1437 if (anon == NULL((void *)0))
1438 uvm_anwait();
1439 else
1440 uvm_wait("flt_noram5");
1441 return ERESTART-1;
1442 }
1443
1444 /*
1445 * fill in the data
1446 */
1447 if (uobjpage != PGO_DONTCARE((struct vm_page *) -1L)) {
1448 counters_inc(uvmexp_counters, flt_prcopy);
1449 /* copy page [pg now dirty] */
1450 uvm_pagecopy(uobjpage, pg);
1451
1452 /*
1453 * promote to shared amap? make sure all sharing
1454 * procs see it
1455 */
1456 if ((amap_flags(amap)((amap)->am_flags) & AMAP_SHARED0x1) != 0) {
1457 pmap_page_protect(uobjpage, PROT_NONE0x00);
1458 }
1459#if defined(MULTIPROCESSOR1) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW)
1460 /*
1461 * Otherwise:
1462 * If there are multiple threads, either uvm or the
1463 * pmap has to make sure no threads see the old RO
1464 * mapping once any have seen the new RW mapping.
1465 * uvm does it here by forcing it to PROT_NONE before
1466 * inserting the new mapping.
1467 */
1468 else if (P_HASSIBLING(curproc)((({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc)->p_p->ps_threadcnt > 1)
) {
1469 pmap_page_protect(uobjpage, PROT_NONE0x00);
1470 }
1471#endif
1472
1473 /* dispose of uobjpage. drop handle to uobj as well. */
1474 if (uobjpage->pg_flags & PG_WANTED0x00000002)
1475 wakeup(uobjpage);
1476 atomic_clearbits_intx86_atomic_clearbits_u32(&uobjpage->pg_flags,
1477 PG_BUSY0x00000001|PG_WANTED0x00000002);
1478 UVM_PAGE_OWN(uobjpage, NULL);
1479 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
1480 uvm_pageactivate(uobjpage);
1481 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
1482 rw_exit(uobj->vmobjlock);
1483 uobj = NULL((void *)0);
1484 } else {
1485 counters_inc(uvmexp_counters, flt_przero);
1486 /*
1487 * Page is zero'd and marked dirty by uvm_pagealloc()
1488 * above.
1489 */
1490 }
1491
1492 if (amap_add(&ufi->entry->aref,
1493 ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
1494 uvmfault_unlockall(ufi, amap, uobj);
1495 uvm_anfree(anon)uvm_anfree_list((anon), ((void *)0));
1496 counters_inc(uvmexp_counters, flt_noamap);
1497
1498 if (uvm_swapisfull())
1499 return (ENOMEM12);
1500
1501 amap_populate(&ufi->entry->aref,
1502 ufi->orig_rvaddr - ufi->entry->start);
1503 return ERESTART-1;
1504 }
1505 }
1506
1507 /* note: pg is either the uobjpage or the new page in the new anon */
1508 /*
1509 * all resources are present. we can now map it in and free our
1510 * resources.
1511 */
1512 if (amap == NULL)
1513 KASSERT(anon == NULL);
1514 else {
1515 KASSERT(rw_write_held(amap->am_lock));
1516 KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
1517 }
1518 if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
1519 VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
1520 flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
1521 /*
1522 * No need to undo what we did; we can simply think of
1523 * this as the pmap throwing away the mapping information.
1524 *
1525 * We do, however, have to go through the ReFault path,
1526 * as the map may change while we're asleep.
1527 */
1528 if (pg->pg_flags & PG_WANTED)
1529 wakeup(pg);
1530
1531 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
1532 UVM_PAGE_OWN(pg, NULL);
1533 uvmfault_unlockall(ufi, amap, uobj);
1534 if (uvm_swapisfull()) {
1535 /* XXX instrumentation */
1536 return (ENOMEM);
1537 }
1538 /* XXX instrumentation */
1539 uvm_wait("flt_pmfail2");
1540 return ERESTART;
1541 }
1542
1543 if (fault_type == VM_FAULT_WIRE) {
1544 uvm_lock_pageq();
1545 uvm_pagewire(pg);
1546 uvm_unlock_pageq();
1547 if (pg->pg_flags & PQ_AOBJ) {
1548 /*
1549 * since the now-wired page cannot be paged out,
1550 * release its swap resources for others to use.
1551 * since an aobj page with no swap cannot be clean,
1552 * mark it dirty now.
1553 *
1554 * use pg->uobject here. if the page is from a
1555 * tmpfs vnode, the pages are backed by its UAO and
1556 * not the vnode.
1557 */
1558 KASSERT(uobj != NULL);
1559 KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
1560 atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1561 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
1562 }
1563 } else {
1564 /* activate it */
1565 uvm_lock_pageq();
1566 uvm_pageactivate(pg);
1567 uvm_unlock_pageq();
1568 }
1569
1570 if (pg->pg_flags & PG_WANTED)
1571 wakeup(pg);
1572
1573 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
1574 UVM_PAGE_OWN(pg, NULL);
1575 uvmfault_unlockall(ufi, amap, uobj);
1576 pmap_update(ufi->orig_map->pmap);
1577
1578 return (0);
1579}
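
The function above releases a busy page with the same three-step sequence in several places (source lines 1415-1419, 1474-1478, 1528-1532, 1570-1574): wake any waiter, clear PG_BUSY/PG_WANTED (plus PG_FAKE where it was set), and drop page ownership. A minimal sketch of that pattern as a hypothetical helper; the name fault_release_busy_page_sketch is illustrative and not part of uvm_fault.c:

	/*
	 * Sketch only.  "pg" is assumed to be a PG_BUSY page owned by the
	 * faulting thread, with the owning amap/object lock still held.
	 */
	static inline void
	fault_release_busy_page_sketch(struct vm_page *pg)
	{
		if (pg->pg_flags & PG_WANTED)
			wakeup(pg);	/* wake threads sleeping on the busy page */
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
		UVM_PAGE_OWN(pg, NULL);	/* page no longer owned by this fault */
	}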
1580
1581
1582/*
1583 * uvm_fault_wire: wire down a range of virtual addresses in a map.
1584 *
1585 * => map may be read-locked by caller, but MUST NOT be write-locked.
1586 * => if map is read-locked, any operations which may cause map to
1587 * be write-locked in uvm_fault() must be taken care of by
1588 * the caller. See uvm_map_pageable().
1589 */
1590int
1591uvm_fault_wire(vm_map_t map, vaddr_t start, vaddr_t end, vm_prot_t access_type)
1592{
1593 vaddr_t va;
1594 int rv;
1595
1596 /*
1597 * now fault it in a page at a time. if the fault fails then we have
1598 * to undo what we have done. note that in uvm_fault PROT_NONE
1599 * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
1600 */
1601 for (va = start ; va < end ; va += PAGE_SIZE) {
1602 rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
1603 if (rv) {
1604 if (va != start) {
1605 uvm_fault_unwire(map, start, va);
1606 }
1607 return (rv);
1608 }
1609 }
1610
1611 return (0);
1612}
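
Because uvm_fault_wire() already calls uvm_fault_unwire() for the prefix it managed to wire before a failure, a caller only needs to propagate the error and, later, unwire the whole range once. A hedged usage sketch; wire_range_sketch and the PROT_READ|PROT_WRITE choice are illustrative and not taken from the kernel's real caller, uvm_map_pageable():

	int
	wire_range_sketch(vm_map_t map, vaddr_t start, vaddr_t end)
	{
		int error;

		/* fault in every page of [start, end) and wire it down */
		error = uvm_fault_wire(map, start, end, PROT_READ | PROT_WRITE);
		if (error)
			return (error);		/* nothing is left wired on error */

		/* ... use the wired range ... */

		/* undo the wiring when done */
		uvm_fault_unwire(map, start, end);
		return (0);
	}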
1613
1614/*
1615 * uvm_fault_unwire(): unwire range of virtual space.
1616 */
1617void
1618uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
1619{
1620
1621 vm_map_lock_read(map);
1622 uvm_fault_unwire_locked(map, start, end);
1623 vm_map_unlock_read(map);
1624}
1625
1626/*
1627 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
1628 *
1629 * => map must be at least read-locked.
1630 */
1631void
1632uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
1633{
1634 vm_map_entry_t entry, oentry = NULL, next;
1635 pmap_t pmap = vm_map_pmap(map);
1636 vaddr_t va;
1637 paddr_t pa;
1638 struct vm_page *pg;
1639
1640 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
1641 vm_map_assert_anylock(map);
1642
1643 /*
1644 * we assume that the area we are unwiring has actually been wired
1645 * in the first place. this means that we should be able to extract
1646 * the PAs from the pmap.
1647 */
1648
1649 /*
1650 * find the beginning map entry for the region.
1651 */
1652 KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
1653 if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
1654 panic("uvm_fault_unwire_locked: address not in map");
1655
1656 for (va = start; va < end ; va += PAGE_SIZE) {
1657 if (pmap_extract(pmap, va, &pa) == FALSE)
1658 continue;
1659
1660 /*
1661 * find the map entry for the current address.
1662 */
1663 KASSERT(va >= entry->start);
1664 while (entry && va >= entry->end) {
1665 next = RBT_NEXT(uvm_map_addr, entry);
1666 entry = next;
1667 }
1668
1669 if (entry == NULL)
1670 return;
1671 if (va < entry->start)
1672 continue;
1673
1674 /*
1675 * lock it.
1676 */
1677 if (entry != oentry) {
1678 if (oentry != NULL) {
1679 uvm_map_unlock_entry(oentry);
1680 }
1681 uvm_map_lock_entry(entry);
1682 oentry = entry;
1683 }
1684
1685 /*
1686 * if the entry is no longer wired, tell the pmap.
1687 */
1688 if (VM_MAPENT_ISWIRED(entry) == 0)
1689 pmap_unwire(pmap, va);
1690
1691 pg = PHYS_TO_VM_PAGE(pa);
1692 if (pg) {
1693 uvm_lock_pageq();
1694 uvm_pageunwire(pg);
1695 uvm_unlock_pageq();
1696 }
1697 }
1698
1699 if (oentry != NULL) {
1700 uvm_map_unlock_entry(oentry);
1701 }
1702}
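
Callers that already hold the map lock for their own traversal should use uvm_fault_unwire_locked() directly rather than uvm_fault_unwire(), which would try to take the read lock a second time. A hedged sketch of that shape; unwire_held_lock_sketch is a hypothetical caller:

	void
	unwire_held_lock_sketch(vm_map_t map, vaddr_t start, vaddr_t end)
	{
		vm_map_lock_read(map);	/* at least read-locked, as required */
		/* ... walk entries, decide which range to release ... */
		uvm_fault_unwire_locked(map, start, end);
		vm_map_unlock_read(map);
	}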
1703
1704/*
1705 * uvmfault_unlockmaps: unlock the maps
1706 */
1707void
1708uvmfault_unlockmaps(struct uvm_faultinfo *ufi, boolean_t write_locked)
1709{
1710 /*
1711 * ufi can be NULL when this isn't really a fault,
1712 * but merely paging in anon data.
1713 */
1714 if (ufi == NULL) {
1715 return;
1716 }
1717
1718 uvmfault_update_stats(ufi);
1719 if (write_locked) {
1720 vm_map_unlock(ufi->map);
1721 } else {
1722 vm_map_unlock_read(ufi->map);
1723 }
1724}
1725
1726/*
1727 * uvmfault_unlockall: unlock everything passed in.
1728 *
1729 * => maps must be read-locked (not write-locked).
1730 */
1731void
1732uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
1733 struct uvm_object *uobj)
1734{
1735 if (uobj)
1736 rw_exit(uobj->vmobjlock);
1737 if (amap != NULL)
1738 amap_unlock(amap);
1739 uvmfault_unlockmaps(ufi, FALSE);
1740}
1741
1742/*
1743 * uvmfault_lookup: lookup a virtual address in a map
1744 *
1745 * => caller must provide a uvm_faultinfo structure with the IN
1746 * params properly filled in
1747 * => we will lookup the map entry (handling submaps) as we go
1748 * => if the lookup is a success we will return with the maps locked
1749 * => if "write_lock" is TRUE, we write_lock the map, otherwise we only
1750 * get a read lock.
1751 * => note that submaps can only appear in the kernel and they are
1752 * required to use the same virtual addresses as the map they
1753 * are referenced by (thus address translation between the main
1754 * map and the submap is unnecessary).
1755 */
1756
1757boolean_t
1758uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
1759{
1760 vm_map_t tmpmap;
1761
1762 /*
1763 * init ufi values for lookup.
1764 */
1765 ufi->map = ufi->orig_map;
1766 ufi->size = ufi->orig_size;
1767
1768 /*
1769 * keep going down levels until we are done. note that there can
1770 * only be two levels so we won't loop very long.
1771 */
1772 while (1) {
1773 if (ufi->orig_rvaddr < ufi->map->min_offset ||
1774 ufi->orig_rvaddr >= ufi->map->max_offset)
1775 return FALSE;
1776
1777 /* lock map */
1778 if (write_lock) {
1779 vm_map_lock(ufi->map);
1780 } else {
1781 vm_map_lock_read(ufi->map);
1782 }
1783
1784 /* lookup */
1785 if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
1786 &ufi->entry)) {
1787 uvmfault_unlockmaps(ufi, write_lock);
1788 return FALSE;
1789 }
1790
1791 /* reduce size if necessary */
1792 if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
1793 ufi->size = ufi->entry->end - ufi->orig_rvaddr;
1794
1795 /*
1796 * submap? replace map with the submap and lookup again.
1797 * note: VAs in submaps must match VAs in main map.
1798 */
1799 if (UVM_ET_ISSUBMAP(ufi->entry)) {
1800 tmpmap = ufi->entry->object.sub_map;
1801 uvmfault_unlockmaps(ufi, write_lock);
1802 ufi->map = tmpmap;
1803 continue;
1804 }
1805
1806 /*
1807 * got it!
1808 */
1809 ufi->mapv = ufi->map->timestamp;
1810 return TRUE;
1811
1812 } /* while loop */
1813
1814 /*NOTREACHED*/
1815}
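
A hedged sketch of the calling convention for uvmfault_lookup(): the caller fills in the IN fields of the uvm_faultinfo, and on success the map comes back locked, so it must later be released with uvmfault_unlockmaps() (or uvmfault_unlockall() once amap/object locks are held as well). The helper name lookup_usage_sketch is illustrative:

	boolean_t
	lookup_usage_sketch(vm_map_t map, vaddr_t va)
	{
		struct uvm_faultinfo ufi;

		ufi.orig_map = map;			/* IN parameters */
		ufi.orig_rvaddr = trunc_page(va);
		ufi.orig_size = PAGE_SIZE;

		if (uvmfault_lookup(&ufi, FALSE) == FALSE)
			return FALSE;			/* address not in map */

		/* ... inspect ufi.entry while the map is read-locked ... */

		uvmfault_unlockmaps(&ufi, FALSE);	/* drop the read lock */
		return TRUE;
	}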
1816
1817/*
1818 * uvmfault_relock: attempt to relock the same version of the map
1819 *
1820 * => fault data structures should be unlocked before calling.
1821 * => if a success (TRUE) maps will be locked after call.
1822 */
1823boolean_t
1824uvmfault_relock(struct uvm_faultinfo *ufi)
1825{
1826 /*
1827 * ufi can be NULL when this isn't really a fault,
1828 * but merely paging in anon data.
1829 */
1830 if (ufi == NULL) {
1831 return TRUE;
1832 }
1833
1834 counters_inc(uvmexp_counters, flt_relck);
1835
1836 /*
1837 * relock map. fail if version mismatch (in which case nothing
1838 * gets locked).
1839 */
1840 vm_map_lock_read(ufi->map);
1841 if (ufi->mapv != ufi->map->timestamp) {
1842 vm_map_unlock_read(ufi->map);
1843 return FALSE;
1844 }
1845
1846 counters_inc(uvmexp_counters, flt_relckok);
1847 return TRUE; /* got it! */
1848}
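
uvmfault_relock() is the second half of the drop-locks/sleep/retry pattern used throughout this file: everything is unlocked with uvmfault_unlockall() before a blocking operation, and a FALSE return from the relock means the map version changed and the fault must restart (the ERESTART path above). A hedged sketch of that pattern; the helper name and the uvm_wait() message are illustrative:

	int
	relock_pattern_sketch(struct uvm_faultinfo *ufi, struct vm_amap *amap,
	    struct uvm_object *uobj)
	{
		/* drop everything before sleeping */
		uvmfault_unlockall(ufi, amap, uobj);

		uvm_wait("flt_sketch");		/* e.g. wait for free pages */

		/* only the map is relocked here; amap/object locks come later */
		if (uvmfault_relock(ufi) == FALSE)
			return ERESTART;	/* map changed, restart the fault */

		return 0;
	}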