File: | uvm/uvm_pager.c |
Warning: | line 763, column 6 Access to field 'pg_flags' results in a dereference of an undefined pointer value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: uvm_pager.c,v 1.77 2021/12/15 12:53:53 mpi Exp $ */ | |||
2 | /* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ | |||
3 | ||||
4 | /* | |||
5 | * Copyright (c) 1997 Charles D. Cranor and Washington University. | |||
6 | * All rights reserved. | |||
7 | * | |||
8 | * Redistribution and use in source and binary forms, with or without | |||
9 | * modification, are permitted provided that the following conditions | |||
10 | * are met: | |||
11 | * 1. Redistributions of source code must retain the above copyright | |||
12 | * notice, this list of conditions and the following disclaimer. | |||
13 | * 2. Redistributions in binary form must reproduce the above copyright | |||
14 | * notice, this list of conditions and the following disclaimer in the | |||
15 | * documentation and/or other materials provided with the distribution. | |||
16 | * | |||
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |||
18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||
19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |||
20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |||
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |||
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
27 | * | |||
28 | * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp | |||
29 | */ | |||
30 | ||||
31 | /* | |||
32 | * uvm_pager.c: generic functions used to assist the pagers. | |||
33 | */ | |||
34 | ||||
35 | #include <sys/param.h> | |||
36 | #include <sys/systm.h> | |||
37 | #include <sys/malloc.h> | |||
38 | #include <sys/pool.h> | |||
39 | #include <sys/buf.h> | |||
40 | #include <sys/atomic.h> | |||
41 | ||||
42 | #include <uvm/uvm.h> | |||
43 | ||||
44 | struct pool *uvm_aiobuf_pool; | |||
45 | ||||
/*
 * Table of all configured pager operation vectors; indexed only to run
 * each pager's pgo_init hook at boot (see uvm_pager_init()).
 */
const struct uvm_pagerops *uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
};
51 | ||||
52 | /* | |||
53 | * the pager map: provides KVA for I/O | |||
54 | * | |||
55 | * Each uvm_pseg has room for MAX_PAGERMAP_SEGS pager io space of | |||
56 | * MAXBSIZE bytes. | |||
57 | * | |||
58 | * The number of uvm_pseg instances is dynamic using an array segs. | |||
59 | * At most UVM_PSEG_COUNT instances can exist. | |||
60 | * | |||
61 | * psegs[0] always exists (so that the pager can always map in pages). | |||
62 | * psegs[0] element 0 is always reserved for the pagedaemon. | |||
63 | * | |||
64 | * Any other pseg is automatically created when no space is available | |||
65 | * and automatically destroyed when it is no longer in use. | |||
66 | */ | |||
67 | #define MAX_PAGER_SEGS16 16 | |||
68 | #define PSEG_NUMSEGS((16 * 1024 * 1024) / 16 / (64 * 1024)) (PAGER_MAP_SIZE(16 * 1024 * 1024) / MAX_PAGER_SEGS16 / MAXBSIZE(64 * 1024)) | |||
/*
 * One pager-map segment group: MAX_PAGER_SEGS contiguous MAXBSIZE
 * windows of KVA, allocation state tracked by a bitmap in `use'.
 */
struct uvm_pseg {
	/* Start of virtual space; 0 if not inited. */
	vaddr_t	start;
	/* Bitmap of the segments in use in this pseg. */
	int	use;
};
75 | struct mutex uvm_pseg_lck; | |||
76 | struct uvm_pseg psegs[PSEG_NUMSEGS((16 * 1024 * 1024) / 16 / (64 * 1024))]; | |||
77 | ||||
78 | #define UVM_PSEG_FULL(pseg)((pseg)->use == (1 << 16) - 1) ((pseg)->use == (1 << MAX_PAGER_SEGS16) - 1) | |||
79 | #define UVM_PSEG_EMPTY(pseg)((pseg)->use == 0) ((pseg)->use == 0) | |||
80 | #define UVM_PSEG_INUSE(pseg,id)(((pseg)->use & (1 << (id))) != 0) (((pseg)->use & (1 << (id))) != 0) | |||
81 | ||||
82 | void uvm_pseg_init(struct uvm_pseg *); | |||
83 | vaddr_t uvm_pseg_get(int); | |||
84 | void uvm_pseg_release(vaddr_t); | |||
85 | ||||
86 | /* | |||
87 | * uvm_pager_init: init pagers (at boot time) | |||
88 | */ | |||
/*
 * uvm_pager_init: init pagers (at boot time)
 *
 * Sets up psegs[0] so the pager can always map pages, initializes the
 * pseg lock and the async-I/O done queue, then runs each pager's
 * optional pgo_init hook.
 */
void
uvm_pager_init(void)
{
	int lcv;

	/* init pager map */
	uvm_pseg_init(&psegs[0]);
	mtx_init(&uvm_pseg_lck, IPL_VM);

	/* init ASYNC I/O queue */
	TAILQ_INIT(&uvm.aio_done);

	/* call pager init functions */
	for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *);
	    lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}
108 | ||||
109 | /* | |||
110 | * Initialize a uvm_pseg. | |||
111 | * | |||
112 | * May fail, in which case seg->start == 0. | |||
113 | * | |||
114 | * Caller locks uvm_pseg_lck. | |||
115 | */ | |||
/*
 * Initialize a uvm_pseg.
 *
 * Reserves MAX_PAGER_SEGS * MAXBSIZE of kernel virtual address space.
 * May fail, in which case pseg->start remains 0 (uvm_km_valloc_try()
 * returns 0 on failure); callers must check for that.
 *
 * Caller locks uvm_pseg_lck.
 */
void
uvm_pseg_init(struct uvm_pseg *pseg)
{
	KASSERT(pseg->start == 0);
	KASSERT(pseg->use == 0);
	pseg->start = uvm_km_valloc_try(kernel_map, MAX_PAGER_SEGS * MAXBSIZE);
}
123 | ||||
124 | /* | |||
125 | * Acquire a pager map segment. | |||
126 | * | |||
127 | * Returns a vaddr for paging. 0 on failure. | |||
128 | * | |||
129 | * Caller does not lock. | |||
130 | */ | |||
/*
 * Acquire a pager map segment.
 *
 * Returns a vaddr (a MAXBSIZE window of reserved KVA) for paging,
 * 0 on failure.  If UVMPAGER_MAPIN_WAITOK is set, sleeps on &psegs
 * (woken by uvm_pseg_release()) instead of failing.
 *
 * Caller does not lock.
 */
vaddr_t
uvm_pseg_get(int flags)
{
	int i;
	struct uvm_pseg *pseg;

	/*
	 * XXX Prevent lock ordering issue in uvm_unmap_detach().  A real
	 * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach().
	 *
	 *  witness_checkorder() at witness_checkorder+0xba0
	 *  __mp_lock() at __mp_lock+0x5f
	 *  uvm_unmap_detach() at uvm_unmap_detach+0xc5
	 *  uvm_map() at uvm_map+0x857
	 *  uvm_km_valloc_try() at uvm_km_valloc_try+0x65
	 *  uvm_pseg_get() at uvm_pseg_get+0x6f
	 *  uvm_pagermapin() at uvm_pagermapin+0x45
	 *  uvn_io() at uvn_io+0xcf
	 *  uvn_get() at uvn_get+0x156
	 *  uvm_fault_lower() at uvm_fault_lower+0x28a
	 *  uvm_fault() at uvm_fault+0x1b3
	 *  upageflttrap() at upageflttrap+0x62
	 */
	KERNEL_LOCK();
	mtx_enter(&uvm_pseg_lck);

pager_seg_restart:
	/* Find first pseg that has room. */
	for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
		if (UVM_PSEG_FULL(pseg))
			continue;

		if (pseg->start == 0) {
			/* Need initialization. */
			uvm_pseg_init(pseg);
			if (pseg->start == 0)
				goto pager_seg_fail;
		}

		/* Keep index 0 reserved for pagedaemon. */
		if (pseg == &psegs[0] && curproc != uvm.pagedaemon_proc)
			i = 1;
		else
			i = 0;

		for (; i < MAX_PAGER_SEGS; i++) {
			if (!UVM_PSEG_INUSE(pseg, i)) {
				pseg->use |= 1 << i;
				mtx_leave(&uvm_pseg_lck);
				KERNEL_UNLOCK();
				return pseg->start + i * MAXBSIZE;
			}
		}
	}

pager_seg_fail:
	if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) {
		/* msleep_nsec releases uvm_pseg_lck while sleeping. */
		msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP);
		goto pager_seg_restart;
	}

	mtx_leave(&uvm_pseg_lck);
	KERNEL_UNLOCK();
	return 0;
}
196 | ||||
197 | /* | |||
198 | * Release a pager map segment. | |||
199 | * | |||
200 | * Caller does not lock. | |||
201 | * | |||
202 | * Deallocates pseg if it is no longer in use. | |||
203 | */ | |||
/*
 * Release a pager map segment.
 *
 * Caller does not lock.
 *
 * Deallocates pseg (frees its KVA reservation) if it is no longer in
 * use, except for psegs[0], which is permanent.  Wakes any thread
 * sleeping in uvm_pseg_get().
 */
void
uvm_pseg_release(vaddr_t segaddr)
{
	int id;
	struct uvm_pseg *pseg;
	vaddr_t va = 0;

	/* Locate the pseg whose KVA range contains segaddr. */
	for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
		if (pseg->start <= segaddr &&
		    segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE)
			break;
	}
	KASSERT(pseg != &psegs[PSEG_NUMSEGS]);

	id = (segaddr - pseg->start) / MAXBSIZE;
	KASSERT(id >= 0 && id < MAX_PAGER_SEGS);

	/* test for no remainder */
	KDASSERT(segaddr == pseg->start + id * MAXBSIZE);

	mtx_enter(&uvm_pseg_lck);

	KASSERT(UVM_PSEG_INUSE(pseg, id));

	pseg->use &= ~(1 << id);
	wakeup(&psegs);

	/* Tear down a now-empty dynamic pseg; defer the free until
	 * after the mutex is dropped (uvm_km_free may sleep). */
	if (pseg != &psegs[0] && UVM_PSEG_EMPTY(pseg)) {
		va = pseg->start;
		pseg->start = 0;
	}

	mtx_leave(&uvm_pseg_lck);

	if (va)
		uvm_km_free(kernel_map, va, MAX_PAGER_SEGS * MAXBSIZE);
}
241 | ||||
242 | /* | |||
243 | * uvm_pagermapin: map pages into KVA for I/O that needs mappings | |||
244 | * | |||
245 | * We basically just km_valloc a blank map entry to reserve the space in the | |||
246 | * kernel map and then use pmap_enter() to put the mappings in by hand. | |||
247 | */ | |||
/*
 * uvm_pagermapin: map pages into KVA for I/O that needs mappings
 *
 * We basically just km_valloc a blank map entry to reserve the space in the
 * kernel map and then use pmap_enter() to put the mappings in by hand.
 *
 * => pps/npages: array of busy pages to map (each must be PG_BUSY)
 * => flags: UVMPAGER_MAPIN_READ maps writable (data flows INTO the pages);
 *    UVMPAGER_MAPIN_WAITOK allows sleeping for KVA
 * => returns mapped KVA, or 0 on failure
 */
vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vaddr_t kva, cva;
	vm_prot_t prot;
	vsize_t size;
	struct vm_page *pp;

	prot = PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= PROT_WRITE;
	size = ptoa(npages);

	KASSERT(size <= MAXBSIZE);

	kva = uvm_pseg_get(flags);
	if (kva == 0)
		return 0;

	for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		KASSERT(pp->pg_flags & PG_BUSY);
		/* Allow pmap_enter to fail. */
		if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
		    prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
			/* Unwind the mappings entered so far ([kva,cva))
			 * and give the KVA segment back. */
			pmap_remove(pmap_kernel(), kva, cva);
			pmap_update(pmap_kernel());
			uvm_pseg_release(kva);
			return 0;
		}
	}
	pmap_update(pmap_kernel());
	return kva;
}
283 | ||||
284 | /* | |||
285 | * uvm_pagermapout: remove KVA mapping | |||
286 | * | |||
287 | * We remove our mappings by hand and then remove the mapping. | |||
288 | */ | |||
/*
 * uvm_pagermapout: remove KVA mapping
 *
 * Undoes uvm_pagermapin(): tears down the pmap entries by hand and
 * releases the pager-map segment back to the pool.
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{

	pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT));
	pmap_update(pmap_kernel());
	uvm_pseg_release(kva);

}
298 | ||||
299 | /* | |||
300 | * uvm_mk_pcluster | |||
301 | * | |||
302 | * generic "make 'pager put' cluster" function. a pager can either | |||
303 | * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this | |||
304 | * generic function, or [3] set it to a pager specific function. | |||
305 | * | |||
306 | * => caller must lock object _and_ pagequeues (since we need to look | |||
307 | * at active vs. inactive bits, etc.) | |||
308 | * => caller must make center page busy and write-protect it | |||
309 | * => we mark all cluster pages busy for the caller | |||
310 | * => the caller must unbusy all pages (and check wanted/released | |||
311 | * status if it drops the object lock) | |||
312 | * => flags: | |||
313 | * PGO_ALLPAGES: all pages in object are valid targets | |||
314 | * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster | |||
315 | * PGO_DOACTCLUST: include active pages in cluster. | |||
316 | * PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed | |||
317 | * in async io (caller must clean on error). | |||
318 | * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. | |||
319 | * PG_CLEANCHK is only a hint, but clearing will help reduce | |||
320 | * the number of calls we make to the pmap layer. | |||
321 | */ | |||
322 | ||||
/*
 * uvm_mk_pcluster: build a pageout cluster around `center' (see the
 * block comment above for the full contract).  Returns a pointer INTO
 * pps (not necessarily pps itself) at the first page of the cluster,
 * and updates *npages to the cluster length.  All enrolled pages are
 * marked PG_BUSY and write-protected for the caller.
 */
struct vm_page **
uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages,
    struct vm_page *center, int flags, voff_t mlo, voff_t mhi)
{
	struct vm_page **ppsp, *pclust;
	voff_t lo, hi, curoff;
	int center_idx, forward, incr;

	/*
	 * center page should already be busy and write protected.  XXX:
	 * suppose page is wired?  if we lock, then a process could
	 * fault/block on it.  if we don't lock, a process could write the
	 * pages in the middle of an I/O.  (consider an msync()).  let's
	 * lock it for now (better to delay than corrupt data?).
	 */
	/* get cluster boundaries, check sanity, and apply our limits as well.*/
	uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
	if ((flags & PGO_ALLPAGES) == 0) {
		if (lo < mlo)
			lo = mlo;
		if (hi > mhi)
			hi = mhi;
	}
	if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
		pps[0] = center;
		*npages = 1;
		return pps;
	}

	/* now determine the center and attempt to cluster around the edges */
	center_idx = (center->offset - lo) >> PAGE_SHIFT;
	pps[center_idx] = center;	/* plug in the center page */
	ppsp = &pps[center_idx];
	*npages = 1;

	/*
	 * attempt to cluster around the left [backward], and then
	 * the right side [forward].
	 *
	 * note that for inactive pages (pages that have been deactivated)
	 * there are no valid mappings and PG_CLEAN should be up to date.
	 * [i.e. there is no need to query the pmap with pmap_is_modified
	 * since there are no mappings].
	 */
	for (forward = 0 ; forward <= 1 ; forward++) {
		incr = forward ? PAGE_SIZE : -PAGE_SIZE;
		curoff = center->offset + incr;
		for ( ;(forward == 0 && curoff >= lo) ||
		    (forward && curoff < hi);
		    curoff += incr) {

			pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
			if (pclust == NULL) {
				break;			/* no page */
			}
			/* handle active pages */
			/* NOTE: inactive pages don't have pmap mappings */
			if ((pclust->pg_flags & PQ_INACTIVE) == 0) {
				if ((flags & PGO_DOACTCLUST) == 0) {
					/* dont want mapped pages at all */
					break;
				}

				/* make sure "clean" bit is sync'd */
				if ((pclust->pg_flags & PG_CLEANCHK) == 0) {
					if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY))
					    == PG_CLEAN &&
					    pmap_is_modified(pclust))
						atomic_clearbits_int(
						    &pclust->pg_flags,
						    PG_CLEAN);
					/* now checked */
					atomic_setbits_int(&pclust->pg_flags,
					    PG_CLEANCHK);
				}
			}

			/* is page available for cleaning and does it need it */
			if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) {
				break;	/* page is already clean or is busy */
			}

			/* yes!  enroll the page in our array */
			atomic_setbits_int(&pclust->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");

			/*
			 * If we want to free after io is done, and we're
			 * async, set the released flag
			 */
			if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE)
				atomic_setbits_int(&pclust->pg_flags,
				    PG_RELEASED);

			/* XXX: protect wired page?  see above comment. */
			pmap_page_protect(pclust, PROT_READ);
			if (!forward) {
				ppsp--;			/* back up one page */
				*ppsp = pclust;
			} else {
				/* move forward one page */
				ppsp[*npages] = pclust;
			}
			(*npages)++;
		}
	}

	/*
	 * done!  return the cluster array to the caller!!!
	 */
	return ppsp;
}
435 | ||||
436 | /* | |||
437 | * uvm_pager_put: high level pageout routine | |||
438 | * | |||
439 | * we want to pageout page "pg" to backing store, clustering if | |||
440 | * possible. | |||
441 | * | |||
442 | * => page queues must be locked by caller | |||
443 | * => if page is not swap-backed, then "uobj" points to the object | |||
444 | * backing it. | |||
445 | * => if page is swap-backed, then "uobj" should be NULL. | |||
446 | * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN | |||
447 | * for swap-backed memory, "pg" can be NULL if there is no page | |||
448 | * of interest [sometimes the case for the pagedaemon] | |||
449 | * => "ppsp_ptr" should point to an array of npages vm_page pointers | |||
450 | * for possible cluster building | |||
451 | * => flags (first two for non-swap-backed pages) | |||
452 | * PGO_ALLPAGES: all pages in uobj are valid targets | |||
453 | * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets | |||
454 | * PGO_SYNCIO: do SYNC I/O (no async) | |||
455 | * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O | |||
456 | * PGO_FREE: tell the aio daemon to free pages in the async case. | |||
457 | * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range | |||
458 | * if (!uobj) start is the (daddr_t) of the starting swapblk | |||
459 | * => return state: | |||
460 | * 1. we return the VM_PAGER status code of the pageout | |||
461 | * 2. we return with the page queues unlocked | |||
462 | * 3. on errors we always drop the cluster. thus, if we return | |||
463 | * !PEND, !OK, then the caller only has to worry about | |||
464 | * un-busying the main page (not the cluster pages). | |||
465 | * 4. on success, if !PGO_PDFREECLUST, we return the cluster | |||
466 | * with all pages busy (caller must un-busy and check | |||
467 | * wanted/released flags). | |||
468 | */ | |||
/*
 * uvm_pager_put: high level pageout routine (full contract in the
 * block comment above).  Clusters around "pg" when possible, performs
 * the I/O via the object's pgo_put (or uvm_swap_put for swap-backed),
 * and handles retry/cleanup on failure.  Returns a VM_PAGER_* code
 * with the page queues unlocked.
 */
int
uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page ***ppsp_ptr, int *npages, int flags,
    voff_t start, voff_t stop)
{
	int result;
	daddr_t swblk;
	struct vm_page **ppsp = *ppsp_ptr;

	/*
	 * note that uobj is null  if we are doing a swap-backed pageout.
	 * note that uobj is !null if we are doing normal object pageout.
	 * note that the page queues must be locked to cluster.
	 */
	if (uobj) {	/* if !swap-backed */
		/*
		 * attempt to build a cluster for pageout using its
		 * make-put-cluster function (if it has one).
		 */
		if (uobj->pgops->pgo_mk_pcluster) {
			ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
			    npages, pg, flags, start, stop);
			*ppsp_ptr = ppsp;  /* update caller's pointer */
		} else {
			ppsp[0] = pg;
			*npages = 1;
		}

		swblk = 0;		/* XXX: keep gcc happy */
	} else {
		/*
		 * for swap-backed pageout, the caller (the pagedaemon) has
		 * already built the cluster for us.   the starting swap
		 * block we are writing to has been passed in as "start."
		 * "pg" could be NULL if there is no page we are especially
		 * interested in (in which case the whole cluster gets dropped
		 * in the event of an error or a sync "done").
		 */
		swblk = start;
		/* ppsp and npages should be ok */
	}

	/* now that we've clustered we can unlock the page queues */
	uvm_unlock_pageq();

	/*
	 * now attempt the I/O.   if we have a failure and we are
	 * clustered, we will drop the cluster and try again.
	 */
ReTry:
	if (uobj) {
		result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
	} else {
		/* XXX daddr_t -> int */
		result = uvm_swap_put(swblk, ppsp, *npages, flags);
	}

	/*
	 * we have attempted the I/O.
	 *
	 * if the I/O was a success then:
	 * 	if !PGO_PDFREECLUST, we return the cluster to the
	 *		caller (who must un-busy all pages)
	 *	else we un-busy cluster pages for the pagedaemon
	 *
	 * if I/O is pending (async i/o) then we return the pending code.
	 * [in this case the async i/o done function must clean up when
	 * i/o is done...]
	 */
	if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
		if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
			/* drop cluster */
			if (*npages > 1 || pg == NULL)
				uvm_pager_dropcluster(uobj, pg, ppsp, npages,
				    PGO_PDFREECLUST);
		}
		return (result);
	}

	/*
	 * a pager error occurred (even after dropping the cluster, if there
	 * was one).  give up!  the caller only has one page ("pg")
	 * to worry about.
	 */
	if (*npages > 1 || pg == NULL) {
		uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);

		/*
		 * for failed swap-backed pageouts with a "pg",
		 * we need to reset pg's swslot to either:
		 * "swblk" (for transient errors, so we can retry),
		 * or 0 (for hard errors).
		 */
		if (uobj == NULL && pg != NULL) {
			/* XXX daddr_t -> int */
			int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
			if (pg->pg_flags & PQ_ANON) {
				rw_enter(pg->uanon->an_lock, RW_WRITE);
				pg->uanon->an_swslot = nswblk;
				rw_exit(pg->uanon->an_lock);
			} else {
				rw_enter(pg->uobject->vmobjlock, RW_WRITE);
				uao_set_swslot(pg->uobject,
					       pg->offset >> PAGE_SHIFT,
					       nswblk);
				rw_exit(pg->uobject->vmobjlock);
			}
		}
		if (result == VM_PAGER_AGAIN) {
			/*
			 * for transient failures, free all the swslots that
			 * we're not going to retry with.
			 */
			if (uobj == NULL) {
				if (pg) {
					/* XXX daddr_t -> int */
					uvm_swap_free(swblk + 1, *npages - 1);
				} else {
					/* XXX daddr_t -> int */
					uvm_swap_free(swblk, *npages);
				}
			}
			if (pg) {
				/* retry with just the center page */
				ppsp[0] = pg;
				*npages = 1;
				goto ReTry;
			}
		} else if (uobj == NULL) {
			/*
			 * for hard errors on swap-backed pageouts,
			 * mark the swslots as bad.  note that we do not
			 * free swslots that we mark bad.
			 */
			/* XXX daddr_t -> int */
			uvm_swap_markbad(swblk, *npages);
		}
	}

	/* I/O failed: hand the error back to the caller. */
	return result;
}
615 | ||||
616 | /* | |||
617 | * uvm_pager_dropcluster: drop a cluster we have built (because we | |||
618 | * got an error, or, if PGO_PDFREECLUST we are un-busying the | |||
619 | * cluster pages on behalf of the pagedaemon). | |||
620 | * | |||
621 | * => uobj, if non-null, is a non-swap-backed object | |||
622 | * => page queues are not locked | |||
623 | * => pg is our page of interest (the one we clustered around, can be null) | |||
624 | * => ppsp/npages is our current cluster | |||
625 | * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster | |||
626 | * pages on behalf of the pagedaemon. | |||
627 | * PGO_REALLOCSWAP: drop previously allocated swap slots for | |||
628 | * clustered swap-backed pages (except for "pg" if !NULL) | |||
629 | * "swblk" is the start of swap alloc (e.g. for ppsp[0]) | |||
630 | * [only meaningful if swap-backed (uobj == NULL)] | |||
631 | */ | |||
632 | ||||
/*
 * uvm_pager_dropcluster: drop the cluster pages (full contract in the
 * block comment above).  Un-busies (or releases) every page in
 * ppsp[0..*npages) except "pg" itself; with PGO_REALLOCSWAP, also zaps
 * the swap slots of swap-backed cluster pages.
 */
void
uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page **ppsp, int *npages, int flags)
{
	int lcv;

	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));

	/* drop all pages but "pg" */
	for (lcv = 0 ; lcv < *npages ; lcv++) {
		/* skip "pg" or empty slot */
		if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
			continue;

		/*
		 * Note that PQ_ANON bit can't change as long as we are holding
		 * the PG_BUSY bit (so there is no need to lock the page
		 * queues to test it).
		 */
		if (!uobj) {
			/* swap-backed: lock the page's anon or aobj for
			 * the duration of this iteration. */
			if (ppsp[lcv]->pg_flags & PQ_ANON) {
				rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
				if (flags & PGO_REALLOCSWAP)
					  /* zap swap block */
					  ppsp[lcv]->uanon->an_swslot = 0;
			} else {
				rw_enter(ppsp[lcv]->uobject->vmobjlock,
				    RW_WRITE);
				if (flags & PGO_REALLOCSWAP)
					uao_set_swslot(ppsp[lcv]->uobject,
					    ppsp[lcv]->offset >> PAGE_SHIFT, 0);
			}
		}

		/* did someone want the page while we had it busy-locked? */
		if (ppsp[lcv]->pg_flags & PG_WANTED) {
			wakeup(ppsp[lcv]);
		}

		/* if page was released, release it.  otherwise un-busy it */
		if (ppsp[lcv]->pg_flags & PG_RELEASED &&
		    ppsp[lcv]->pg_flags & PQ_ANON) {
			/* so that anfree will free */
			atomic_clearbits_int(&ppsp[lcv]->pg_flags,
			    PG_BUSY);
			UVM_PAGE_OWN(ppsp[lcv], NULL);

			/* kills anon and frees pg (drops an_lock too) */
			uvm_anon_release(ppsp[lcv]->uanon);

			continue;
		} else {
			/*
			 * if we were planning on async io then we would
			 * have PG_RELEASED set, clear that with the others.
			 */
			atomic_clearbits_int(&ppsp[lcv]->pg_flags,
			    PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED);
			UVM_PAGE_OWN(ppsp[lcv], NULL);
		}

		/*
		 * if we are operating on behalf of the pagedaemon and we
		 * had a successful pageout update the page!
		 */
		if (flags & PGO_PDFREECLUST) {
			pmap_clear_reference(ppsp[lcv]);
			pmap_clear_modify(ppsp[lcv]);
			atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
		}

		/* if anonymous cluster, unlock object and move on */
		if (!uobj) {
			if (ppsp[lcv]->pg_flags & PQ_ANON)
				rw_exit(ppsp[lcv]->uanon->an_lock);
			else
				rw_exit(ppsp[lcv]->uobject->vmobjlock);
		}
	}
}
713 | ||||
714 | /* | |||
715 | * interrupt-context iodone handler for single-buf i/os | |||
716 | * or the top-level buf of a nested-buf i/o. | |||
717 | * | |||
718 | * => must be at splbio(). | |||
719 | */ | |||
720 | ||||
/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 *
 * Queues the buf on uvm.aio_done and wakes the aiodone daemon, which
 * runs uvm_aio_aiodone() in thread context.
 *
 * => must be at splbio().
 */
void
uvm_aio_biodone(struct buf *bp)
{
	splassert(IPL_BIO);

	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	mtx_enter(&uvm.aiodoned_lock);
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	mtx_leave(&uvm.aiodoned_lock);
}
734 | ||||
735 | /* | |||
736 | * uvm_aio_aiodone: do iodone processing for async i/os. | |||
737 | * this should be called in thread context, not interrupt context. | |||
738 | */ | |||
739 | void | |||
740 | uvm_aio_aiodone(struct buf *bp) | |||
741 | { | |||
742 | int npages = bp->b_bufsize >> PAGE_SHIFT12; | |||
743 | struct vm_page *pg, *pgs[MAXPHYS(64 * 1024) >> PAGE_SHIFT12]; | |||
744 | struct uvm_object *uobj; | |||
745 | int i, error; | |||
746 | boolean_t write, swap; | |||
747 | ||||
748 | KASSERT(npages <= MAXPHYS >> PAGE_SHIFT)((npages <= (64 * 1024) >> 12) ? (void)0 : __assert( "diagnostic ", "/usr/src/sys/uvm/uvm_pager.c", 748, "npages <= MAXPHYS >> PAGE_SHIFT" )); | |||
| ||||
749 | splassert(IPL_BIO)do { if (splassert_ctl > 0) { splassert_check(0x6, __func__ ); } } while (0); | |||
750 | ||||
751 | error = (bp->b_flags & B_ERROR0x00000400) ? (bp->b_error ? bp->b_error : EIO5) : 0; | |||
752 | write = (bp->b_flags & B_READ0x00008000) == 0; | |||
753 | ||||
754 | uobj = NULL((void *)0); | |||
755 | for (i = 0; i < npages; i++) | |||
756 | pgs[i] = uvm_atopg((vaddr_t)bp->b_data + | |||
757 | ((vsize_t)i << PAGE_SHIFT12)); | |||
758 | uvm_pagermapout((vaddr_t)bp->b_data, npages); | |||
759 | #ifdef UVM_SWAP_ENCRYPT1 | |||
760 | /* | |||
761 | * XXX - assumes that we only get ASYNC writes. used to be above. | |||
762 | */ | |||
763 | if (pgs[0]->pg_flags & PQ_ENCRYPT0x00400000) { | |||
| ||||
764 | uvm_swap_freepages(pgs, npages); | |||
765 | goto freed; | |||
766 | } | |||
767 | #endif /* UVM_SWAP_ENCRYPT */ | |||
768 | for (i = 0; i < npages; i++) { | |||
769 | pg = pgs[i]; | |||
770 | ||||
771 | if (i == 0) { | |||
772 | swap = (pg->pg_flags & PQ_SWAPBACKED(0x00100000|0x00200000)) != 0; | |||
773 | if (!swap) { | |||
774 | uobj = pg->uobject; | |||
775 | rw_enter(uobj->vmobjlock, RW_WRITE0x0001UL); | |||
776 | } | |||
777 | } | |||
778 | KASSERT(swap || pg->uobject == uobj)((swap || pg->uobject == uobj) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/uvm/uvm_pager.c", 778, "swap || pg->uobject == uobj" )); | |||
779 | ||||
780 | /* | |||
781 | * if this is a read and we got an error, mark the pages | |||
782 | * PG_RELEASED so that uvm_page_unbusy() will free them. | |||
783 | */ | |||
784 | if (!write && error) { | |||
785 | atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, PG_RELEASED0x00000020); | |||
786 | continue; | |||
787 | } | |||
788 | KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0)((!write || (pgs[i]->pg_flags & 0x00000040) == 0) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/uvm/uvm_pager.c", 788 , "!write || (pgs[i]->pg_flags & PG_FAKE) == 0")); | |||
789 | ||||
790 | /* | |||
791 | * if this is a read and the page is PG_FAKE, | |||
792 | * or this was a successful write, | |||
793 | * mark the page PG_CLEAN and not PG_FAKE. | |||
794 | */ | |||
795 | if ((pgs[i]->pg_flags & PG_FAKE0x00000040) || (write && error != ENOMEM12)) { | |||
796 | pmap_clear_reference(pgs[i])pmap_clear_attrs(pgs[i], 0x0000000000000020UL); | |||
797 | pmap_clear_modify(pgs[i])pmap_clear_attrs(pgs[i], 0x0000000000000040UL); | |||
798 | atomic_setbits_intx86_atomic_setbits_u32(&pgs[i]->pg_flags, PG_CLEAN0x00000008); | |||
799 | atomic_clearbits_intx86_atomic_clearbits_u32(&pgs[i]->pg_flags, PG_FAKE0x00000040); | |||
800 | } | |||
801 | } | |||
802 | uvm_page_unbusy(pgs, npages); | |||
803 | if (!swap) { | |||
804 | rw_exit(uobj->vmobjlock); | |||
805 | } | |||
806 | ||||
807 | #ifdef UVM_SWAP_ENCRYPT1 | |||
808 | freed: | |||
809 | #endif | |||
810 | pool_put(&bufpool, bp); | |||
811 | } |