File: uvm/uvm_pdaemon.c
Warning: line 879, column 2: Value stored to 'free' is never read
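
This is a classic dead-store finding: a value is written to a local variable, but no later code reads it before it is overwritten or goes out of scope. A minimal sketch of the flagged pattern (hypothetical code, not taken from this file):

	int
	dead_store_example(int navail, int deficit)
	{
		int free;

		free = navail - deficit;	/* flagged: overwritten before any read */
		free = navail / 2;
		return free;
	}

In this file the matching store is in uvmpd_scan(); see the annotation at line 879 below.
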
/*	$OpenBSD: uvm_pdaemon.c,v 1.109 2023/10/27 19:18:53 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full. the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, struct uvm_constraint_range *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *,
		    struct uvm_constraint_range *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
void		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);	/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
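
/*
 * Illustrative usage (an assumption, not code from this file): callers
 * that can sleep typically retry an allocation around uvm_wait(), e.g.
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL)
 *		uvm_wait("examplewait");
 */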

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

void
uvmpd_tune(void)
{

	uvmexp.freemin = uvmexp.npages / 30;

	/* between 16k and 512k */
	/* XXX:  what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
#if 0
	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
#endif

	/* Make sure there's always a user page free. */
	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
		uvmexp.freemin = uvmexp.reserve_kernel + 1;

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
}
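
/*
 * Worked example (illustrative numbers): with 4 KiB pages and 1 GiB of
 * managed memory, npages = 262144, so freemin = 262144 / 30 = 8738
 * pages (~34 MiB), freetarg = (8738 * 4) / 3 = 11650 pages, and
 * wiredmax = 262144 / 3 = 87381 pages.
 */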

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;
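
/*
 * Illustrative (an assumption about callers, not code from this file):
 * a failing nowait allocation path is expected to do roughly
 *
 *	uvm_nowait_failed = 1;
 *	wakeup(&uvm.pagedaemon);
 *
 * so that the daemon's next pass targets dma_constraint.
 */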

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int npages = 0;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}

		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
			size += uvmexp.freetarg - (uvmexp.free -
			    BUFPAGES_DEFICIT);
		if (size == 0)
			size = 16; /* XXX */
		uvm_unlock_pageq();
		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_lock_pageq();

		/*
		 * scan if needed
		 */
		if (pma != NULL ||
		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma, &constraint);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon: main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
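		/*
		 * `free' was sampled before the i/o completions above: if
		 * memory was at or below the kernel reserve then, the
		 * pagedaemon itself may be waiting on these i/os, so wake
		 * it; otherwise wake any uvm_wait() sleepers on
		 * &uvmexp.free.
		 */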
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{

	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}


/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 */
void
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
	} else if (pg->pg_flags & PQ_AOBJ) {
		uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
	}
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma,
    struct uvm_constraint_range *constraint, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;
	paddr_t paddr;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in `constraint' */
	TAILQ_FOREACH(p, pglst, pageq) {
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr >= constraint->ucr_low &&
		    paddr < constraint->ucr_high)
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out
			 * since all pages in swap are only in swap.
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case (swnpages == swcpages)) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma, struct uvm_constraint_range *constraint)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;
	paddr_t paddr;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	/*
	 * scan-build: "Value stored to 'free' is never read" (line 879,
	 * flagged above).  The store is only read by the
	 * #ifdef __HAVE_PMAP_COLLECT block below; on configurations where
	 * that block is compiled out, `free' is recomputed before its
	 * first use, so this assignment is dead there.
	 */

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	pages_freed = uvmexp.pdfreed;
	(void) uvmpd_scan_inactive(pma, constraint, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * skip this page if it doesn't match the constraint.
		 * (note: with `&&' the original test could never be true,
		 * since paddr cannot be both below ucr_low and at or above
		 * ucr_high; `||' skips pages outside [ucr_low, ucr_high).)
		 */
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr < constraint->ucr_low ||
		    paddr >= constraint->ucr_high)
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
			if (p->pg_flags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					atomic_clearbits_int(&p->pg_flags,
					    PG_CLEAN);
					swap_shortage--;
				}
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object * uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif