Bug Summary

File: uvm/uvm_swap.c
Warning: line 1140, column 2
Value stored to 'error' is never read
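This is the classic dead-store pattern: sw_reg_strategy() initializes 'error' to 0 at line 1140, but every later read of 'error' happens only after the VOP_BMAP() call inside the transfer loop reassigns it, and the post-loop cleanup reports failures through vnx->vx_error rather than through the local. A minimal, self-contained sketch of the flagged pattern (the helper do_io() and struct work are hypothetical, for illustration only; this is not code from this file):

struct work {
	int w_error;
};

static int
do_io(struct work *w, int i)
{
	(void)w;
	(void)i;
	/* stand-in for the real I/O submission */
	return (0);
}

static int
example(struct work *w, int n)
{
	int error;
	int i;

	error = 0;			/* dead store: overwritten before any read */
	for (i = 0; i < n; i++) {
		error = do_io(w, i);	/* every read of 'error' sees this value */
		if (error) {
			w->w_error = error;	/* failures propagate through 'w', */
			return (-1);		/* not through the local variable  */
		}
	}
	return (0);			/* the initial 0 is never consulted */
}

The usual fix is simply to delete the unread initialization; the analyzer is pointing out redundancy, not a behavioral bug.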

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name uvm_swap.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free 
-fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/uvm/uvm_swap.c
1/* $OpenBSD: uvm_swap.c,v 1.152 2021/12/12 09:14:59 visa Exp $ */
2/* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */
3
4/*
5 * Copyright (c) 1995, 1996, 1997 Matthew R. Green
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
30 * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/buf.h>
36#include <sys/conf.h>
37#include <sys/proc.h>
38#include <sys/namei.h>
39#include <sys/disklabel.h>
40#include <sys/errno.h>
41#include <sys/kernel.h>
42#include <sys/malloc.h>
43#include <sys/vnode.h>
44#include <sys/fcntl.h>
45#include <sys/extent.h>
46#include <sys/mount.h>
47#include <sys/pool.h>
48#include <sys/syscallargs.h>
49#include <sys/swap.h>
50#include <sys/disk.h>
51#include <sys/task.h>
52#include <sys/pledge.h>
53#if defined(NFSCLIENT)
54#include <sys/socket.h>
55#include <sys/domain.h>
56#include <netinet/in.h>
57#include <nfs/nfsproto.h>
58#include <nfs/nfsdiskless.h>
59#endif
60
61#include <uvm/uvm.h>
62#ifdef UVM_SWAP_ENCRYPT
63#include <uvm/uvm_swap_encrypt.h>
64#endif
65
66#include <sys/specdev.h>
67
68#include "vnd.h"
69
70/*
71 * uvm_swap.c: manage configuration and i/o to swap space.
72 */
73
74/*
75 * swap space is managed in the following way:
76 *
77 * each swap partition or file is described by a "swapdev" structure.
78 * each "swapdev" structure contains a "swapent" structure which contains
79 * information that is passed up to the user (via system calls).
80 *
81 * each swap partition is assigned a "priority" (int) which controls
82 * swap partition usage.
83 *
84 * the system maintains a global data structure describing all swap
85 * partitions/files. there is a sorted LIST of "swappri" structures
86 * which describe "swapdev"'s at that priority. this LIST is headed
87 * by the "swap_priority" global var. each "swappri" contains a
88 * TAILQ of "swapdev" structures at that priority.
89 *
90 * locking:
91 * - swap_syscall_lock (sleep lock): this lock serializes the swapctl
92 * system call and prevents the swap priority list from changing
93 * while we are in the middle of a system call (e.g. SWAP_STATS).
94 *
95 * each swap device has the following info:
96 * - swap device in use (could be disabled, preventing future use)
97 * - swap enabled (allows new allocations on swap)
98 * - map info in /dev/drum
99 * - vnode pointer
100 * for swap files only:
101 * - block size
102 * - max byte count in buffer
103 * - buffer
104 * - credentials to use when doing i/o to file
105 *
106 * userland controls and configures swap with the swapctl(2) system call.
107 * the sys_swapctl performs the following operations:
108 * [1] SWAP_NSWAP: returns the number of swap devices currently configured
109 * [2] SWAP_STATS: given a pointer to an array of swapent structures
110 * (passed in via "arg") of a size passed in via "misc" ... we load
111 * the current swap config into the array.
112 * [3] SWAP_ON: given a pathname in arg (could be device or file) and a
113 * priority in "misc", start swapping on it.
114 * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
115 * [5] SWAP_CTL: changes the priority of a swap device (new priority in
116 * "misc")
117 */
118
119/*
120 * swapdev: describes a single swap partition/file
121 *
122 * note the following should be true:
123 * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks]
124 * swd_nblks <= swd_mapsize [because mapsize includes disklabel]
125 */
126struct swapdev {
127 struct swapent swd_se;
128#define swd_dev swd_se.se_dev /* device id */
129#define swd_flags swd_se.se_flags /* flags:inuse/enable/fake */
130#define swd_priority swd_se.se_priority /* our priority */
131#define swd_inuse swd_se.se_inuse /* blocks used */
132#define swd_nblks swd_se.se_nblks /* total blocks */
133 char *swd_path; /* saved pathname of device */
134 int swd_pathlen; /* length of pathname */
135 int swd_npages; /* #pages we can use */
136 int swd_npginuse; /* #pages in use */
137 int swd_npgbad; /* #pages bad */
138 int swd_drumoffset; /* page0 offset in drum */
139 int swd_drumsize; /* #pages in drum */
140 struct extent *swd_ex; /* extent for this swapdev */
141 char swd_exname[12]; /* name of extent above */
142 struct vnode *swd_vp; /* backing vnode */
143 TAILQ_ENTRY(swapdev) swd_next; /* priority tailq */
144
145 int swd_bsize; /* blocksize (bytes) */
146 int swd_maxactive; /* max active i/o reqs */
147 int swd_active; /* # of active i/o reqs */
148 struct bufq swd_bufq;
149 struct ucred *swd_cred; /* cred for file access */
150#ifdef UVM_SWAP_ENCRYPT
151#define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */
152#define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
153#define SWD_KEY_SIZE(x) (((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT)
154
155#define SWD_DCRYPT_SHIFT 5
156#define SWD_DCRYPT_BITS 32
157#define SWD_DCRYPT_MASK (SWD_DCRYPT_BITS - 1)
158#define SWD_DCRYPT_OFF(x) ((x) >> SWD_DCRYPT_SHIFT)
159#define SWD_DCRYPT_BIT(x) ((x) & SWD_DCRYPT_MASK)
160#define SWD_DCRYPT_SIZE(x) (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
161 u_int32_t *swd_decrypt; /* bitmap for decryption */
162 struct swap_key *swd_keys; /* keys for different parts */
163#endif
164};
165
166/*
167 * swap device priority entry; the list is kept sorted on `spi_priority'.
168 */
169struct swappri {
170 int spi_priority; /* priority */
171 TAILQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
172 /* tailq of swapdevs at this priority */
173 LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */
174};
175
176/*
177 * The following two structures are used to keep track of data transfers
178 * on swap devices associated with regular files.
179 * NOTE: this code is more or less a copy of vnd.c; we use the same
180 * structure names here to ease porting..
181 */
182struct vndxfer {
183 struct buf *vx_bp; /* Pointer to parent buffer */
184 struct swapdev *vx_sdp;
185 int vx_error;
186 int vx_pending; /* # of pending aux buffers */
187 int vx_flags;
188#define VX_BUSY 1
189#define VX_DEAD 2
190};
191
192struct vndbuf {
193 struct buf vb_buf;
194 struct vndxfer *vb_vnx;
195 struct task vb_task;
196};
197
198/*
199 * We keep a pool of vndbuf's and vndxfer structures.
200 */
201struct pool vndxfer_pool;
202struct pool vndbuf_pool;
203
204
205/*
206 * local variables
207 */
208struct extent *swapmap; /* controls the mapping of /dev/drum */
209
210/* list of all active swap devices [by priority] */
211LIST_HEAD(swap_priority, swappri);
212struct swap_priority swap_priority;
213
214/* locks */
215struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
216
217/*
218 * prototypes
219 */
220void swapdrum_add(struct swapdev *, int);
221struct swapdev *swapdrum_getsdp(int);
222
223struct swapdev *swaplist_find(struct vnode *, int);
224void swaplist_insert(struct swapdev *,
225 struct swappri *, int);
226void swaplist_trim(void);
227
228int swap_on(struct proc *, struct swapdev *);
229int swap_off(struct proc *, struct swapdev *);
230
231void sw_reg_strategy(struct swapdev *, struct buf *, int);
232void sw_reg_iodone(struct buf *);
233void sw_reg_iodone_internal(void *);
234void sw_reg_start(struct swapdev *);
235
236int uvm_swap_io(struct vm_page **, int, int, int);
237
238void swapmount(void);
239boolean_t uvm_swap_allocpages(struct vm_page **, int);
240
241#ifdef UVM_SWAP_ENCRYPT
242/* for swap encrypt */
243void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
244boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
245void uvm_swap_initcrypt(struct swapdev *, int);
246#endif
247
248/*
249 * uvm_swap_init: init the swap system data structures and locks
250 *
251 * => called at boot time from init_main.c after the filesystems
252 * are brought up (which happens after uvm_init())
253 */
254void
255uvm_swap_init(void)
256{
257 /*
258 * first, init the swap list, its counter, and its lock.
259 * then get a handle on the vnode for /dev/drum by using
260 * the its dev_t number ("swapdev", from MD conf.c).
261 */
262 LIST_INIT(&swap_priority);
263 uvmexp.nswapdev = 0;
264
265 if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
266 panic("uvm_swap_init: can't get vnode for swap device");
267
268 /*
269 * create swap block extent to map /dev/drum. The extent spans
270 * 1 to INT_MAX, allowing 2 gigablocks of swap space. Note that
271 * block 0 is reserved (used to indicate an allocation failure,
272 * or no allocation).
273 */
274 swapmap = extent_create("swapmap", 1, INT_MAX0x7fffffff,
275 M_VMSWAP92, 0, 0, EX_NOWAIT0x0000);
276 if (swapmap == 0)
277 panic("uvm_swap_init: extent_create failed");
278
279 /* allocate pools for structures used for swapping to files. */
280 pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, IPL_BIO0x6, 0,
281 "swp vnx", NULL((void *)0));
282 pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, IPL_BIO0x6, 0,
283 "swp vnd", NULL((void *)0));
284
285 /* Setup the initial swap partition */
286 swapmount();
287}
288
289#ifdef UVM_SWAP_ENCRYPT
290void
291uvm_swap_initcrypt_all(void)
292{
293 struct swapdev *sdp;
294 struct swappri *spp;
295 int npages;
296
297
298 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
299 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
300 if (sdp->swd_decrypt == NULL((void *)0)) {
301 npages = dbtob((uint64_t)sdp->swd_nblks) >>
302 PAGE_SHIFT;
303 uvm_swap_initcrypt(sdp, npages);
304 }
305 }
306 }
307}
308
309void
310uvm_swap_initcrypt(struct swapdev *sdp, int npages)
311{
312 /*
313 * keep information if a page needs to be decrypted when we get it
314 * from the swap device.
315 * We cannot chance a malloc later, if we are doing ASYNC puts,
316 * we may not call malloc with M_WAITOK. This consumes only
317 * 8KB memory for a 256MB swap partition.
318 */
319 sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages)((((npages) + (32 - 1)) >> 5) * sizeof(u_int32_t)), M_VMSWAP92,
320 M_WAITOK0x0001|M_ZERO0x0008);
321 sdp->swd_keys = mallocarray(SWD_KEY_SIZE(npages)(((npages) + (1 << 7) - 1) >> 7),
322 sizeof(struct swap_key), M_VMSWAP92, M_WAITOK0x0001|M_ZERO0x0008);
323}
324
325#endif /* UVM_SWAP_ENCRYPT */
326
327boolean_t
328uvm_swap_allocpages(struct vm_page **pps, int npages)
329{
330 struct pglist pgl;
331 int i;
332 boolean_t fail;
333
334 /* Estimate if we will succeed */
335 uvm_lock_fpageq()mtx_enter(&uvm.fpageqlock);
336
337 fail = uvmexp.free - npages < uvmexp.reserve_kernel;
338
339 uvm_unlock_fpageq()mtx_leave(&uvm.fpageqlock);
340
341 if (fail)
342 return FALSE0;
343
344 TAILQ_INIT(&pgl);
345 if (uvm_pglistalloc(npages * PAGE_SIZE(1 << 12), dma_constraint.ucr_low,
346 dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_NOWAIT0x0002))
347 return FALSE0;
348
349 for (i = 0; i < npages; i++) {
350 pps[i] = TAILQ_FIRST(&pgl)((&pgl)->tqh_first);
351 /* *sigh* */
352 atomic_setbits_intx86_atomic_setbits_u32(&pps[i]->pg_flags, PG_BUSY0x00000001);
353 TAILQ_REMOVE(&pgl, pps[i], pageq);
354 }
355
356 return TRUE1;
357}
358
359void
360uvm_swap_freepages(struct vm_page **pps, int npages)
361{
362 int i;
363
364 uvm_lock_pageq()mtx_enter(&uvm.pageqlock);
365 for (i = 0; i < npages; i++)
366 uvm_pagefree(pps[i]);
367 uvm_unlock_pageq()mtx_leave(&uvm.pageqlock);
368}
369
370#ifdef UVM_SWAP_ENCRYPT
371/*
372 * Mark pages on the swap device for later decryption
373 */
374
375void
376uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
377 int decrypt)
378{
379 int pagestart, i;
380 int off, bit;
381
382 if (!sdp)
383 return;
384
385 pagestart = startslot - sdp->swd_drumoffset;
386 for (i = 0; i < npages; i++, pagestart++) {
387 off = SWD_DCRYPT_OFF(pagestart)((pagestart) >> 5);
388 bit = SWD_DCRYPT_BIT(pagestart)((pagestart) & (32 - 1));
389 if (decrypt)
390 /* pages read need decryption */
391 sdp->swd_decrypt[off] |= 1 << bit;
392 else
393 /* pages read do not need decryption */
394 sdp->swd_decrypt[off] &= ~(1 << bit);
395 }
396}
397
398/*
399 * Check if the page that we got from disk needs to be decrypted
400 */
401
402boolean_t
403uvm_swap_needdecrypt(struct swapdev *sdp, int off)
404{
405 if (!sdp)
406 return FALSE0;
407
408 off -= sdp->swd_drumoffset;
409 return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)((off) >> 5)] & (1 << SWD_DCRYPT_BIT(off)((off) & (32 - 1))) ?
410 TRUE1 : FALSE0;
411}
412
413void
414uvm_swap_finicrypt_all(void)
415{
416 struct swapdev *sdp;
417 struct swappri *spp;
418 struct swap_key *key;
419 unsigned int nkeys;
420
421 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
422 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
423 if (sdp->swd_decrypt == NULL((void *)0))
424 continue;
425
426 nkeys = dbtob((uint64_t)sdp->swd_nblks)(((uint64_t)sdp->swd_se.se_nblks) << 9) >> PAGE_SHIFT12;
427 key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys)(((nkeys) + (1 << 7) - 1) >> 7) - 1);
428 do {
429 if (key->refcount != 0)
430 swap_key_delete(key);
431 } while (key-- != sdp->swd_keys);
432 }
433 }
434}
435#endif /* UVM_SWAP_ENCRYPT */
436
437/*
438 * swaplist functions: functions that operate on the list of swap
439 * devices on the system.
440 */
441
442/*
443 * swaplist_insert: insert swap device "sdp" into the global list
444 *
445 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
446 * => caller must provide a newly malloc'd swappri structure (we will
447 * FREE it if we don't need it... this is to prevent malloc blocking
448 * here while adding swap)
449 */
450void
451swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority)
452{
453 struct swappri *spp, *pspp;
454
455 /*
456 * find entry at or after which to insert the new device.
457 */
458 pspp = NULL((void *)0);
459 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
460 if (priority <= spp->spi_priority)
461 break;
462 pspp = spp;
463 }
464
465 /*
466 * new priority?
467 */
468 if (spp == NULL((void *)0) || spp->spi_priority != priority) {
469 spp = newspp; /* use newspp! */
470
471 spp->spi_priority = priority;
472 TAILQ_INIT(&spp->spi_swapdev);
473
474 if (pspp)
475 LIST_INSERT_AFTER(pspp, spp, spi_swappri);
476 else
477 LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
478 } else {
479 /* we don't need a new priority structure, free it */
480 free(newspp, M_VMSWAP92, sizeof(*newspp));
481 }
482
483 /*
484 * priority found (or created). now insert on the priority's
485 * tailq list and bump the total number of swapdevs.
486 */
487 sdp->swd_priorityswd_se.se_priority = priority;
488 TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
489 uvmexp.nswapdev++;
490}
491
492/*
493 * swaplist_find: find and optionally remove a swap device from the
494 * global list.
495 *
496 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
497 * => we return the swapdev we found (and removed)
498 */
499struct swapdev *
500swaplist_find(struct vnode *vp, boolean_t remove)
501{
502 struct swapdev *sdp;
503 struct swappri *spp;
504
505 /*
506 * search the lists for the requested vp
507 */
508 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
509 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
510 if (sdp->swd_vp != vp)
511 continue;
512 if (remove) {
513 TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
514 uvmexp.nswapdev--;
515 }
516 return (sdp);
517 }
518 }
519 return (NULL((void *)0));
520}
521
522
523/*
524 * swaplist_trim: scan priority list for empty priority entries and kill
525 * them.
526 *
527 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
528 */
529void
530swaplist_trim(void)
531{
532 struct swappri *spp, *nextspp;
533
534 LIST_FOREACH_SAFE(spp, &swap_priority, spi_swappri, nextspp) {
535 if (!TAILQ_EMPTY(&spp->spi_swapdev))
536 continue;
537 LIST_REMOVE(spp, spi_swappri);
538 free(spp, M_VMSWAP92, sizeof(*spp));
539 }
540}
541
542/*
543 * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
544 *
545 * => caller must hold swap_syscall_lock
546 * => uvm.swap_data_lock should be unlocked (we may sleep)
547 */
548void
549swapdrum_add(struct swapdev *sdp, int npages)
550{
551 u_long result;
552
553 if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
554 EX_WAITOK, &result))
555 panic("swapdrum_add");
556
557 sdp->swd_drumoffset = result;
558 sdp->swd_drumsize = npages;
559}
560
561/*
562 * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
563 * to the "swapdev" that maps that section of the drum.
564 *
565 * => each swapdev takes one big contig chunk of the drum
566 * => caller must hold uvm.swap_data_lock
567 */
568struct swapdev *
569swapdrum_getsdp(int pgno)
570{
571 struct swapdev *sdp;
572 struct swappri *spp;
573
574 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
575 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
576 if (pgno >= sdp->swd_drumoffset &&
577 pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
578 return sdp;
579 }
580 }
581 }
582 return NULL((void *)0);
583}
584
585
586/*
587 * sys_swapctl: main entry point for swapctl(2) system call
588 * [with two helper functions: swap_on and swap_off]
589 */
590int
591sys_swapctl(struct proc *p, void *v, register_t *retval)
592{
593 struct sys_swapctl_args /* {
594 syscallarg(int) cmd;
595 syscallarg(void *) arg;
596 syscallarg(int) misc;
597 } */ *uap = (struct sys_swapctl_args *)v;
598 struct vnode *vp;
599 struct nameidata nd;
600 struct swappri *spp;
601 struct swapdev *sdp;
602 struct swapent *sep;
603 char userpath[MAXPATHLEN1024];
604 size_t len;
605 int count, error, misc;
606 int priority;
607
608 misc = SCARG(uap, misc)((uap)->misc.le.datum);
609
610 /*
611 * ensure serialized syscall access by grabbing the swap_syscall_lock
612 */
613 rw_enter_write(&swap_syscall_lock);
614
615 /*
616 * we handle the non-priv NSWAP and STATS request first.
617 *
618 * SWAP_NSWAP: return number of config'd swap devices
619 * [can also be obtained with uvmexp sysctl]
620 */
621 if (SCARG(uap, cmd)((uap)->cmd.le.datum) == SWAP_NSWAP3) {
622 *retval = uvmexp.nswapdev;
623 error = 0;
624 goto out;
625 }
626
627 /*
628 * SWAP_STATS: get stats on current # of configured swap devs
629 *
630 * note that the swap_priority list can't change as long
631 * as we are holding the swap_syscall_lock. we don't want
632 * to grab the uvm.swap_data_lock because we may fault&sleep during
633 * copyout() and we don't want to be holding that lock then!
634 */
635 if (SCARG(uap, cmd)((uap)->cmd.le.datum) == SWAP_STATS4) {
636 sep = (struct swapent *)SCARG(uap, arg)((uap)->arg.le.datum);
637 count = 0;
638
639 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
640 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
641 if (count >= misc)
642 continue;
643
644 sdp->swd_inuse =
645 btodb((u_int64_t)sdp->swd_npginuse <<
646 PAGE_SHIFT);
647 error = copyout(&sdp->swd_se, sep,
648 sizeof(struct swapent));
649 if (error)
650 goto out;
651
652 /* now copy out the path if necessary */
653 error = copyoutstr(sdp->swd_path,
654 sep->se_path, sizeof(sep->se_path), NULL((void *)0));
655 if (error)
656 goto out;
657
658 count++;
659 sep++;
660 }
661 }
662
663 *retval = count;
664 error = 0;
665 goto out;
666 }
667
668 /* all other requests require superuser privs. verify. */
669 if ((error = suser(p)) || (error = pledge_swapctl(p)))
670 goto out;
671
672 /*
673 * at this point we expect a path name in arg. we will
674 * use namei() to gain a vnode reference (vref), and lock
675 * the vnode (VOP_LOCK).
676 */
677 error = copyinstr(SCARG(uap, arg)((uap)->arg.le.datum), userpath, sizeof(userpath), &len);
678 if (error)
679 goto out;
680 disk_map(userpath, userpath, sizeof(userpath), DM_OPENBLCK0x2);
681 NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, userpath, p);
682 if ((error = namei(&nd)))
683 goto out;
684 vp = nd.ni_vp;
685 /* note: "vp" is referenced and locked */
686
687 error = 0; /* assume no error */
688 switch(SCARG(uap, cmd)((uap)->cmd.le.datum)) {
689 case SWAP_DUMPDEV7:
690 if (vp->v_type != VBLK) {
691 error = ENOTBLK15;
692 break;
693 }
694 dumpdev = vp->v_rdevv_un.vu_specinfo->si_rdev;
695 break;
696 case SWAP_CTL5:
697 /*
698 * get new priority, remove old entry (if any) and then
699 * reinsert it in the correct place. finally, prune out
700 * any empty priority structures.
701 */
702 priority = SCARG(uap, misc)((uap)->misc.le.datum);
703 spp = malloc(sizeof *spp, M_VMSWAP92, M_WAITOK0x0001);
704 if ((sdp = swaplist_find(vp, 1)) == NULL((void *)0)) {
705 error = ENOENT2;
706 } else {
707 swaplist_insert(sdp, spp, priority);
708 swaplist_trim();
709 }
710 if (error)
711 free(spp, M_VMSWAP92, sizeof(*spp));
712 break;
713 case SWAP_ON1:
714 /*
715 * If the device is a regular file, make sure the filesystem
716 * can be used for swapping.
717 */
718 if (vp->v_type == VREG &&
719 (vp->v_mount->mnt_flag & MNT_SWAPPABLE0x00200000) == 0) {
720 error = ENOTSUP91;
721 break;
722 }
723
724 /*
725 * check for duplicates. if none found, then insert a
726 * dummy entry on the list to prevent someone else from
727 * trying to enable this device while we are working on
728 * it.
729 */
730 priority = SCARG(uap, misc)((uap)->misc.le.datum);
731 if ((sdp = swaplist_find(vp, 0)) != NULL((void *)0)) {
732 error = EBUSY16;
733 break;
734 }
735 sdp = malloc(sizeof *sdp, M_VMSWAP92, M_WAITOK0x0001|M_ZERO0x0008);
736 spp = malloc(sizeof *spp, M_VMSWAP92, M_WAITOK0x0001);
737 sdp->swd_flagsswd_se.se_flags = SWF_FAKE0x00000008; /* placeholder only */
738 sdp->swd_vp = vp;
739 sdp->swd_devswd_se.se_dev = (vp->v_type == VBLK) ? vp->v_rdevv_un.vu_specinfo->si_rdev : NODEV(dev_t)(-1);
740
741 /*
742 * XXX Is NFS elaboration necessary?
743 */
744 if (vp->v_type == VREG) {
745 sdp->swd_cred = crdup(p->p_ucred);
746 }
747
748 swaplist_insert(sdp, spp, priority);
749
750 sdp->swd_pathlen = len;
751 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP92, M_WAITOK0x0001);
752 strlcpy(sdp->swd_path, userpath, len);
753
754 /*
755 * we've now got a FAKE placeholder in the swap list.
756 * now attempt to enable swap on it. if we fail, undo
757 * what we've done and kill the fake entry we just inserted.
758 * if swap_on is a success, it will clear the SWF_FAKE flag
759 */
760
761 if ((error = swap_on(p, sdp)) != 0) {
762 (void) swaplist_find(vp, 1); /* kill fake entry */
763 swaplist_trim();
764 if (vp->v_type == VREG) {
765 crfree(sdp->swd_cred);
766 }
767 free(sdp->swd_path, M_VMSWAP92, sdp->swd_pathlen);
768 free(sdp, M_VMSWAP92, sizeof(*sdp));
769 break;
770 }
771 break;
772 case SWAP_OFF2:
773 if ((sdp = swaplist_find(vp, 0)) == NULL((void *)0)) {
774 error = ENXIO6;
775 break;
776 }
777
778 /*
779 * If a device isn't in use or enabled, we
780 * can't stop swapping from it (again).
781 */
782 if ((sdp->swd_flagsswd_se.se_flags & (SWF_INUSE0x00000001|SWF_ENABLE0x00000002)) == 0) {
783 error = EBUSY16;
784 break;
785 }
786
787 /*
788 * do the real work.
789 */
790 error = swap_off(p, sdp);
791 break;
792 default:
793 error = EINVAL22;
794 }
795
796 /* done! release the ref gained by namei() and unlock. */
797 vput(vp);
798
799out:
800 rw_exit_write(&swap_syscall_lock);
801
802 return (error);
803}
804
805/*
806 * swap_on: attempt to enable a swapdev for swapping. note that the
807 * swapdev is already on the global list, but disabled (marked
808 * SWF_FAKE).
809 *
810 * => we avoid the start of the disk (to protect disk labels)
811 * => caller should leave uvm.swap_data_lock unlocked, we may lock it
812 * if needed.
813 */
814int
815swap_on(struct proc *p, struct swapdev *sdp)
816{
817 static int count = 0; /* static */
818 struct vnode *vp;
819 int error, npages, nblocks, size;
820 long addr;
821 struct vattr va;
822#if defined(NFSCLIENT)
823 extern const struct vops nfs_vops;
824#endif /* defined(NFSCLIENT) */
825 dev_t dev;
826
827 /*
828 * we want to enable swapping on sdp. the swd_vp contains
829 * the vnode we want (locked and ref'd), and the swd_dev
830 * contains the dev_t of the file, if it is a block device.
831 */
832
833 vp = sdp->swd_vp;
834 dev = sdp->swd_devswd_se.se_dev;
835
836#if NVND > 0
837 /* no swapping to vnds. */
838 if (bdevsw[major(dev)(((unsigned)(dev) >> 8) & 0xff)].d_strategy == vndstrategy)
839 return (EOPNOTSUPP45);
840#endif
841
842 /*
843 * open the swap file (mostly useful for block device files to
844 * let device driver know what is up).
845 *
846 * we skip the open/close for root on swap because the root
847 * has already been opened when root was mounted (mountroot).
848 */
849 if (vp != rootvp) {
850 if ((error = VOP_OPEN(vp, FREAD0x0001|FWRITE0x0002, p->p_ucred, p)))
851 return (error);
852 }
853
854 /* XXX this only works for block devices */
855 /*
856 * we now need to determine the size of the swap area. for
857 * block specials we can call the d_psize function.
858 * for normal files, we must stat [get attrs].
859 *
860 * we put the result in nblks.
861 * for normal files, we also want the filesystem block size
862 * (which we get with statfs).
863 */
864 switch (vp->v_type) {
865 case VBLK:
866 if (bdevsw[major(dev)(((unsigned)(dev) >> 8) & 0xff)].d_psize == 0 ||
867 (nblocks = (*bdevsw[major(dev)(((unsigned)(dev) >> 8) & 0xff)].d_psize)(dev)) == -1) {
868 error = ENXIO6;
869 goto bad;
870 }
871 break;
872
873 case VREG:
874 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
875 goto bad;
876 nblocks = (int)btodb(va.va_size)((va.va_size) >> 9);
877 if ((error =
878 VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
879 goto bad;
880
881 sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
882 /*
883 * limit the max # of outstanding I/O requests we issue
884 * at any one time. take it easy on NFS servers.
885 */
886#if defined(NFSCLIENT)
887 if (vp->v_op == &nfs_vops)
888 sdp->swd_maxactive = 2; /* XXX */
889 else
890#endif /* defined(NFSCLIENT) */
891 sdp->swd_maxactive = 8; /* XXX */
892 bufq_init(&sdp->swd_bufq, BUFQ_FIFO0);
893 break;
894
895 default:
896 error = ENXIO6;
897 goto bad;
898 }
899
900 /*
901 * save nblocks in a safe place and convert to pages.
902 */
903
904 sdp->swd_nblksswd_se.se_nblks = nblocks;
905 npages = dbtob((u_int64_t)nblocks)(((u_int64_t)nblocks) << 9) >> PAGE_SHIFT12;
906
907 /*
908 * for block special files, we want to make sure that we leave
909 * the disklabel and bootblocks alone, so we arrange to skip
910 * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
911 * note that because of this the "size" can be less than the
912 * actual number of blocks on the device.
913 */
914 if (vp->v_type == VBLK) {
915 /* we use pages 1 to (size - 1) [inclusive] */
916 size = npages - 1;
917 addr = 1;
918 } else {
919 /* we use pages 0 to (size - 1) [inclusive] */
920 size = npages;
921 addr = 0;
922 }
923
924 /*
925 * make sure we have enough blocks for a reasonable sized swap
926 * area. we want at least one page.
927 */
928
929 if (size < 1) {
930 error = EINVAL22;
931 goto bad;
932 }
933
934 /*
935 * now we need to allocate an extent to manage this swap device
936 */
937 snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
938 count++);
939
940 /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
941 sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP92,
942 0, 0, EX_WAITOK0x0001);
943 /* allocate the `saved' region from the extent so it won't be used */
944 if (addr) {
945 if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK0x0001))
946 panic("disklabel reserve");
947 /* XXX: is extent synchronized with swd_npginuse? */
948 }
949#ifdef HIBERNATE
950 /*
951 * Lock down the last region of primary disk swap, in case
952 * hibernate needs to place a signature there.
953 */
954 if (dev == swdevt[0].sw_dev && vp->v_type == VBLK && size > 3 ) {
955 if (extent_alloc_region(sdp->swd_ex,
956 npages - 1 - 1, 1, EX_WAITOK0x0001))
957 panic("hibernate reserve");
958 /* XXX: is extent synchronized with swd_npginuse? */
959 }
960#endif
961
962 /* add a ref to vp to reflect usage as a swap device. */
963 vref(vp);
964
965#ifdef UVM_SWAP_ENCRYPT
966 if (uvm_doswapencrypt)
967 uvm_swap_initcrypt(sdp, npages);
968#endif
969 /* now add the new swapdev to the drum and enable. */
970 swapdrum_add(sdp, npages);
971 sdp->swd_npages = size;
972 sdp->swd_flagsswd_se.se_flags &= ~SWF_FAKE0x00000008; /* going live */
973 sdp->swd_flagsswd_se.se_flags |= (SWF_INUSE0x00000001|SWF_ENABLE0x00000002);
974 uvmexp.swpages += size;
975 return (0);
976
977bad:
978 /* failure: close device if necessary and return error. */
979 if (vp != rootvp)
980 (void)VOP_CLOSE(vp, FREAD0x0001|FWRITE0x0002, p->p_ucred, p);
981 return (error);
982}
983
984/*
985 * swap_off: stop swapping on swapdev
986 *
987 * => swap data should be locked, we will unlock.
988 */
989int
990swap_off(struct proc *p, struct swapdev *sdp)
991{
992 int error = 0;
993
994 /* disable the swap area being removed */
995 sdp->swd_flagsswd_se.se_flags &= ~SWF_ENABLE0x00000002;
996
997 /*
998 * the idea is to find all the pages that are paged out to this
999 * device, and page them all in. in uvm, swap-backed pageable
1000 * memory can take two forms: aobjs and anons. call the
1001 * swapoff hook for each subsystem to bring in pages.
1002 */
1003
1004 if (uao_swap_off(sdp->swd_drumoffset,
1005 sdp->swd_drumoffset + sdp->swd_drumsize) ||
1006 amap_swap_off(sdp->swd_drumoffset,
1007 sdp->swd_drumoffset + sdp->swd_drumsize)) {
1008
1009 error = ENOMEM12;
1010 } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
1011 error = EBUSY16;
1012 }
1013
1014 if (error) {
1015 sdp->swd_flagsswd_se.se_flags |= SWF_ENABLE0x00000002;
1016 return (error);
1017 }
1018
1019 /*
1020 * done with the vnode and saved creds.
1021 * drop our ref on the vnode before calling VOP_CLOSE()
1022 * so that spec_close() can tell if this is the last close.
1023 */
1024 if (sdp->swd_vp->v_type == VREG) {
1025 crfree(sdp->swd_cred);
1026 }
1027 vrele(sdp->swd_vp);
1028 if (sdp->swd_vp != rootvp) {
1029 (void) VOP_CLOSE(sdp->swd_vp, FREAD0x0001|FWRITE0x0002, p->p_ucred, p);
1030 }
1031
1032 uvmexp.swpages -= sdp->swd_npages;
1033
1034 if (swaplist_find(sdp->swd_vp, 1) == NULL((void *)0))
1035 panic("swap_off: swapdev not in list");
1036 swaplist_trim();
1037
1038 /*
1039 * free all resources!
1040 */
1041 extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
1042 EX_WAITOK0x0001);
1043 extent_destroy(sdp->swd_ex);
1044 /* free sdp->swd_path ? */
1045 free(sdp, M_VMSWAP92, sizeof(*sdp));
1046 return (0);
1047}
1048
1049/*
1050 * /dev/drum interface and i/o functions
1051 */
1052
1053/*
1054 * swstrategy: perform I/O on the drum
1055 *
1056 * => we must map the i/o request from the drum to the correct swapdev.
1057 */
1058void
1059swstrategy(struct buf *bp)
1060{
1061 struct swapdev *sdp;
1062 int s, pageno, bn;
1063
1064 /*
1065 * convert block number to swapdev. note that swapdev can't
1066 * be yanked out from under us because we are holding resources
1067 * in it (i.e. the blocks we are doing I/O on).
1068 */
1069 pageno = dbtob((u_int64_t)bp->b_blkno)(((u_int64_t)bp->b_blkno) << 9) >> PAGE_SHIFT12;
1070 sdp = swapdrum_getsdp(pageno);
1071 if (sdp == NULL((void *)0)) {
1072 bp->b_error = EINVAL22;
1073 bp->b_flags |= B_ERROR0x00000400;
1074 s = splbio()splraise(0x6);
1075 biodone(bp);
1076 splx(s)spllower(s);
1077 return;
1078 }
1079
1080 /* convert drum page number to block number on this swapdev. */
1081 pageno -= sdp->swd_drumoffset; /* page # on swapdev */
1082 bn = btodb((u_int64_t)pageno << PAGE_SHIFT)(((u_int64_t)pageno << 12) >> 9); /* convert to diskblock */
1083
1084 /*
1085 * for block devices we finish up here.
1086 * for regular files we have to do more work which we delegate
1087 * to sw_reg_strategy().
1088 */
1089 switch (sdp->swd_vp->v_type) {
1090 default:
1091 panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
1092 case VBLK:
1093 /*
1094 * must convert "bp" from an I/O on /dev/drum to an I/O
1095 * on the swapdev (sdp).
1096 */
1097 s = splbio()splraise(0x6);
1098 buf_replacevnode(bp, sdp->swd_vp);
1099
1100 bp->b_blkno = bn;
1101 splx(s)spllower(s);
1102 VOP_STRATEGY(bp->b_vp, bp);
1103 return;
1104 case VREG:
1105 /* delegate to sw_reg_strategy function. */
1106 sw_reg_strategy(sdp, bp, bn);
1107 return;
1108 }
1109 /* NOTREACHED */
1110}
1111
1112/*
1113 * sw_reg_strategy: handle swap i/o to regular files
1114 */
1115void
1116sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn)
1117{
1118 struct vnode *vp;
1119 struct vndxfer *vnx;
1120 daddr_t nbn;
1121 caddr_t addr;
1122 off_t byteoff;
1123 int s, off, nra, error, sz, resid;
1124
1125 /*
1126 * allocate a vndxfer head for this transfer and point it to
1127 * our buffer.
1128 */
1129 vnx = pool_get(&vndxfer_pool, PR_WAITOK0x0001);
1130 vnx->vx_flags = VX_BUSY1;
1131 vnx->vx_error = 0;
1132 vnx->vx_pending = 0;
1133 vnx->vx_bp = bp;
1134 vnx->vx_sdp = sdp;
1135
1136 /*
1137 * setup for main loop where we read filesystem blocks into
1138 * our buffer.
1139 */
1140 error = 0;
Value stored to 'error' is never read
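The store is dead because 'error' is unconditionally reassigned by the VOP_BMAP() call at line 1155 before it is next read, and the post-loop cleanup at 'out:' reports failures through vnx->vx_error rather than through the local; deleting the initialization would silence the warning without changing behavior.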
1141 bp->b_resid = bp->b_bcount; /* nothing transferred yet! */
1142 addr = bp->b_data; /* current position in buffer */
1143 byteoff = dbtob((u_int64_t)bn)(((u_int64_t)bn) << 9);
1144
1145 for (resid = bp->b_resid; resid; resid -= sz) {
1146 struct vndbuf *nbp;
1147 /*
1148 * translate byteoffset into block number. return values:
1149 * vp = vnode of underlying device
1150 * nbn = new block number (on underlying vnode dev)
1151 * nra = num blocks we can read-ahead (excludes requested
1152 * block)
1153 */
1154 nra = 0;
1155 error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
1156 &vp, &nbn, &nra);
1157
1158 if (error == 0 && nbn == -1) {
1159 /*
1160 * this used to just set error, but that doesn't
1161 * do the right thing. Instead, it causes random
1162 * memory errors. The panic() should remain until
1163 * this condition doesn't destabilize the system.
1164 */
1165#if 1
1166 panic("sw_reg_strategy: swap to sparse file");
1167#else
1168 error = EIO5; /* failure */
1169#endif
1170 }
1171
1172 /*
1173 * punt if there was an error or a hole in the file.
1174 * we must wait for any i/o ops we have already started
1175 * to finish before returning.
1176 *
1177 * XXX we could deal with holes here but it would be
1178 * a hassle (in the write case).
1179 */
1180 if (error) {
1181 s = splbio()splraise(0x6);
1182 vnx->vx_error = error; /* pass error up */
1183 goto out;
1184 }
1185
1186 /*
1187 * compute the size ("sz") of this transfer (in bytes).
1188 */
1189 off = byteoff % sdp->swd_bsize;
1190 sz = (1 + nra) * sdp->swd_bsize - off;
1191 if (sz > resid)
1192 sz = resid;
1193
1194 /*
1195 * now get a buf structure. note that the vb_buf is
1196 * at the front of the nbp structure so that you can
1197 * cast pointers between the two structure easily.
1198 */
1199 nbp = pool_get(&vndbuf_pool, PR_WAITOK0x0001);
1200 nbp->vb_buf.b_flags = bp->b_flags | B_CALL0x00000040;
1201 nbp->vb_buf.b_bcount = sz;
1202 nbp->vb_buf.b_bufsize = sz;
1203 nbp->vb_buf.b_error = 0;
1204 nbp->vb_buf.b_data = addr;
1205 nbp->vb_buf.b_bq = NULL((void *)0);
1206 nbp->vb_buf.b_blkno = nbn + btodb(off)((off) >> 9);
1207 nbp->vb_buf.b_proc = bp->b_proc;
1208 nbp->vb_buf.b_iodone = sw_reg_iodone;
1209 nbp->vb_buf.b_vp = NULLVP((struct vnode *)((void *)0));
1210 nbp->vb_buf.b_vnbufs.le_next = NOLIST((struct buf *)0x87654321);
1211 LIST_INIT(&nbp->vb_buf.b_dep);
1212
1213 /*
1214 * set b_dirtyoff/end and b_validoff/end. this is
1215 * required by the NFS client code (otherwise it will
1216 * just discard our I/O request).
1217 */
1218 if (bp->b_dirtyend == 0) {
1219 nbp->vb_buf.b_dirtyoff = 0;
1220 nbp->vb_buf.b_dirtyend = sz;
1221 } else {
1222 nbp->vb_buf.b_dirtyoff =
1223 max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
1224 nbp->vb_buf.b_dirtyend =
1225 min(sz,
1226 max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
1227 }
1228 if (bp->b_validend == 0) {
1229 nbp->vb_buf.b_validoff = 0;
1230 nbp->vb_buf.b_validend = sz;
1231 } else {
1232 nbp->vb_buf.b_validoff =
1233 max(0, bp->b_validoff - (bp->b_bcount-resid));
1234 nbp->vb_buf.b_validend =
1235 min(sz,
1236 max(0, bp->b_validend - (bp->b_bcount-resid)));
1237 }
1238
1239 /* patch it back to the vnx */
1240 nbp->vb_vnx = vnx;
1241 task_set(&nbp->vb_task, sw_reg_iodone_internal, nbp);
1242
1243 s = splbio()splraise(0x6);
1244 if (vnx->vx_error != 0) {
1245 pool_put(&vndbuf_pool, nbp);
1246 goto out;
1247 }
1248 vnx->vx_pending++;
1249
1250 /* assoc new buffer with underlying vnode */
1251 bgetvp(vp, &nbp->vb_buf);
1252
1253 /* start I/O if we are not over our limit */
1254 bufq_queue(&sdp->swd_bufq, &nbp->vb_buf);
1255 sw_reg_start(sdp);
1256 splx(s)spllower(s);
1257
1258 /*
1259 * advance to the next I/O
1260 */
1261 byteoff += sz;
1262 addr += sz;
1263 }
1264
1265 s = splbio()splraise(0x6);
1266
1267out: /* Arrive here at splbio */
1268 vnx->vx_flags &= ~VX_BUSY1;
1269 if (vnx->vx_pending == 0) {
1270 if (vnx->vx_error != 0) {
1271 bp->b_error = vnx->vx_error;
1272 bp->b_flags |= B_ERROR0x00000400;
1273 }
1274 pool_put(&vndxfer_pool, vnx);
1275 biodone(bp);
1276 }
1277 splx(s)spllower(s);
1278}
1279
1280/* sw_reg_start: start an I/O request on the requested swapdev. */
1281void
1282sw_reg_start(struct swapdev *sdp)
1283{
1284 struct buf *bp;
1285
1286 /* XXX: recursion control */
1287 if ((sdp->swd_flagsswd_se.se_flags & SWF_BUSY0x00000004) != 0)
1288 return;
1289
1290 sdp->swd_flagsswd_se.se_flags |= SWF_BUSY0x00000004;
1291
1292 while (sdp->swd_active < sdp->swd_maxactive) {
1293 bp = bufq_dequeue(&sdp->swd_bufq);
1294 if (bp == NULL((void *)0))
1295 break;
1296
1297 sdp->swd_active++;
1298
1299 if ((bp->b_flags & B_READ0x00008000) == 0)
1300 bp->b_vp->v_numoutput++;
1301
1302 VOP_STRATEGY(bp->b_vp, bp);
1303 }
1304 sdp->swd_flagsswd_se.se_flags &= ~SWF_BUSY0x00000004;
1305}
1306
1307/*
1308 * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
1309 *
1310 * => note that we can recover the vndbuf struct by casting the buf ptr
1311 *
1312 * XXX:
1313 * We only put this onto a taskq here, because of the maxactive game since
1314 * it basically requires us to call back into VOP_STRATEGY() (where we must
1315 * be able to sleep) via sw_reg_start().
1316 */
1317void
1318sw_reg_iodone(struct buf *bp)
1319{
1320 struct vndbuf *vbp = (struct vndbuf *)bp;
1321 task_add(systq, &vbp->vb_task);
1322}
1323
1324void
1325sw_reg_iodone_internal(void *xvbp)
1326{
1327 struct vndbuf *vbp = xvbp;
1328 struct vndxfer *vnx = vbp->vb_vnx;
1329 struct buf *pbp = vnx->vx_bp; /* parent buffer */
1330 struct swapdev *sdp = vnx->vx_sdp;
1331 int resid, s;
1332
1333 s = splbio()splraise(0x6);
1334
1335 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
1336 pbp->b_resid -= resid;
1337 vnx->vx_pending--;
1338
1339 /* pass error upward */
1340 if (vbp->vb_buf.b_error)
1341 vnx->vx_error = vbp->vb_buf.b_error;
1342
1343 /* disassociate this buffer from the vnode (if any). */
1344 if (vbp->vb_buf.b_vp != NULL((void *)0)) {
1345 brelvp(&vbp->vb_buf);
1346 }
1347
1348 /* kill vbp structure */
1349 pool_put(&vndbuf_pool, vbp);
1350
1351 /*
1352 * wrap up this transaction if it has run to completion or, in
1353 * case of an error, when all auxiliary buffers have returned.
1354 */
1355 if (vnx->vx_error != 0) {
1356 /* pass error upward */
1357 pbp->b_flags |= B_ERROR0x00000400;
1358 pbp->b_error = vnx->vx_error;
1359 if ((vnx->vx_flags & VX_BUSY1) == 0 && vnx->vx_pending == 0) {
1360 pool_put(&vndxfer_pool, vnx);
1361 biodone(pbp);
1362 }
1363 } else if (pbp->b_resid == 0) {
1364 KASSERT(vnx->vx_pending == 0);
1365 if ((vnx->vx_flags & VX_BUSY1) == 0) {
1366 pool_put(&vndxfer_pool, vnx);
1367 biodone(pbp);
1368 }
1369 }
1370
1371 /*
1372 * done! start next swapdev I/O if one is pending
1373 */
1374 sdp->swd_active--;
1375 sw_reg_start(sdp);
1376 splx(s)spllower(s);
1377}
1378
1379
1380/*
1381 * uvm_swap_alloc: allocate space on swap
1382 *
1383 * => allocation is done "round robin" down the priority list, as we
1384 * allocate in a priority we "rotate" the tail queue.
1385 * => space can be freed with uvm_swap_free
1386 * => we return the page slot number in /dev/drum (0 == invalid slot)
1387 * => we lock uvm.swap_data_lock
1388 * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
1389 */
1390int
1391uvm_swap_alloc(int *nslots, boolean_t lessok)
1392{
1393 struct swapdev *sdp;
1394 struct swappri *spp;
1395 u_long result;
1396
1397 /*
1398 * no swap devices configured yet? definite failure.
1399 */
1400 if (uvmexp.nswapdev < 1)
1401 return 0;
1402
1403 /*
1404 * lock data lock, convert slots into blocks, and enter loop
1405 */
1406 KERNEL_ASSERT_LOCKED();
1407ReTry: /* XXXMRG */
1408 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
1409 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
1410 /* if it's not enabled, then we can't swap from it */
1411 if ((sdp->swd_flagsswd_se.se_flags & SWF_ENABLE0x00000002) == 0)
1412 continue;
1413 if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
1414 continue;
1415 if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
1416 EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
1417 &result) != 0) {
1418 continue;
1419 }
1420
1421 /*
1422 * successful allocation! now rotate the tailq.
1423 */
1424 TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
1425 TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
1426 sdp->swd_npginuse += *nslots;
1427 uvmexp.swpginuse += *nslots;
1428 /* done! return drum slot number */
1429 return result + sdp->swd_drumoffset;
1430 }
1431 }
1432
1433 /* XXXMRG: BEGIN HACK */
1434 if (*nslots > 1 && lessok) {
1435 *nslots = 1;
1436 goto ReTry; /* XXXMRG: ugh! extent should support this for us */
1437 }
1438 /* XXXMRG: END HACK */
1439
1440 return 0; /* failed */
1441}
1442
1443/*
1444 * uvm_swapisfull: return true if all of available swap is allocated
1445 * and in use.
1446 */
1447int
1448uvm_swapisfull(void)
1449{
1450 int result;
1451
1452 KERNEL_LOCK()_kernel_lock();
1453 KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
1454 result = (uvmexp.swpgonly == uvmexp.swpages);
1455 KERNEL_UNLOCK()_kernel_unlock();
1456
1457 return result;
1458}
1459
1460/*
1461 * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
1462 *
1463 * => we lock uvm.swap_data_lock
1464 */
1465void
1466uvm_swap_markbad(int startslot, int nslots)
1467{
1468 struct swapdev *sdp;
1469
1470 KERNEL_LOCK()_kernel_lock();
1471 sdp = swapdrum_getsdp(startslot);
1472 if (sdp != NULL((void *)0)) {
1473 /*
1474 * we just keep track of how many pages have been marked bad
1475 * in this device, to make everything add up in swap_off().
1476 * we assume here that the range of slots will all be within
1477 * one swap device.
1478 */
1479 sdp->swd_npgbad += nslots;
1480 }
1481 KERNEL_UNLOCK()_kernel_unlock();
1482}
1483
1484/*
1485 * uvm_swap_free: free swap slots
1486 *
1487 * => this can be all or part of an allocation made by uvm_swap_alloc
1488 * => we lock uvm.swap_data_lock
1489 */
1490void
1491uvm_swap_free(int startslot, int nslots)
1492{
1493 struct swapdev *sdp;
1494
1495 /*
1496 * ignore attempts to free the "bad" slot.
1497 */
1498
1499 if (startslot == SWSLOT_BAD(-1)) {
1500 return;
1501 }
1502
1503 /*
1504 * convert drum slot offset back to sdp, free the blocks
1505 * in the extent, and return. must hold pri lock to do
1506 * lookup and access the extent.
1507 */
1508 KERNEL_LOCK()_kernel_lock();
1509 sdp = swapdrum_getsdp(startslot);
1510 KASSERT(uvmexp.nswapdev >= 1);
1511 KASSERT(sdp != NULL);
1512 KASSERT(sdp->swd_npginuse >= nslots);
1513 if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
1514 EX_MALLOCOK0x0010|EX_NOWAIT0x0000) != 0) {
1515 printf("warning: resource shortage: %d pages of swap lost\n",
1516 nslots);
1517 }
1518
1519 sdp->swd_npginuse -= nslots;
1520 uvmexp.swpginuse -= nslots;
1521#ifdef UVM_SWAP_ENCRYPT
1522 {
1523 int i;
1524 if (swap_encrypt_initialized) {
1525 /* Dereference keys */
1526 for (i = 0; i < nslots; i++)
1527 if (uvm_swap_needdecrypt(sdp, startslot + i)) {
1528 struct swap_key *key;
1529
1530 key = SWD_KEY(sdp, startslot + i);
1531 if (key->refcount != 0)
1532 SWAP_KEY_PUT(sdp, key);
1533 }
1534
1535 /* Mark range as not decrypt */
1536 uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
1537 }
1538 }
1539#endif /* UVM_SWAP_ENCRYPT */
1540 KERNEL_UNLOCK()_kernel_unlock();
1541}
1542
1543/*
1544 * uvm_swap_put: put any number of pages into a contig place on swap
1545 *
1546 * => can be sync or async
1547 */
1548int
1549uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags)
1550{
1551 int result;
1552
1553 result = uvm_swap_io(ppsp, swslot, npages, B_WRITE0x00000000 |
1554 ((flags & PGO_SYNCIO0x002) ? 0 : B_ASYNC0x00000004));
1555
1556 return (result);
1557}
1558
1559/*
1560 * uvm_swap_get: get a single page from swap
1561 *
1562 * => usually a sync op (from fault)
1563 */
1564int
1565uvm_swap_get(struct vm_page *page, int swslot, int flags)
1566{
1567 int result;
1568
1569 uvmexp.nswget++;
1570 KASSERT(flags & PGO_SYNCIO);
1571 if (swslot == SWSLOT_BAD(-1)) {
1572 return VM_PAGER_ERROR4;
1573 }
1574
1575 KERNEL_LOCK()_kernel_lock();
1576 /* this page is (about to be) no longer only in swap. */
1577 atomic_dec_int(&uvmexp.swpgonly)_atomic_dec_int(&uvmexp.swpgonly);
1578
1579 result = uvm_swap_io(&page, swslot, 1, B_READ0x00008000 |
1580 ((flags & PGO_SYNCIO0x002) ? 0 : B_ASYNC0x00000004));
1581
1582 if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
1583 /* oops, the read failed so it really is still only in swap. */
1584 atomic_inc_int(&uvmexp.swpgonly);
1585 }
1586 KERNEL_UNLOCK();
1587 return (result);
1588}
1589
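/*
 * Note on the accounting above: uvmexp.swpgonly counts pages whose only
 * copy lives on swap.  uvm_swap_get() decrements it optimistically
 * before the read and re-increments only when the read fails, so the
 * counter is correct on both paths without holding a lock across the
 * I/O.
 */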
1590/*
1591 * uvm_swap_io: do an i/o operation to swap
1592 */
1593
1594int
1595uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags)
1596{
1597 daddr_t startblk;
1598 struct buf *bp;
1599 vaddr_t kva;
1600 int result, s, mapinflags, pflag, bounce = 0, i;
1601 boolean_t write, async;
1602 vaddr_t bouncekva;
1603 struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
1604#ifdef UVM_SWAP_ENCRYPT
1605 struct swapdev *sdp;
1606 int encrypt = 0;
1607#endif
1608
1609 KERNEL_ASSERT_LOCKED();
1610
1611 write = (flags & B_READ) == 0;
1612 async = (flags & B_ASYNC) != 0;
1613
1614 /* convert starting drum slot to block number */
1615 startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);
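 /*
 * worked example (assuming PAGE_SHIFT = 12 and 512-byte disk blocks,
 * i.e. DEV_BSHIFT = 9): slot 3 -> byte offset 3 << 12 = 12288 ->
 * disk block 12288 >> 9 = 24, i.e. eight disk blocks per page.
 */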
1616
1617 /*
1618 * first, map the pages into the kernel (XXX: currently required
1619 * by buffer system).
1620 */
1621 mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
1622 if (!async)
1623 mapinflags |= UVMPAGER_MAPIN_WAITOK;
1624 kva = uvm_pagermapin(pps, npages, mapinflags);
1625 if (kva == 0)
1626 return (VM_PAGER_AGAIN);
1627
1628#ifdef UVM_SWAP_ENCRYPT
1629 if (write) {
1630 /*
1631 * Check if we need to do swap encryption on old pages.
1632 * Later we will need a different scheme that swap-encrypts
1633 * all pages of a process that had at least one page swap
1634 * encrypted. Then we might not need to copy all pages
1635 * in the cluster, and avoid the memory overhead of
1636 * swapping.
1637 */
1638 if (uvm_doswapencrypt)
1639 encrypt = 1;
1640 }
1641
1642 if (swap_encrypt_initialized || encrypt) {
1643 /*
1644 * we need to know the swap device that we are swapping to/from
1645 * to see if the pages need to be marked for decryption or
1646 * actually need to be decrypted.
1647 * XXX - does this information stay the same over the whole
1648 * execution of this function?
1649 */
1650 sdp = swapdrum_getsdp(startslot);
1651 }
1652
1653 /*
1654 * Check that we are DMA-capable for read (writes always bounce
1655 * through the swapencrypt buffer anyway).
1656 */
1657 if (write && encrypt) {
1658 bounce = 1; /* bounce through swapencrypt always */
1659 } else {
1660#else
1661 {
1662#endif
1663
1664 for (i = 0; i < npages; i++) {
1665 if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low ||
1666 VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high) {
1667 bounce = 1;
1668 break;
1669 }
1670 }
1671 }
1672
1673 if (bounce) {
1674 int swmapflags;
1675
1676 /* We always need write access. */
1677 swmapflags = UVMPAGER_MAPIN_READ;
1678 if (!async)
1679 swmapflags |= UVMPAGER_MAPIN_WAITOK;
1680
1681 if (!uvm_swap_allocpages(tpps, npages)) {
1682 uvm_pagermapout(kva, npages);
1683 return (VM_PAGER_AGAIN);
1684 }
1685
1686 bouncekva = uvm_pagermapin(tpps, npages, swmapflags);
1687 if (bouncekva == 0) {
1688 uvm_pagermapout(kva, npages);
1689 uvm_swap_freepages(tpps, npages);
1690 return (VM_PAGER_AGAIN);
1691 }
1692 }
1693
1694 /* encrypt to swap */
1695 if (write && bounce) {
1696 int i, opages;
1697 caddr_t src, dst;
1698 u_int64_t block;
1699
1700 src = (caddr_t) kva;
1701 dst = (caddr_t) bouncekva;
1702 block = startblk;
1703 for (i = 0; i < npages; i++) {
1704#ifdef UVM_SWAP_ENCRYPT
1705 struct swap_key *key;
1706
1707 if (encrypt) {
1708 key = SWD_KEY(sdp, startslot + i);
1709 SWAP_KEY_GET(sdp, key); /* add reference */
1710
1711 swap_encrypt(key, src, dst, block, PAGE_SIZE);
1712 block += btodb(PAGE_SIZE);
1713 } else {
1714#else
1715 {
1716#endif /* UVM_SWAP_ENCRYPT */
1717 memcpy(dst, src, PAGE_SIZE);
1718 }
1719 /* this just tells async callbacks to free */
1720 atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
1721 src += PAGE_SIZE;
1722 dst += PAGE_SIZE;
1723 }
1724
1725 uvm_pagermapout(kva, npages);
1726
1727 /* dispose of pages we don't use anymore */
1728 opages = npages;
1729 uvm_pager_dropcluster(NULL, NULL, pps, &opages,
1730 PGO_PDFREECLUST);
1731
1732 kva = bouncekva;
1733 }
1734
1735 /*
1736 * now allocate a buf for the i/o.
1737 * [make sure we don't put the pagedaemon to sleep...]
1738 */
1739 pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT :
1740 PR_WAITOK;
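 /*
 * PR_NOWAIT above: the pagedaemon is the process that frees memory,
 * so letting it sleep on a buf allocation could deadlock the VM
 * system; every other synchronous caller may wait.
 */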
1741 bp = pool_get(&bufpool, pflag | PR_ZERO);
1742
1743 /*
1744 * if we failed to get a swapbuf, return "try again"
1745 */
1746 if (bp == NULL) {
1747 if (write && bounce) {
1748#ifdef UVM_SWAP_ENCRYPT
1749 int i;
1750
1751 /* swap encrypt needs cleanup */
1752 if (encrypt)
1753 for (i = 0; i < npages; i++)
1754 SWAP_KEY_PUT(sdp, SWD_KEY(sdp,
1755 startslot + i));
1756#endif
1757
1758 uvm_pagermapout(kva, npages);
1759 uvm_swap_freepages(tpps, npages);
1760 }
1761 return (VM_PAGER_AGAIN);
1762 }
1763
1764 /*
1765 * prevent ASYNC reads.
1766 * uvm_swap_io is only called from uvm_swap_get, and uvm_swap_get
1767 * assumes that all gets are SYNCIO. Just make sure here.
1768 * XXXARTUBC - might not be true anymore.
1769 */
1770 if (!write) {
1771 flags &= ~B_ASYNC;
1772 async = 0;
1773 }
1774
1775 /*
1776 * fill in the bp. we currently route our i/o through
1777 * /dev/drum's vnode [swapdev_vp].
1778 */
1779 bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC));
1780 bp->b_proc = &proc0; /* XXX */
1781 bp->b_vnbufs.le_next = NOLIST;
1782 if (bounce)
1783 bp->b_data = (caddr_t)bouncekva;
1784 else
1785 bp->b_data = (caddr_t)kva;
1786 bp->b_bq = NULL;
1787 bp->b_blkno = startblk;
1788 LIST_INIT(&bp->b_dep);
1789 s = splbio();
1790 bp->b_vp = NULL;
1791 buf_replacevnode(bp, swapdev_vp);
1792 splx(s);
1793 bp->b_bufsize = bp->b_bcount = (long)npages << PAGE_SHIFT;
1794
1795 /*
1796 * for pageouts we must set "dirtyoff" [NFS client code needs it],
1797 * and we bump v_numoutput (counter of number of active outputs).
1798 */
1799 if (write) {
1800 bp->b_dirtyoff = 0;
1801 bp->b_dirtyend = npages << PAGE_SHIFT;
1802#ifdef UVM_SWAP_ENCRYPT
1803 /* mark the pages in the drum for decryption */
1804 if (swap_encrypt_initialized)
1805 uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
1806#endif
1807 s = splbio();
1808 swapdev_vp->v_numoutput++;
1809 splx(s);
1810 }
1811
1812 /* for async ops we must set up the iodone handler. */
1813 if (async) {
1814 bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
1815 B_PDAEMON : 0);
1816 bp->b_iodone = uvm_aio_biodone;
1817 }
1818
1819 /* now we start the I/O, and if async, return. */
1820 VOP_STRATEGY(bp->b_vp, bp);
1821 if (async)
1822 return (VM_PAGER_PEND);
1823
1824 /* must be sync i/o. wait for it to finish */
1825 (void) biowait(bp);
1826 result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
1827
1828 /* decrypt swap */
1829 if (!write && !(bp->b_flags & B_ERROR)) {
1830 int i;
1831 caddr_t data = (caddr_t)kva;
1832 caddr_t dst = (caddr_t)kva;
1833 u_int64_t block = startblk;
1834
1835 if (bounce)
1836 data = (caddr_t)bouncekva;
1837
1838 for (i = 0; i < npages; i++) {
1839#ifdef UVM_SWAP_ENCRYPT
1840 struct swap_key *key;
1841
1842 /* Check if we need to decrypt */
1843 if (swap_encrypt_initialized &&
1844 uvm_swap_needdecrypt(sdp, startslot + i)) {
1845 key = SWD_KEY(sdp, startslot + i);
1846 if (key->refcount == 0) {
1847 result = VM_PAGER_ERROR;
1848 break;
1849 }
1850 swap_decrypt(key, data, dst, block, PAGE_SIZE);
1851 } else if (bounce) {
1852#else
1853 if (bounce) {
1854#endif
1855 memcpy(dst, data, PAGE_SIZE);
1856 }
1857 data += PAGE_SIZE;
1858 dst += PAGE_SIZE;
1859 block += btodb(PAGE_SIZE);
1860 }
1861 if (bounce)
1862 uvm_pagermapout(bouncekva, npages);
1863 }
1864 /* kill the pager mapping */
1865 uvm_pagermapout(kva, npages);
1866
1867 /* no longer needed; free after encryption/bouncing */
1868 if (!write && bounce)
1869 uvm_swap_freepages(tpps, npages);
1870
1871 /* now dispose of the buf */
1872 s = splbio();
1873 if (bp->b_vp)
1874 brelvp(bp);
1875
1876 if (write && bp->b_vp)
1877 vwakeup(bp->b_vp);
1878 pool_put(&bufpool, bp);
1879 splx(s);
1880
1881 /* finally return. */
1882 return (result);
1883}
1884
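/*
 * Condensed sketch of the bounce decision implemented in uvm_swap_io()
 * (hypothetical helper; dma_constraint as used above): encrypted
 * writes always bounce, because the ciphertext is produced in the
 * bounce pages, and any cluster with a page outside the DMA-reachable
 * range bounces as well.
 */
static int
swap_io_needs_bounce(struct vm_page **pps, int npages, int write, int encrypt)
{
	int i;

	if (write && encrypt)
		return (1);	/* always bounce through swapencrypt */
	for (i = 0; i < npages; i++)
		if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low ||
		    VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high)
			return (1);	/* page not DMA-reachable */
	return (0);
}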
1885void
1886swapmount(void)
1887{
1888 struct swapdev *sdp;
1889 struct swappri *spp;
1890 struct vnode *vp;
1891 dev_t swap_dev = swdevt[0].sw_dev;
1892 char *nam;
1893 char path[MNAMELEN + 1];
1894
1895 /*
1896 * No locking here since we happen to know that we will just be called
1897 * once before any other process has forked.
1898 */
1899 if (swap_dev == NODEV)
1900 return;
1901
1902#if defined(NFSCLIENT)
1903 if (swap_dev == NETDEV) {
1904 extern struct nfs_diskless nfs_diskless;
1905
1906 snprintf(path, sizeof(path), "%s",
1907 nfs_diskless.nd_swap.ndm_host);
1908 vp = nfs_diskless.sw_vp;
1909 goto gotit;
1910 } else
1911#endif
1912 if (bdevvp(swap_dev, &vp))
1913 return;
1914
1915 /* Construct a potential path to swap */
1916 if ((nam = findblkname(major(swap_dev))))
1917 snprintf(path, sizeof(path), "/dev/%s%d%c", nam,
1918 DISKUNIT(swap_dev), 'a' + DISKPART(swap_dev));
1919 else
1920 snprintf(path, sizeof(path), "blkdev0x%x",
1921 swap_dev);
1922
1923#if defined(NFSCLIENT)
1924gotit:
1925#endif
1926 sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO);
1927 spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);
1928
1929 sdp->swd_flags = SWF_FAKE;
1930 sdp->swd_dev = swap_dev;
1931
1932 sdp->swd_pathlen = strlen(path) + 1;
1933 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK | M_ZERO);
1934 strlcpy(sdp->swd_path, path, sdp->swd_pathlen);
1935
1936 sdp->swd_vp = vp;
1937
1938 swaplist_insert(sdp, spp, 0);
1939
1940 if (swap_on(curproc, sdp)) {
1941 swaplist_find(vp, 1);
1942 swaplist_trim();
1943 vput(sdp->swd_vp);
1944 free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen);
1945 free(sdp, M_VMSWAP, sizeof(*sdp));
1946 return;
1947 }
1948}
1949
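/*
 * Worked example for the path construction above (values hypothetical):
 * if findblkname() resolves the major number to "sd" and the device
 * minor is 1, DISKUNIT() yields 0 and DISKPART() yields 1, producing
 * "/dev/sd0b"; when no block name is known, the fallback label has the
 * form "blkdev0x<dev_t in hex>".
 */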
1950#ifdef HIBERNATE
1951int
1952uvm_hibswap(dev_t dev, u_long *sp, u_long *ep)
1953{
1954 struct swapdev *sdp, *swd = NULL;
1955 struct swappri *spp;
1956 struct extent_region *exr, *exrn;
1957 u_long start = 0, end = 0, size = 0;
1958
1959 /* no swap devices configured yet? */
1960 if (uvmexp.nswapdev < 1 || dev != swdevt[0].sw_dev)
1961 return (1);
1962
1963 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
1964 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
1965 if (sdp->swd_dev == dev)
1966 swd = sdp;
1967 }
1968 }
1969
1970 if (swd == NULL || (swd->swd_flags & SWF_ENABLE) == 0)
1971 return (1);
1972
1973 LIST_FOREACH(exr, &swd->swd_ex->ex_regions, er_link) {
1974 u_long gapstart, gapend, gapsize;
1975
1976 gapstart = exr->er_end + 1;
1977 exrn = LIST_NEXT(exr, er_link);
1978 if (!exrn)
1979 break;
1980 gapend = exrn->er_start - 1;
1981 gapsize = gapend - gapstart;
1982 if (gapsize > size) {
1983 start = gapstart;
1984 end = gapend;
1985 size = gapsize;
1986 }
1987 }
1988
1989 if (size) {
1990 *sp = start;
1991 *ep = end;
1992 return (0);
1993 }
1994 return (1);
1995}
1996#endif /* HIBERNATE */
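/*
 * Worked example for the gap scan in uvm_hibswap() (numbers made up):
 * allocated extent regions [10,19] and [50,59] give gapstart = 20,
 * gapend = 49 and gapsize = 49 - 20 = 29.  The subtraction undercounts
 * the inclusive span by one, but every gap is measured the same way,
 * so the comparison still selects the largest free range for hibernate.
 */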