Bug Summary

File: arch/amd64/amd64/pmap.c
Warning: line 2056, column 3
Value stored to 'level' is never read
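
This is the clang static analyzer's dead-store diagnostic (deadcode.DeadStores): the value assigned to 'level' at pmap.c line 2056 is overwritten or discarded before it is ever read. Line 2056 falls outside the excerpt reproduced below, so the snippet here is only an illustrative sketch of the pattern the checker flags, not the flagged pmap.c code (compute_level() is a hypothetical stand-in):

    int level;

    level = PTP_LEVELS;      /* dead store: this value is never read ...  */
    level = compute_level(); /* ... because it is overwritten before use  */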

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name pmap.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free 
-fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/arch/amd64/amd64/pmap.c
1/* $OpenBSD: pmap.c,v 1.148 2021/09/14 16:14:50 kettenis Exp $ */
2/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
3
4/*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright 2001 (c) Wasabi Systems, Inc.
31 * All rights reserved.
32 *
33 * Written by Frank van der Linden for Wasabi Systems, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed for the NetBSD Project by
46 * Wasabi Systems, Inc.
47 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
48 * or promote products derived from this software without specific prior
49 * written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
53 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
54 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
55 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
56 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
57 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
58 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
59 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
61 * POSSIBILITY OF SUCH DAMAGE.
62 */
63
64/*
65 * This is the i386 pmap modified and generalized to support x86-64
66 * as well. The idea is to hide the upper N levels of the page tables
67 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest
68 * is mostly untouched, except that it uses some more generalized
69 * macros and interfaces.
70 *
71 * This pmap has been tested on the i386 as well, and it can be easily
72 * adapted to PAE.
73 *
74 * fvdl@wasabisystems.com 18-Jun-2001
75 */
76
77/*
78 * pmap.c: i386 pmap module rewrite
79 * Chuck Cranor <chuck@ccrc.wustl.edu>
80 * 11-Aug-97
81 *
82 * history of this pmap module: in addition to my own input, i used
83 * the following references for this rewrite of the i386 pmap:
84 *
85 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
86 * BSD hp300 pmap done by Mike Hibler at University of Utah.
87 * it was then ported to the i386 by William Jolitz of UUNET
88 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
89 * project fixed some bugs and provided some speed ups.
90 *
91 * [2] the FreeBSD i386 pmap. this pmap seems to be the
92 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
93 * and David Greenman.
94 *
95 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
96 * between several processors. the VAX version was done by
97 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
98 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
99 * David Golub, and Richard Draves. the alpha version was
100 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
101 * (NetBSD/alpha).
102 */
103
104#include <sys/param.h>
105#include <sys/systm.h>
106#include <sys/atomic.h>
107#include <sys/proc.h>
108#include <sys/pool.h>
109#include <sys/user.h>
110#include <sys/mutex.h>
111
112#include <uvm/uvm.h>
113
114#include <machine/cpu.h>
115#ifdef MULTIPROCESSOR1
116#include <machine/i82489reg.h>
117#include <machine/i82489var.h>
118#endif
119
120#include "vmm.h"
121
122#if NVMM1 > 0
123#include <machine/vmmvar.h>
124#endif /* NVMM > 0 */
125
126#include "acpi.h"
127
128/* #define PMAP_DEBUG */
129
130#ifdef PMAP_DEBUG
131#define DPRINTF(x...) do { printf(x); } while(0)
132#else
133#define DPRINTF(x...)
134#endif /* PMAP_DEBUG */
135
136
137/*
138 * general info:
139 *
140 * - for an explanation of how the i386 MMU hardware works see
141 * the comments in <machine/pte.h>.
142 *
143 * - for an explanation of the general memory structure used by
144 * this pmap (including the recursive mapping), see the comments
145 * in <machine/pmap.h>.
146 *
147 * this file contains the code for the "pmap module." the module's
148 * job is to manage the hardware's virtual to physical address mappings.
149 * note that there are two levels of mapping in the VM system:
150 *
151 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
152 * to map ranges of virtual address space to objects/files. for
153 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
154 * to the file /bin/ls starting at offset zero." note that
155 * the upper layer mapping is not concerned with how individual
156 * vm_pages are mapped.
157 *
158 * [2] the lower layer of the VM system (the pmap) maintains the mappings
159 * from virtual addresses. it is concerned with which vm_page is
160 * mapped where. for example, when you run /bin/ls and start
161 * at page 0x1000 the fault routine may lookup the correct page
162 * of the /bin/ls file and then ask the pmap layer to establish
163 * a mapping for it.
164 *
165 * note that information in the lower layer of the VM system can be
166 * thrown away since it can easily be reconstructed from the info
167 * in the upper layer.
168 *
169 * data structures we use include:
170 * - struct pmap: describes the address space of one process
171 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
172 * - struct pg_to_free: a list of virtual addresses whose mappings
173 * have been changed. used for TLB flushing.
174 */
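/*
 * Illustrative sketch, not from pmap.c itself: how the structures named
 * above fit together, based on the pv code later in this file. Every
 * managed physical page carries a pv list, so given a vm_page the pmap
 * can walk every <pmap, va> pair that currently maps it:
 *
 *	vm_page.mdpage.pv_list --> pv_entry { pv_pmap, pv_va, pv_ptp }
 *	                             --> pv_next --> ... (one entry per mapping)
 */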
175
176/*
177 * memory allocation
178 *
179 * - there are three data structures that we must dynamically allocate:
180 *
181 * [A] new process' page directory page (PDP)
182 * - plan 1: done at pmap_create() we use
183 * pool_get(&pmap_pmap_pool, PR_WAITOK) to do this allocation.
184 *
185 * if we are low in free physical memory then we sleep in
186 * pool_get() -- in this case this is ok since we are creating
187 * a new pmap and should not be holding any locks.
188 *
189 * XXX: the fork code currently has no way to return an "out of
190 * memory, try again" error code since uvm_fork [fka vm_fork]
191 * is a void function.
192 *
193 * [B] new page tables pages (PTP)
194 * call uvm_pagealloc()
195 * => success: zero page, add to pm_pdir
196 * => failure: we are out of free vm_pages, let pmap_enter()
197 * tell UVM about it.
198 *
199 * note: for kernel PTPs, we start with NKPTP of them. as we map
200 * kernel memory (at uvm_map time) we check to see if we've grown
201 * the kernel pmap. if so, we call the optional function
202 * pmap_growkernel() to grow the kernel PTPs in advance.
203 *
204 * [C] pv_entry structures
205 * - try to allocate one from the pool.
206 * If we fail, we simply let pmap_enter() tell UVM about it.
207 */
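/*
 * Illustrative sketch, not from pmap.c itself: the three allocations
 * described above, in simplified form (locking and failure handling
 * omitted; pool and flag names match code later in this file, except the
 * PR_NOWAIT shown for [C], which is an assumption here):
 *
 *	[A] pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);	sleeps if low
 *	[B] ptp  = uvm_pagealloc(obj, off, NULL,
 *	               UVM_PGA_USERESERVE | UVM_PGA_ZERO);	NULL on failure
 *	[C] pve  = pool_get(&pmap_pv_pool, PR_NOWAIT);		NULL on failure
 */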
208
209long nkptp[] = NKPTP_INITIALIZER{ 0, 0, 0, 0 };
210
211const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
212const int ptp_shifts[] = PTP_SHIFT_INITIALIZER{ 12, 21, 30, 39 };
213const long nkptpmax[] = NKPTPMAX_INITIALIZER;
214const long nbpd[] = NBPD_INITIALIZER;
215pd_entry_t *const normal_pdes[] = PDES_INITIALIZER;
216
217#define pmap_pte_set(p, n)		atomic_swap_64(p, n)
218#define pmap_pte_clearbits(p, b)	x86_atomic_clearbits_u64(p, b)
219#define pmap_pte_setbits(p, b)		x86_atomic_setbits_u64(p, b)
220
221/*
222 * global data structures
223 */
224
225struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
226
227/*
228 * pmap_pg_wc: if our processor supports PAT then we set this
229 * to be the pte bits for Write Combining. Else we fall back to
230 * UC- so mtrrs can override the cacheability;
231 */
232int pmap_pg_wc = PG_UCMINUS(0x0000000000000010UL);
233
234/*
235 * pmap_use_pcid: nonzero if PCID use is enabled (currently we require INVPCID)
236 *
237 * The next three are zero unless and until PCID support is enabled so code
238 * can just 'or' them in as needed without tests.
239 * cr3_pcid: CR3_REUSE_PCID
240 * cr3_pcid_proc and cr3_pcid_temp: PCID_PROC and PCID_TEMP
241 */
242#if PCID_KERN0 != 0
243# error "pmap.c assumes PCID_KERN is zero"
244#endif
245int pmap_use_pcid;
246static u_int cr3_pcid_proc;
247static u_int cr3_pcid_temp;
248/* these two are accessed from locore.o */
249paddr_t cr3_reuse_pcid;
250paddr_t cr3_pcid_proc_intel;
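/*
 * Illustrative note, not from pmap.c itself: because the cr3_* values
 * above remain zero until PCID support is enabled, callers may OR them
 * into a CR3 image unconditionally, as pmap_activate() does further down:
 *
 *	pcb->pcb_cr3 = pmap->pm_pdirpa | cr3_pcid_proc;
 */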
251
252/*
253 * other data structures
254 */
255
256pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
257int pmap_initialized = 0; /* pmap_init done yet? */
258
259/*
260 * pv management structures.
261 */
262struct pool pmap_pv_pool;
263
264/*
265 * linked list of all non-kernel pmaps
266 */
267
268struct pmap_head pmaps;
269struct mutex pmaps_lock = MUTEX_INITIALIZER(IPL_VM);
270
271/*
272 * pool that pmap structures are allocated from
273 */
274
275struct pool pmap_pmap_pool;
276
277/*
278 * When we're freeing a ptp, we need to delay the freeing until all
279 * tlb shootdown has been done. This is the list of the to-be-freed pages.
280 */
281TAILQ_HEAD(pg_to_free, vm_page);
282
283/*
284 * pool that PDPs are allocated from
285 */
286
287struct pool pmap_pdp_pool;
288void pmap_pdp_ctor(pd_entry_t *);
289void pmap_pdp_ctor_intel(pd_entry_t *);
290
291extern vaddr_t msgbuf_vaddr;
292extern paddr_t msgbuf_paddr;
293
294extern vaddr_t idt_vaddr; /* we allocate IDT early */
295extern paddr_t idt_paddr;
296
297extern vaddr_t lo32_vaddr;
298extern vaddr_t lo32_paddr;
299
300vaddr_t virtual_avail;
301extern int end;
302
303/*
304 * local prototypes
305 */
306
307void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *,
308 vaddr_t, struct vm_page *);
309struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t);
310struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
311int pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs);
312void pmap_free_ptp(struct pmap *, struct vm_page *,
313 vaddr_t, struct pg_to_free *);
314void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *);
315#ifdef MULTIPROCESSOR1
316static int pmap_is_active(struct pmap *, struct cpu_info *);
317#endif
318paddr_t pmap_map_ptes(struct pmap *);
319struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
320void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
321void pmap_remove_ept(struct pmap *, vaddr_t, vaddr_t);
322void pmap_do_remove_ept(struct pmap *, vaddr_t);
323int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
324int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
325 vaddr_t, int, struct pv_entry **);
326void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
327 vaddr_t, vaddr_t, int, struct pv_entry **);
328#define PMAP_REMOVE_ALL0 0 /* remove all mappings */
329#define PMAP_REMOVE_SKIPWIRED1 1 /* skip wired mappings */
330
331void pmap_unmap_ptes(struct pmap *, paddr_t);
332int pmap_get_physpage(vaddr_t, int, paddr_t *);
333int pmap_pdes_valid(vaddr_t, pd_entry_t *);
334void pmap_alloc_level(vaddr_t, int, long *);
335
336static inline
337void pmap_sync_flags_pte(struct vm_page *, u_long);
338
339void pmap_tlb_shootpage(struct pmap *, vaddr_t, int);
340void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t, int);
341void pmap_tlb_shoottlb(struct pmap *, int);
342#ifdef MULTIPROCESSOR1
343void pmap_tlb_shootwait(void);
344#else
345#define pmap_tlb_shootwait() do { } while (0)
346#endif
347
348/*
349 * p m a p i n l i n e h e l p e r f u n c t i o n s
350 */
351
352/*
353 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
354 * of course the kernel is always loaded
355 */
356
357static inline int
358pmap_is_curpmap(struct pmap *pmap)
359{
360 return((pmap == pmap_kernel()(&kernel_pmap_store)) ||
361 (pmap->pm_pdirpa == (rcr3() & CR3_PADDR0x7ffffffffffff000ULL)));
362}
363
364/*
365 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
366 */
367
368#ifdef MULTIPROCESSOR1
369static inline int
370pmap_is_active(struct pmap *pmap, struct cpu_info *ci)
371{
372 return pmap == pmap_kernel()(&kernel_pmap_store) || pmap == ci->ci_proc_pmap;
373}
374#endif
375
376static inline u_int
377pmap_pte2flags(u_long pte)
378{
379 return (((pte & PG_U0x0000000000000020UL) ? PG_PMAP_REF0x02000000 : 0) |
380 ((pte & PG_M0x0000000000000040UL) ? PG_PMAP_MOD0x01000000 : 0));
381}
382
383static inline void
384pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
385{
386 if (pte & (PG_U0x0000000000000020UL|PG_M0x0000000000000040UL)) {
387 atomic_setbits_intx86_atomic_setbits_u32(&pg->pg_flags, pmap_pte2flags(pte));
388 }
389}
390
391/*
392 * pmap_map_ptes: map a pmap's PTEs into KVM
393 *
394 * This should not be done for EPT pmaps
395 */
396paddr_t
397pmap_map_ptes(struct pmap *pmap)
398{
399 paddr_t cr3;
400
401 KASSERT(pmap->pm_type != PMAP_TYPE_EPT);
402
403 /* the kernel's pmap is always accessible */
404 if (pmap == pmap_kernel()(&kernel_pmap_store))
405 return 0;
406
407 /*
408 * Lock the target map before switching to its page tables to
409 * guarantee other CPUs have finished changing the tables before
410 * we potentially start caching table and TLB entries.
411 */
412 mtx_enter(&pmap->pm_mtx);
413
414 cr3 = rcr3();
415 KASSERT((cr3 & CR3_PCID) == PCID_KERN ||
416 (cr3 & CR3_PCID) == PCID_PROC);
417 if (pmap->pm_pdirpa == (cr3 & CR3_PADDR0x7ffffffffffff000ULL))
418 cr3 = 0;
419 else {
420 cr3 |= cr3_reuse_pcid;
421 lcr3(pmap->pm_pdirpa | cr3_pcid_temp);
422 }
423
424 return cr3;
425}
426
427void
428pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3)
429{
430 if (pmap != pmap_kernel()(&kernel_pmap_store))
431 mtx_leave(&pmap->pm_mtx);
432
433 if (save_cr3 != 0)
434 lcr3(save_cr3);
435}
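/*
 * Illustrative sketch, not from pmap.c itself: pmap_map_ptes() and
 * pmap_unmap_ptes() bracket any access to another pmap's tables; the
 * returned CR3 value (0 when no switch was needed) is handed back so the
 * unmap side can restore it:
 *
 *	paddr_t scr3 = pmap_map_ptes(pmap);
 *	... read or modify the page tables ...
 *	pmap_unmap_ptes(pmap, scr3);
 */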
436
437int
438pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs)
439{
440 u_long mask, shift;
441 pd_entry_t pde;
442 paddr_t pdpa;
443 int lev;
444
445 pdpa = pm->pm_pdirpa;
446 shift = L4_SHIFT39;
447 mask = L4_MASK0x0000ff8000000000UL;
448 for (lev = PTP_LEVELS4; lev > 0; lev--) {
449 *pd = (pd_entry_t *)PMAP_DIRECT_MAP(pdpa);
450 *offs = (VA_SIGN_POS(va)((va) & ~0xffff000000000000) & mask) >> shift;
451 pde = (*pd)[*offs];
452
453 /* Large pages are different, break early if we run into one. */
454 if ((pde & (PG_PS0x0000000000000080UL|PG_V0x0000000000000001UL)) != PG_V0x0000000000000001UL)
455 return (lev - 1);
456
457 pdpa = ((*pd)[*offs] & PG_FRAME0x000ffffffffff000UL);
458 /* 4096/8 == 512 == 2^9 entries per level */
459 shift -= 9;
460 mask >>= 9;
461 }
462
463 return (0);
464}
465
466/*
467 * p m a p k e n t e r f u n c t i o n s
468 *
469 * functions to quickly enter/remove pages from the kernel address
470 * space. pmap_kremove is exported to MI kernel. we make use of
471 * the recursive PTE mappings.
472 */
473
474/*
475 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
476 *
477 * => no need to lock anything, assume va is already allocated
478 * => should be faster than normal pmap enter function
479 */
480
481void
482pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
483{
484 pt_entry_t *pte, opte, npte;
485
486 pte = kvtopte(va);
487
488 npte = (pa & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1))) | ((prot & PROT_WRITE0x02) ? PG_RW0x0000000000000002UL : PG_RO0x0000000000000000UL) |
489 ((pa & PMAP_NOCACHE0x1) ? PG_N0x0000000000000010UL : 0) |
490 ((pa & PMAP_WC0x2) ? pmap_pg_wc : 0) | PG_V0x0000000000000001UL;
491
492 /* special 1:1 mappings in the first 2MB must not be global */
493 if (va >= (vaddr_t)NBPD_L2(1ULL << 21))
494 npte |= pg_g_kern;
495
496 if (!(prot & PROT_EXEC0x04))
497 npte |= pg_nx;
498 opte = pmap_pte_set(pte, npte)_atomic_swap_64((pte), (npte));
499#ifdef LARGEPAGES
500 /* XXX For now... */
501 if (opte & PG_PS0x0000000000000080UL)
502 panic("%s: PG_PS", __func__);
503#endif
504 if (pmap_valid_entry(opte)((opte) & 0x0000000000000001UL)) {
505 if (pa & PMAP_NOCACHE0x1 && (opte & PG_N0x0000000000000010UL) == 0)
506 wbinvd_on_all_cpus();
507 /* This shouldn't happen */
508 pmap_tlb_shootpage(pmap_kernel()(&kernel_pmap_store), va, 1);
509 pmap_tlb_shootwait();
510 }
511}
512
513/*
514 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
515 *
516 * => no need to lock anything
517 * => caller must dispose of any vm_page mapped in the va range
518 * => note: not an inline function
519 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
520 * => we assume kernel only unmaps valid addresses and thus don't bother
521 * checking the valid bit before doing TLB flushing
522 */
523
524void
525pmap_kremove(vaddr_t sva, vsize_t len)
526{
527 pt_entry_t *pte, opte;
528 vaddr_t va, eva;
529
530 eva = sva + len;
531
532 for (va = sva; va != eva; va += PAGE_SIZE(1 << 12)) {
533 pte = kvtopte(va);
534
535 opte = pmap_pte_set(pte, 0)_atomic_swap_64((pte), (0));
536#ifdef LARGEPAGES
537 KASSERT((opte & PG_PS) == 0);
538#endif
539 KASSERT((opte & PG_PVLIST) == 0);
540 }
541
542 pmap_tlb_shootrange(pmap_kernel()(&kernel_pmap_store), sva, eva, 1);
543 pmap_tlb_shootwait();
544}
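/*
 * Illustrative sketch, not from pmap.c itself: a typical pairing of the
 * two functions above for one page-aligned kernel VA (PMAP_NOCACHE is
 * just an example of the pa-encoded attributes pmap_kenter_pa handles):
 *
 *	pmap_kenter_pa(va, pa | PMAP_NOCACHE, PROT_READ | PROT_WRITE);
 *	... use the mapping ...
 *	pmap_kremove(va, PAGE_SIZE);
 */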
545
546/*
547 * pmap_set_pml4_early
548 *
549 * Utility function to map 2GB of 2MB pages to 'pa'. The VA that is assigned
550 * is the pml4 entry for 'early mappings' (see pmap.h). This function is used
551 * by display drivers that need to map their framebuffers early, before the
552 * pmap is fully initialized (eg, to show panic messages).
553 *
554 * Users of this function must call pmap_clear_pml4_early to remove the
555 * mapping when finished.
556 *
557 * Parameters:
558 * pa: phys addr to map
559 *
560 * Return value:
561 * VA mapping to 'pa'. This mapping is 2GB in size and starts at the base
562 * of the 2MB region containing 'va'.
563 */
564vaddr_t
565pmap_set_pml4_early(paddr_t pa)
566{
567 extern paddr_t early_pte_pages;
568 pt_entry_t *pml4e, *pte;
569 int i, j, off;
570 paddr_t curpa;
571 vaddr_t va;
572
573 pml4e = (pt_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE0xffffffff80000000);
574 pml4e[PDIR_SLOT_EARLY((511 - 4) - 1)] = (pd_entry_t)early_pte_pages | PG_V0x0000000000000001UL | PG_RW0x0000000000000002UL;
575
576 off = pa & PAGE_MASK_L2((1ULL << 21) - 1);
577 curpa = pa & L2_FRAME;
578
579 pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages);
580 memset(pte, 0, 3 * NBPG)__builtin_memset((pte), (0), (3 * (1 << 12)));
581
582 pte[0] = (early_pte_pages + NBPG(1 << 12)) | PG_V0x0000000000000001UL | PG_RW0x0000000000000002UL;
583 pte[1] = (early_pte_pages + 2 * NBPG(1 << 12)) | PG_V0x0000000000000001UL | PG_RW0x0000000000000002UL;
584
585 pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages + NBPG);
586 for (i = 0; i < 2; i++) {
587 /* 2 early pages of mappings */
588 for (j = 0; j < 512; j++) {
589 /* j[0..511] : 2MB mappings per page */
590 pte[(i * 512) + j] = curpa | PG_V0x0000000000000001UL | PG_RW0x0000000000000002UL | PG_PS0x0000000000000080UL;
591 curpa += (2 * 1024 * 1024);
592 }
593 }
594
595 va = (vaddr_t)((PDIR_SLOT_EARLY((511 - 4) - 1) * 512ULL) << L3_SHIFT30) + off;
596 return VA_SIGN_NEG(va)((va) | 0xffff000000000000);
597}
598
599/*
600 * pmap_clear_pml4_early
601 *
602 * Clears the mapping previously established with pmap_set_pml4_early.
603 */
604void
605pmap_clear_pml4_early(void)
606{
607 extern paddr_t early_pte_pages;
608 pt_entry_t *pml4e, *pte;
609
610 pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages);
611 memset(pte, 0, 3 * NBPG)__builtin_memset((pte), (0), (3 * (1 << 12)));
612
613 pml4e = (pd_entry_t *)pmap_kernel()(&kernel_pmap_store)->pm_pdir;
614 pml4e[PDIR_SLOT_EARLY((511 - 4) - 1)] = 0;
615 tlbflush();
616}
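/*
 * Illustrative sketch, not from pmap.c itself: the pairing intended by
 * the comment above, for a driver mapping a framebuffer at a physical
 * address fb_pa (hypothetical name):
 *
 *	vaddr_t fb_va = pmap_set_pml4_early(fb_pa);
 *	... early console or panic output through fb_va ...
 *	pmap_clear_pml4_early();
 */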
617
618/*
619 * p m a p i n i t f u n c t i o n s
620 *
621 * pmap_bootstrap and pmap_init are called during system startup
622 * to init the pmap module. pmap_bootstrap() does a low level
623 * init just to get things rolling. pmap_init() finishes the job.
624 */
625
626/*
627 * pmap_bootstrap: get the system in a state where it can run with VM
628 * properly enabled (called before main()). the VM system is
629 * fully init'd later...
630 */
631
632paddr_t
633pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
634{
635 vaddr_t kva_start = VM_MIN_KERNEL_ADDRESS0xffff800000000000;
636 struct pmap *kpm;
637 int curslot, i, j, p;
638 long ndmpdp;
639 paddr_t dmpd, dmpdp, start_cur, cur_pa;
640 vaddr_t kva, kva_end;
641 pt_entry_t *pml3, *pml2;
642
643 /*
644 * define the boundaries of the managed kernel virtual address
645 * space.
646 */
647
648 virtual_avail = kva_start; /* first free KVA */
649
650 /*
651 * set up protection_codes: we need to be able to convert from
652 * a MI protection code (some combo of VM_PROT...) to something
653 * we can jam into a i386 PTE.
654 */
655
656 protection_codes[PROT_NONE0x00] = pg_nx; /* --- */
657 protection_codes[PROT_EXEC0x04] = PG_RO0x0000000000000000UL; /* --x */
658 protection_codes[PROT_READ0x01] = PG_RO0x0000000000000000UL | pg_nx; /* -r- */
659 protection_codes[PROT_READ0x01 | PROT_EXEC0x04] = PG_RO0x0000000000000000UL; /* -rx */
660 protection_codes[PROT_WRITE0x02] = PG_RW0x0000000000000002UL | pg_nx; /* w-- */
661 protection_codes[PROT_WRITE0x02 | PROT_EXEC0x04] = PG_RW0x0000000000000002UL; /* w-x */
662 protection_codes[PROT_WRITE0x02 | PROT_READ0x01] = PG_RW0x0000000000000002UL | pg_nx; /* wr- */
663 protection_codes[PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04] = PG_RW0x0000000000000002UL; /* wrx */
664
665 /*
666 * now we init the kernel's pmap
667 *
668 * the kernel pmap's pm_obj is not used for much. however, in
669 * user pmaps the pm_obj contains the list of active PTPs.
670 * the pm_obj currently does not have a pager.
671 */
672
673 kpm = pmap_kernel()(&kernel_pmap_store);
674 for (i = 0; i < PTP_LEVELS4 - 1; i++) {
675 uvm_obj_init(&kpm->pm_obj[i], &pmap_pager, 1);
676 kpm->pm_ptphint[i] = NULL((void *)0);
677 }
678 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
679 kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE0xffffffff80000000);
680 kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3;
681 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
682 atop(kva_start - VM_MIN_KERNEL_ADDRESS)((kva_start - 0xffff800000000000) >> 12);
683 /*
684 * the above is just a rough estimate and not critical to the proper
685 * operation of the system.
686 */
687
688 kpm->pm_type = PMAP_TYPE_NORMAL1;
689
690 curpcb->pcb_pmap = kpm; /* proc0's pcb */
691
692 /*
693 * Configure and enable PCID use if supported.
694 * Currently we require INVPCID support.
695 */
696 if ((cpu_ecxfeature & CPUIDECX_PCID0x00020000) && cpuid_level >= 0x07) {
697 uint32_t ebx, dummy;
698 CPUID_LEAF(0x7, 0, dummy, ebx, dummy, dummy);
699 if (ebx & SEFF0EBX_INVPCID0x00000400) {
700 pmap_use_pcid = 1;
701 /*
702 * We cannot use global mappings because
703 * invpcid function 0 does not invalidate global
704 * mappings. The hardware can cache kernel
705 * mappings based on PCID_KERN, i.e. there is no
706 * need for global mappings.
707 */
708 pg_g_kern = 0;
709 lcr4( rcr4() | CR4_PCIDE0x00020000 );
710 cr3_pcid_proc = PCID_PROC1;
711 cr3_pcid_temp = PCID_TEMP3;
712 cr3_reuse_pcid = CR3_REUSE_PCID(1ULL << 63);
713 cr3_pcid_proc_intel = PCID_PROC_INTEL2;
714 }
715 }
716
717 /*
718 * Add PG_G attribute to already mapped kernel pages. pg_g_kern
719 * is calculated in locore0.S and may be set to:
720 *
721 * 0 if this CPU does not safely support global pages in the kernel
722 * (Intel/Meltdown)
723 * PG_G if this CPU does safely support global pages in the kernel
724 * (AMD)
725 */
726#if KERNBASE0xffffffff80000000 == VM_MIN_KERNEL_ADDRESS0xffff800000000000
727 for (kva = VM_MIN_KERNEL_ADDRESS0xffff800000000000 ; kva < virtual_avail ;
728#else
729 kva_end = roundup((vaddr_t)&end, PAGE_SIZE);
730 for (kva = KERNBASE0xffffffff80000000; kva < kva_end ;
731#endif
732 kva += PAGE_SIZE(1 << 12)) {
733 unsigned long p1i = pl1_i(kva);
734 if (pmap_valid_entry(PTE_BASE[p1i]))
735 PTE_BASE[p1i] |= pg_g_kern;
736 }
737
738 /*
739 * Map the direct map. The first 4GB were mapped in locore, here
740 * we map the rest if it exists. We actually use the direct map
741 * here to set up the page tables, we're assuming that we're still
742 * operating in the lower 4GB of memory.
743 *
744 * Map (up to) the first 512GB of physical memory first. This part
745 * is handled differently than physical memory > 512GB since we have
746 * already mapped part of this range in locore0.
747 */
748 ndmpdp = (max_pa + NBPD_L3(1ULL << 30) - 1) >> L3_SHIFT30;
749 if (ndmpdp < NDML2_ENTRIES4)
750 ndmpdp = NDML2_ENTRIES4; /* At least 4GB */
751 if (ndmpdp > 512)
752 ndmpdp = 512; /* At most 512GB */
753
754 dmpdp = kpm->pm_pdir[PDIR_SLOT_DIRECT(511 - 4)] & PG_FRAME0x000ffffffffff000UL;
755
756 dmpd = first_avail; first_avail += ndmpdp * PAGE_SIZE(1 << 12);
757
758 for (i = NDML2_ENTRIES4; i < NPDPG((1 << 12) / sizeof (pd_entry_t)) * ndmpdp; i++) {
759 paddr_t pdp;
760 vaddr_t va;
761
762 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]);
763 va = PMAP_DIRECT_MAP(pdp);
764
765 *((pd_entry_t *)va) = ((paddr_t)i << L2_SHIFT21);
766 *((pd_entry_t *)va) |= PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL | PG_PS0x0000000000000080UL | pg_g_kern | PG_U0x0000000000000020UL |
767 PG_M0x0000000000000040UL | pg_nx;
768 }
769
770 for (i = NDML2_ENTRIES4; i < ndmpdp; i++) {
771 paddr_t pdp;
772 vaddr_t va;
773
774 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
775 va = PMAP_DIRECT_MAP(pdp);
776
777 *((pd_entry_t *)va) = dmpd + (i << PAGE_SHIFT12);
778 *((pd_entry_t *)va) |= PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL | PG_U0x0000000000000020UL | PG_M0x0000000000000040UL | pg_nx;
779 }
780
781 kpm->pm_pdir[PDIR_SLOT_DIRECT(511 - 4)] = dmpdp | PG_V0x0000000000000001UL | PG_KW0x0000000000000002UL | PG_U0x0000000000000020UL |
782 PG_M0x0000000000000040UL | pg_nx;
783
784 /* Map any remaining physical memory > 512GB */
785 for (curslot = 1 ; curslot < NUM_L4_SLOT_DIRECT4 ; curslot++) {
786 /*
787 * Start of current range starts at PA (curslot) * 512GB
788 */
789 start_cur = (paddr_t)(curslot * NBPD_L4(1ULL << 39));
790 if (max_pa > start_cur) {
791 /* Next 512GB, new PML4e and L3(512GB) page */
792 dmpd = first_avail; first_avail += PAGE_SIZE(1 << 12);
793 pml3 = (pt_entry_t *)PMAP_DIRECT_MAP(dmpd);
794 kpm->pm_pdir[PDIR_SLOT_DIRECT(511 - 4) + curslot] = dmpd |
795 PG_KW0x0000000000000002UL | PG_V0x0000000000000001UL | PG_U0x0000000000000020UL | PG_M0x0000000000000040UL | pg_nx;
796
797 /* Calculate full 1GB pages in this 512GB region */
798 p = ((max_pa - start_cur) >> L3_SHIFT30);
799
800 /* Check if a partial (<1GB) page remains */
801 if (max_pa & L2_MASK0x000000003fe00000UL)
802 p++;
803
804 /*
805 * Handle the case where this range is full and there
806 * is still more memory after (p would be > 512).
807 */
808 if (p > NPDPG((1 << 12) / sizeof (pd_entry_t)))
809 p = NPDPG((1 << 12) / sizeof (pd_entry_t));
810
811 /* Allocate 'p' L2(1GB) pages and populate */
812 for (i = 0; i < p; i++) {
813 dmpd = first_avail; first_avail += PAGE_SIZE(1 << 12);
814 pml2 = (pt_entry_t *)PMAP_DIRECT_MAP(dmpd);
815 pml3[i] = dmpd |
816 PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL | PG_U0x0000000000000020UL | PG_M0x0000000000000040UL | pg_nx;
817
818 cur_pa = start_cur + (i << L3_SHIFT30);
819 j = 0;
820
821 while (cur_pa < max_pa && j < NPDPG((1 << 12) / sizeof (pd_entry_t))) {
822 pml2[j] = curslot * NBPD_L4(1ULL << 39) +
823 (uint64_t)i * NBPD_L3(1ULL << 30) +
824 (uint64_t)j * NBPD_L2(1ULL << 21);
825 pml2[j] |= PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL | pg_g_kern |
826 PG_U0x0000000000000020UL | PG_M0x0000000000000040UL | pg_nx | PG_PS0x0000000000000080UL;
827 cur_pa += NBPD_L2(1ULL << 21);
828 j++;
829 }
830 }
831 }
832 }
833
834 tlbflush();
835
836 msgbuf_vaddr = virtual_avail;
837 virtual_avail += round_page(MSGBUFSIZE);
838
839 idt_vaddr = virtual_avail;
840 virtual_avail += 2 * PAGE_SIZE(1 << 12);
841 idt_paddr = first_avail; /* steal a page */
842 first_avail += 2 * PAGE_SIZE(1 << 12);
843
844#if defined(MULTIPROCESSOR1) || \
845 (NACPI1 > 0 && !defined(SMALL_KERNEL))
846 /*
847 * Grab a page below 4G for things that need it (i.e.
848 * having an initial %cr3 for the MP trampoline).
849 */
850 lo32_vaddr = virtual_avail;
851 virtual_avail += PAGE_SIZE(1 << 12);
852 lo32_paddr = first_avail;
853 first_avail += PAGE_SIZE(1 << 12);
854#endif
855
856 /*
857 * init the global lists.
858 */
859 LIST_INIT(&pmaps)do { ((&pmaps)->lh_first) = ((void *)0); } while (0);
860
861 /*
862 * initialize the pmap pools.
863 */
864
865 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_VM0xa, 0,
866 "pmappl", NULL((void *)0));
867 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM0xa, 0,
868 "pvpl", &pool_allocator_single);
869 pool_sethiwat(&pmap_pv_pool, 32 * 1024);
870
871 /*
872 * initialize the PDE pool.
873 */
874
875 pool_init(&pmap_pdp_pool, PAGE_SIZE(1 << 12), 0, IPL_VM0xa, 0,
876 "pdppl", &pool_allocator_single);
877
878 kpm->pm_pdir_intel = NULL((void *)0);
879 kpm->pm_pdirpa_intel = 0;
880
881 /*
882 * ensure the TLB is sync'd with reality by flushing it...
883 */
884
885 tlbflush();
886
887 return first_avail;
888}
889
890/*
891 * pmap_randomize
892 *
893 * Randomizes the location of the kernel pmap
894 */
895void
896pmap_randomize(void)
897{
898 pd_entry_t *pml4va, *oldpml4va;
899 paddr_t pml4pa;
900 int i;
901
902 pml4va = km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, &kd_nowait);
903 if (pml4va == NULL((void *)0))
904 panic("%s: km_alloc failed", __func__);
905
906 /* Copy old PML4 page to new one */
907 oldpml4va = pmap_kernel()(&kernel_pmap_store)->pm_pdir;
908 memcpy(pml4va, oldpml4va, PAGE_SIZE)__builtin_memcpy((pml4va), (oldpml4va), ((1 << 12)));
909
910 /* Switch to new PML4 */
911 pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)pml4va, &pml4pa);
912 lcr3(pml4pa);
913
914 /* Fixup pmap_kernel and proc0's %cr3 */
915 pmap_kernel()(&kernel_pmap_store)->pm_pdirpa = pml4pa;
916 pmap_kernel()(&kernel_pmap_store)->pm_pdir = pml4va;
917 proc0.p_addr->u_pcb.pcb_cr3 = pml4pa;
918
919 /* Fixup recursive PTE PML4E slot. We are only changing the PA */
920 pml4va[PDIR_SLOT_PTE255] = pml4pa | (pml4va[PDIR_SLOT_PTE255] & ~PG_FRAME0x000ffffffffff000UL);
921
922 for (i = 0; i < NPDPG((1 << 12) / sizeof (pd_entry_t)); i++) {
923 /* PTE slot already handled earlier */
924 if (i == PDIR_SLOT_PTE255)
925 continue;
926
927 if (pml4va[i] & PG_FRAME0x000ffffffffff000UL)
928 pmap_randomize_level(&pml4va[i], 3);
929 }
930
931 /* Wipe out bootstrap PML4 */
932 memset(oldpml4va, 0, PAGE_SIZE)__builtin_memset((oldpml4va), (0), ((1 << 12)));
933 tlbflush();
934}
935
936void
937pmap_randomize_level(pd_entry_t *pde, int level)
938{
939 pd_entry_t *new_pd_va;
940 paddr_t old_pd_pa, new_pd_pa;
941 vaddr_t old_pd_va;
942 struct vm_page *pg;
943 int i;
944
945 if (level == 0)
946 return;
947
948 if (level < PTP_LEVELS4 - 1 && (*pde & PG_PS0x0000000000000080UL))
949 return;
950
951 new_pd_va = km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, &kd_nowait);
952 if (new_pd_va == NULL((void *)0))
953 panic("%s: cannot allocate page for L%d page directory",
954 __func__, level);
955
956 old_pd_pa = *pde & PG_FRAME0x000ffffffffff000UL;
957 old_pd_va = PMAP_DIRECT_MAP(old_pd_pa);
958 pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)new_pd_va, &new_pd_pa);
959 memcpy(new_pd_va, (void *)old_pd_va, PAGE_SIZE);
960 *pde = new_pd_pa | (*pde & ~PG_FRAME0x000ffffffffff000UL);
961
962 tlbflush();
963 memset((void *)old_pd_va, 0, PAGE_SIZE)__builtin_memset(((void *)old_pd_va), (0), ((1 << 12)));
964
965 pg = PHYS_TO_VM_PAGE(old_pd_pa);
966 if (pg != NULL((void *)0)) {
967 pg->wire_count--;
968 pmap_kernel()(&kernel_pmap_store)->pm_stats.resident_count--;
969 if (pg->wire_count <= 1)
970 uvm_pagefree(pg);
971 }
972
973 for (i = 0; i < NPDPG((1 << 12) / sizeof (pd_entry_t)); i++)
974 if (new_pd_va[i] & PG_FRAME0x000ffffffffff000UL)
975 pmap_randomize_level(&new_pd_va[i], level - 1);
976}
977
978/*
979 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various
980 * trampoline code can be entered.
981 */
982paddr_t
983pmap_prealloc_lowmem_ptps(paddr_t first_avail)
984{
985 pd_entry_t *pdes;
986 int level;
987 paddr_t newp;
988
989 pdes = pmap_kernel()(&kernel_pmap_store)->pm_pdir;
990 level = PTP_LEVELS4;
991 for (;;) {
992 newp = first_avail; first_avail += PAGE_SIZE(1 << 12);
993 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE);
994 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW;
995 level--;
996 if (level <= 1)
997 break;
998 pdes = normal_pdes[level - 2];
999 }
1000
1001 return first_avail;
1002}
1003
1004/*
1005 * pmap_init: no further initialization required on this platform
1006 */
1007void
1008pmap_init(void)
1009{
1010 pmap_initialized = 1;
1011}
1012
1013/*
1014 * p v _ e n t r y f u n c t i o n s
1015 */
1016
1017/*
1018 * main pv_entry manipulation functions:
1019 * pmap_enter_pv: enter a mapping onto a pv list
1020 * pmap_remove_pv: remove a mapping from a pv list
1021 */
1022
1023/*
1024 * pmap_enter_pv: enter a mapping onto a pv list
1025 *
1026 * => caller should adjust ptp's wire_count before calling
1027 *
1028 * pve: preallocated pve for us to use
1029 * ptp: PTP in pmap that maps this VA
1030 */
1031
1032void
1033pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap,
1034 vaddr_t va, struct vm_page *ptp)
1035{
1036 pve->pv_pmap = pmap;
1037 pve->pv_va = va;
1038 pve->pv_ptp = ptp; /* NULL for kernel pmap */
1039 mtx_enter(&pg->mdpage.pv_mtx);
1040 pve->pv_next = pg->mdpage.pv_list; /* add to ... */
1041 pg->mdpage.pv_list = pve; /* ... list */
1042 mtx_leave(&pg->mdpage.pv_mtx);
1043}
1044
1045/*
1046 * pmap_remove_pv: try to remove a mapping from a pv_list
1047 *
1048 * => caller should adjust ptp's wire_count and free PTP if needed
1049 * => we return the removed pve
1050 */
1051
1052struct pv_entry *
1053pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
1054{
1055 struct pv_entry *pve, **prevptr;
1056
1057 mtx_enter(&pg->mdpage.pv_mtx);
1058 prevptr = &pg->mdpage.pv_list;
1059 while ((pve = *prevptr) != NULL((void *)0)) {
1060 if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
1061 *prevptr = pve->pv_next; /* remove it! */
1062 break;
1063 }
1064 prevptr = &pve->pv_next; /* previous pointer */
1065 }
1066 mtx_leave(&pg->mdpage.pv_mtx);
1067 return(pve); /* return removed pve */
1068}
1069
1070/*
1071 * p t p f u n c t i o n s
1072 */
1073
1074struct vm_page *
1075pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
1076{
1077 int lidx = level - 1;
1078 struct vm_page *pg;
1079
1080 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
1081 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])((pmap->pm_ptphint[lidx])->phys_addr))
1082 return (pmap->pm_ptphint[lidx]);
1083
1084 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
1085
1086 return pg;
1087}
1088
1089void
1090pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
1091 struct pg_to_free *pagelist)
1092{
1093 int lidx;
1094 struct uvm_object *obj;
1095
1096 lidx = level - 1;
1097
1098 obj = &pmap->pm_obj[lidx];
1099 pmap->pm_stats.resident_count--;
1100 if (pmap->pm_ptphint[lidx] == ptp)
1101 pmap->pm_ptphint[lidx] = RBT_ROOT(uvm_objtree, &obj->memt)uvm_objtree_RBT_ROOT(&obj->memt);
1102 ptp->wire_count = 0;
1103 uvm_pagerealloc(ptp, NULL((void *)0), 0);
1104 TAILQ_INSERT_TAIL(pagelist, ptp, pageq);
1105}
1106
1107void
1108pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
1109 struct pg_to_free *pagelist)
1110{
1111 unsigned long index;
1112 int level;
1113 vaddr_t invaladdr;
1114
1115 level = 1;
1116 do {
1117 pmap_freepage(pmap, ptp, level, pagelist);
1118 index = pl_i(va, level + 1);
1119 pmap_pte_set(&normal_pdes[level - 1][index], 0)_atomic_swap_64((&normal_pdes[level - 1][index]), (0));
1120 if (level == PTP_LEVELS4 - 1 && pmap->pm_pdir_intel != NULL((void *)0)) {
1121 /* Zap special meltdown PML4e */
1122 pmap_pte_set(&pmap->pm_pdir_intel[index], 0)_atomic_swap_64((&pmap->pm_pdir_intel[index]), (0));
1123 DPRINTF("%s: cleared meltdown PML4e @ index %lu "
1124 "(va range start 0x%llx)\n", __func__, index,
1125 (uint64_t)(index << L4_SHIFT));
1126 }
1127 invaladdr = level == 1 ? (vaddr_t)PTE_BASE((pt_entry_t *) (255 * (1ULL << 39))) :
1128 (vaddr_t)normal_pdes[level - 2];
1129 pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE(1 << 12),
1130 pmap_is_curpmap(curpcb->pcb_pmap));
1131 if (level < PTP_LEVELS4 - 1) {
1132 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
1133 ptp->wire_count--;
1134 if (ptp->wire_count > 1)
1135 break;
1136 }
1137 } while (++level < PTP_LEVELS4);
1138}
1139
1140/*
1141 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1142 *
1143 * => pmap should NOT be pmap_kernel()
1144 */
1145
1146struct vm_page *
1147pmap_get_ptp(struct pmap *pmap, vaddr_t va)
1148{
1149 struct vm_page *ptp, *pptp;
1150 int i;
1151 unsigned long index;
1152 pd_entry_t *pva, *pva_intel;
1153 paddr_t ppa, pa;
1154 struct uvm_object *obj;
1155
1156 ptp = NULL((void *)0);
1157 pa = (paddr_t)-1;
1158
1159 /*
1160 * Loop through all page table levels seeing if we need to
1161 * add a new page to that level.
1162 */
1163 for (i = PTP_LEVELS4; i > 1; i--) {
1164 /*
1165 * Save values from previous round.
1166 */
1167 pptp = ptp;
1168 ppa = pa;
1169
1170 index = pl_i(va, i);
1171 pva = normal_pdes[i - 2];
1172
1173 if (pmap_valid_entry(pva[index])((pva[index]) & 0x0000000000000001UL)) {
1174 ppa = pva[index] & PG_FRAME0x000ffffffffff000UL;
1175 ptp = NULL((void *)0);
1176 continue;
1177 }
1178
1179 obj = &pmap->pm_obj[i-2];
1180 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL,
1181 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1182
1183 if (ptp == NULL((void *)0))
1184 return NULL((void *)0);
1185
1186 atomic_clearbits_intx86_atomic_clearbits_u32(&ptp->pg_flags, PG_BUSY0x00000001);
1187 ptp->wire_count = 1;
1188 pmap->pm_ptphint[i - 2] = ptp;
1189 pa = VM_PAGE_TO_PHYS(ptp)((ptp)->phys_addr);
1190 pva[index] = (pd_entry_t) (pa | PG_u0x0000000000000004UL | PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL);
1191
1192 /*
1193 * Meltdown Special case - if we are adding a new PML4e for
1194 * usermode addresses, just copy the PML4e to the U-K page
1195 * table.
1196 */
1197 if (pmap->pm_pdir_intel != NULL((void *)0) && i == PTP_LEVELS4 &&
1198 va < VM_MAXUSER_ADDRESS0x00007f7fffffc000) {
1199 pva_intel = pmap->pm_pdir_intel;
1200 pva_intel[index] = pva[index];
1201 DPRINTF("%s: copying usermode PML4e (content=0x%llx) "
1202 "from 0x%llx -> 0x%llx\n", __func__, pva[index],
1203 (uint64_t)&pva[index], (uint64_t)&pva_intel[index]);
1204 }
1205
1206 pmap->pm_stats.resident_count++;
1207 /*
1208 * If we're not in the top level, increase the
1209 * wire count of the parent page.
1210 */
1211 if (i < PTP_LEVELS4) {
1212 if (pptp == NULL((void *)0))
1213 pptp = pmap_find_ptp(pmap, va, ppa, i);
1214#ifdef DIAGNOSTIC1
1215 if (pptp == NULL((void *)0))
1216 panic("%s: pde page disappeared", __func__);
1217#endif
1218 pptp->wire_count++;
1219 }
1220 }
1221
1222 /*
1223 * ptp is not NULL if we just allocated a new ptp. If it's
1224 * still NULL, we must look up the existing one.
1225 */
1226 if (ptp == NULL((void *)0)) {
1227 ptp = pmap_find_ptp(pmap, va, ppa, 1);
1228#ifdef DIAGNOSTIC1
1229 if (ptp == NULL((void *)0)) {
1230 printf("va %lx ppa %lx\n", (unsigned long)va,
1231 (unsigned long)ppa);
1232 panic("%s: unmanaged user PTP", __func__);
1233 }
1234#endif
1235 }
1236
1237 pmap->pm_ptphint[0] = ptp;
1238 return(ptp);
1239}
1240
1241/*
1242 * p m a p l i f e c y c l e f u n c t i o n s
1243 */
1244
1245/*
1246 * pmap_pdp_ctor: constructor for the PDP cache.
1247 */
1248
1249void
1250pmap_pdp_ctor(pd_entry_t *pdir)
1251{
1252 paddr_t pdirpa;
1253 int npde, i;
1254 struct pmap *kpm = pmap_kernel()(&kernel_pmap_store);
1255
1256 /* fetch the physical address of the page directory. */
1257 (void) pmap_extract(kpm, (vaddr_t) pdir, &pdirpa);
1258
1259 /* zero init area */
1260 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t))__builtin_memset((pdir), (0), (255 * sizeof(pd_entry_t)));
1261
1262 /* put in recursive PDE to map the PTEs */
1263 pdir[PDIR_SLOT_PTE255] = pdirpa | PG_V0x0000000000000001UL | PG_KW0x0000000000000002UL | pg_nx;
1264
1265 npde = nkptp[PTP_LEVELS4 - 1];
1266
1267 /* put in kernel VM PDEs */
1268 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN],
1269 npde * sizeof(pd_entry_t));
1270
1271 /* zero the rest */
1272 memset(&pdir[PDIR_SLOT_KERN + npde], 0,
1273 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t));
1274
1275 for (i = 0; i < NUM_L4_SLOT_DIRECT4; i++)
1276 pdir[PDIR_SLOT_DIRECT(511 - 4) + i] = kpm->pm_pdir[PDIR_SLOT_DIRECT(511 - 4) + i];
1277
1278#if VM_MIN_KERNEL_ADDRESS0xffff800000000000 != KERNBASE0xffffffff80000000
1279 pdir[pl4_pi(KERNBASE)] = PDP_BASE[pl4_pi(KERNBASE)];
1280#endif
1281}
1282
1283void
1284pmap_pdp_ctor_intel(pd_entry_t *pdir)
1285{
1286 struct pmap *kpm = pmap_kernel()(&kernel_pmap_store);
1287
1288 /* Copy PML4es from pmap_kernel's U-K view */
1289 memcpy(pdir, kpm->pm_pdir_intel, PAGE_SIZE);
1290}
1291
1292/*
1293 * pmap_create: create a pmap
1294 *
1295 * => note: old pmap interface took a "size" args which allowed for
1296 * the creation of "software only" pmaps (not in bsd).
1297 */
1298
1299struct pmap *
1300pmap_create(void)
1301{
1302 struct pmap *pmap;
1303 int i;
1304
1305 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK0x0001);
1306
1307 mtx_init(&pmap->pm_mtx, IPL_VM);
1308
1309 /* init uvm_object */
1310 for (i = 0; i < PTP_LEVELS4 - 1; i++) {
1311 uvm_obj_init(&pmap->pm_obj[i], &pmap_pager, 1);
1312 pmap->pm_ptphint[i] = NULL((void *)0);
1313 }
1314 pmap->pm_stats.wired_count = 0;
1315 pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
1316 pmap->pm_type = PMAP_TYPE_NORMAL1;
1317
1318 /* allocate PDP */
1319
1320 /*
1321 * note that there is no need to splvm to protect us from
1322 * malloc since malloc allocates out of a submap and we should
1323 * have already allocated kernel PTPs to cover the range...
1324 */
1325
1326 pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK0x0001);
1327 pmap_pdp_ctor(pmap->pm_pdir);
1328
1329 pmap->pm_pdirpa = pmap->pm_pdir[PDIR_SLOT_PTE255] & PG_FRAME0x000ffffffffff000UL;
1330
1331 /*
1332 * Intel CPUs need a special page table to be used during usermode
1333 * execution, one that lacks all kernel mappings.
1334 */
1335 if (cpu_meltdown) {
1336 pmap->pm_pdir_intel = pool_get(&pmap_pdp_pool, PR_WAITOK0x0001);
1337 pmap_pdp_ctor_intel(pmap->pm_pdir_intel);
1338 pmap->pm_stats.resident_count++;
1339 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), (vaddr_t)pmap->pm_pdir_intel,
1340 &pmap->pm_pdirpa_intel))
1341 panic("%s: unknown PA mapping for meltdown PML4",
1342 __func__);
1343 } else {
1344 pmap->pm_pdir_intel = NULL((void *)0);
1345 pmap->pm_pdirpa_intel = 0;
1346 }
1347
1348 mtx_enter(&pmaps_lock);
1349 LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
1350 mtx_leave(&pmaps_lock);
1351 return (pmap);
1352}
1353
1354/*
1355 * pmap_destroy: drop reference count on pmap. free pmap if
1356 * reference count goes to zero.
1357 */
1358
1359void
1360pmap_destroy(struct pmap *pmap)
1361{
1362 struct vm_page *pg;
1363 int refs;
1364 int i;
1365
1366 /*
1367 * drop reference count
1368 */
1369
1370 refs = atomic_dec_int_nv(&pmap->pm_obj[0].uo_refs)_atomic_sub_int_nv((&pmap->pm_obj[0].uo_refs), 1);
1371 if (refs > 0) {
1372 return;
1373 }
1374
1375 /*
1376 * remove it from global list of pmaps
1377 */
1378 mtx_enter(&pmaps_lock);
1379 LIST_REMOVE(pmap, pm_list);
1380 mtx_leave(&pmaps_lock);
1381
1382 /*
1383 * free any remaining PTPs
1384 */
1385
1386 for (i = 0; i < PTP_LEVELS4 - 1; i++) {
1387 while ((pg = RBT_ROOT(uvm_objtree,
1388 &pmap->pm_obj[i].memt)) != NULL) {
1389 KASSERT((pg->pg_flags & PG_BUSY) == 0);
1390
1391 pg->wire_count = 0;
1392 pmap->pm_stats.resident_count--;
1393
1394 uvm_pagefree(pg);
1395 }
1396 }
1397
1398 pool_put(&pmap_pdp_pool, pmap->pm_pdir);
1399
1400 if (pmap->pm_pdir_intel != NULL((void *)0)) {
1401 pmap->pm_stats.resident_count--;
1402 pool_put(&pmap_pdp_pool, pmap->pm_pdir_intel);
1403 }
1404
1405 pool_put(&pmap_pmap_pool, pmap);
1406}
1407
1408/*
1409 * Add a reference to the specified pmap.
1410 */
1411
1412void
1413pmap_reference(struct pmap *pmap)
1414{
1415 atomic_inc_int(&pmap->pm_obj[0].uo_refs)_atomic_inc_int(&pmap->pm_obj[0].uo_refs);
1416}
1417
1418/*
1419 * pmap_activate: activate a process' pmap (fill in %cr3)
1420 *
1421 * => called from cpu_fork() and when switching pmaps during exec
1422 * => if p is the curproc, then load it into the MMU
1423 */
1424
1425void
1426pmap_activate(struct proc *p)
1427{
1428 struct pcb *pcb = &p->p_addr->u_pcb;
1429 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1430
1431 pcb->pcb_pmap = pmap;
1432 pcb->pcb_cr3 = pmap->pm_pdirpa;
1433 pcb->pcb_cr3 |= (pmap != pmap_kernel()) ? cr3_pcid_proc :
1434 (PCID_KERN | cr3_reuse_pcid);
1435
1436 if (p != curproc)
1437 return;
1438
1439 if ((p->p_flag & P_SYSTEM) == 0) {
1440 struct cpu_info *self = curcpu();
1441
1442 /* mark the pmap in use by this processor */
1443 self->ci_proc_pmap = pmap;
1444
1445 /* in case we return to userspace without context switching */
1446 if (cpu_meltdown) {
1447 self->ci_kern_cr3 = pcb->pcb_cr3 | cr3_reuse_pcid;
1448 self->ci_user_cr3 = pmap->pm_pdirpa_intel |
1449 cr3_pcid_proc_intel;
1450 }
1451 }
1452
1453 lcr3(pcb->pcb_cr3);
1454}
1455
1456/*
1457 * pmap_deactivate: deactivate a process' pmap
1458 */
1459
1460void
1461pmap_deactivate(struct proc *p)
1462{
1463 if ((p->p_flag & P_SYSTEM) == 0) {
1464 struct cpu_info *self = curcpu();
1465
1466 /*
1467 * mark the pmap no longer in use by this processor.
1468 */
1469 KASSERT(self->ci_proc_pmap == p->p_vmspace->vm_map.pmap);
1470 self->ci_proc_pmap = NULL;
1471 }
1472}
1473
1474/*
1475 * end of lifecycle functions
1476 */
1477
1478/*
1479 * some misc. functions
1480 */
1481
1482int
1483pmap_pdes_valid(vaddr_t va, pd_entry_t *lastpde)
1484{
1485 int i;
1486 unsigned long index;
1487 pd_entry_t pde;
1488
1489 for (i = PTP_LEVELS; i > 1; i--) {
1490 index = pl_i(va, i);
1491 pde = normal_pdes[i - 2][index];
1492 if (!pmap_valid_entry(pde))
1493 return 0;
1494 }
1495 if (lastpde != NULL)
1496 *lastpde = pde;
1497 return 1;
1498}
1499
1500/*
1501 * pmap_extract: extract a PA for the given VA
1502 */
1503
1504int
1505pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
1506{
1507 pt_entry_t *ptes, pte;
1508 int level, offs;
1509
1510 if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
1511 va < PMAP_DIRECT_END) {
1512 *pap = va - PMAP_DIRECT_BASE;
1513 return 1;
1514 }
1515
1516 if (pmap != pmap_kernel())
1517 mtx_enter(&pmap->pm_mtx);
1518
1519 level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
1520 pte = ptes[offs];
1521
1522 if (pmap != pmap_kernel())
1523 mtx_leave(&pmap->pm_mtx);
1524
1525 if (__predict_true(level == 0 && pmap_valid_entry(pte))) {
1526 if (pap != NULL)
1527 *pap = (pte & PG_FRAME) | (va & PAGE_MASK);
1528 return 1;
1529 }
1530 if (level == 1 && (pte & (PG_PS|PG_V)) == (PG_PS|PG_V)) {
1531 if (pap != NULL)
1532 *pap = (pte & PG_LGFRAME) | (va & PAGE_MASK_L2);
1533 return 1;
1534 }
1535
1536 return 0;
1537}
1538
1539/*
1540 * pmap_zero_page: zero a page
1541 */
1542
1543void
1544pmap_zero_page(struct vm_page *pg)
1545{
1546 pagezero(pmap_map_direct(pg));
1547}
1548
1549/*
1550 * pmap_flush_cache: flush the cache for a virtual address.
1551 */
1552void
1553pmap_flush_cache(vaddr_t addr, vsize_t len)
1554{
1555 vaddr_t i;
1556
1557 if (curcpu()->ci_cflushsz == 0) {
1558 wbinvd_on_all_cpus();
1559 return;
1560 }
1561
1562 /* all cpus that have clflush also have mfence. */
1563 mfence();
1564 for (i = addr; i < addr + len; i += curcpu()->ci_cflushsz)
1565 clflush(i);
1566 mfence();
1567}
1568
1569/*
1570 * pmap_copy_page: copy a page
1571 */
1572
1573void
1574pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1575{
1576 vaddr_t srcva = pmap_map_direct(srcpg);
1577 vaddr_t dstva = pmap_map_direct(dstpg);
1578
1579 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
1580}
1581
1582/*
1583 * p m a p r e m o v e f u n c t i o n s
1584 *
1585 * functions that remove mappings
1586 */
1587
1588/*
1589 * pmap_remove_ptes: remove PTEs from a PTP
1590 *
1591 * => must have proper locking on pmap_master_lock
1592 * => PTP must be mapped into KVA
1593 * => PTP should be null if pmap == pmap_kernel()
1594 */
1595
1596void
1597pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
1598 vaddr_t startva, vaddr_t endva, int flags, struct pv_entry **free_pvs)
1599{
1600 struct pv_entry *pve;
1601 pt_entry_t *pte = (pt_entry_t *) ptpva;
1602 struct vm_page *pg;
1603 pt_entry_t opte;
1604
1605 /*
1606 * note that ptpva points to the PTE that maps startva. this may
1607 * or may not be the first PTE in the PTP.
1608 *
1609 * we loop through the PTP while there are still PTEs to look at
1610 * and the wire_count is greater than 1 (because we use the wire_count
1611 * to keep track of the number of real PTEs in the PTP).
1612 */
1613
1614 for (/*null*/; startva < endva && (ptp == NULL((void *)0) || ptp->wire_count > 1)
1615 ; pte++, startva += PAGE_SIZE(1 << 12)) {
1616 if (!pmap_valid_entry(*pte)((*pte) & 0x0000000000000001UL))
1617 continue; /* VA not mapped */
1618 if ((flags & PMAP_REMOVE_SKIPWIRED1) && (*pte & PG_W0x0000000000000200UL)) {
1619 continue;
1620 }
1621
1622 /* atomically save the old PTE and zap! it */
1623 opte = pmap_pte_set(pte, 0)_atomic_swap_64((pte), (0));
1624
1625 if (opte & PG_W0x0000000000000200UL)
1626 pmap->pm_stats.wired_count--;
1627 pmap->pm_stats.resident_count--;
1628
1629 if (ptp != NULL((void *)0))
1630 ptp->wire_count--; /* dropping a PTE */
1631
1632 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME0x000ffffffffff000UL);
1633
1634 /*
1635 * if we are not on a pv list we are done.
1636 */
1637
1638 if ((opte & PG_PVLIST0x0000000000000400UL) == 0) {
1639#ifdef DIAGNOSTIC1
1640 if (pg != NULL((void *)0))
1641 panic("%s: managed page without PG_PVLIST: "
1642 "va 0x%lx, opte 0x%llx", __func__,
1643 startva, opte);
1644#endif
1645 continue;
1646 }
1647
1648#ifdef DIAGNOSTIC1
1649 if (pg == NULL((void *)0))
1650 panic("%s: unmanaged page marked PG_PVLIST: "
1651 "va 0x%lx, opte 0x%llx", __func__,
1652 startva, opte);
1653#endif
1654
1655 /* sync R/M bits */
1656 pmap_sync_flags_pte(pg, opte);
1657 pve = pmap_remove_pv(pg, pmap, startva);
1658 if (pve != NULL((void *)0)) {
1659 pve->pv_next = *free_pvs;
1660 *free_pvs = pve;
1661 }
1662
1663 /* end of "for" loop: time for next pte */
1664 }
1665}
1666
1667/*
1668 * pmap_remove_pte: remove a single PTE from a PTP
1669 *
1670 * => must have proper locking on pmap_master_lock
1671 * => PTP must be mapped into KVA
1672 * => PTP should be null if pmap == pmap_kernel()
1673 * => returns true if we removed a mapping
1674 */
1675
1676int
1677pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
1678 vaddr_t va, int flags, struct pv_entry **free_pvs)
1679{
1680 struct pv_entry *pve;
1681 struct vm_page *pg;
1682 pt_entry_t opte;
1683
1684 if (!pmap_valid_entry(*pte)((*pte) & 0x0000000000000001UL))
1685 return 0; /* VA not mapped */
1686 if ((flags & PMAP_REMOVE_SKIPWIRED1) && (*pte & PG_W0x0000000000000200UL)) {
1687 return 0;
1688 }
1689
1690 /* atomically save the old PTE and zap! it */
1691 opte = pmap_pte_set(pte, 0)_atomic_swap_64((pte), (0));
1692
1693 if (opte & PG_W0x0000000000000200UL)
1694 pmap->pm_stats.wired_count--;
1695 pmap->pm_stats.resident_count--;
1696
1697 if (ptp != NULL((void *)0))
1698 ptp->wire_count--; /* dropping a PTE */
1699
1700 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME0x000ffffffffff000UL);
1701
1702 /*
1703 * if we are not on a pv list we are done.
1704 */
1705 if ((opte & PG_PVLIST0x0000000000000400UL) == 0) {
1706#ifdef DIAGNOSTIC1
1707 if (pg != NULL((void *)0))
1708 panic("%s: managed page without PG_PVLIST: "
1709 "va 0x%lx, opte 0x%llx", __func__, va, opte);
1710#endif
1711 return 1;
1712 }
1713
1714#ifdef DIAGNOSTIC1
1715 if (pg == NULL((void *)0))
1716 panic("%s: unmanaged page marked PG_PVLIST: "
1717 "va 0x%lx, opte 0x%llx", __func__, va, opte);
1718#endif
1719
1720 /* sync R/M bits */
1721 pmap_sync_flags_pte(pg, opte);
1722 pve = pmap_remove_pv(pg, pmap, va);
1723 if (pve != NULL((void *)0)) {
1724 pve->pv_next = *free_pvs;
1725 *free_pvs = pve;
1726 }
1727
1728 return 1;
1729}
1730
1731/*
1732 * pmap_remove: top level mapping removal function
1733 *
1734 * => caller should not be holding any pmap locks
1735 */
1736
1737void
1738pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
1739{
1740 if (pmap->pm_type == PMAP_TYPE_EPT2)
1741 pmap_remove_ept(pmap, sva, eva);
1742 else
1743 pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL0);
1744}
1745
1746/*
1747 * pmap_do_remove: mapping removal guts
1748 *
1749 * => caller should not be holding any pmap locks
1750 */
1751
1752void
1753pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
1754{
1755 pd_entry_t pde;
1756 int result;
1757 paddr_t ptppa;
1758 vaddr_t blkendva;
1759 struct vm_page *ptp;
1760 struct pv_entry *pve;
1761 struct pv_entry *free_pvs = NULL((void *)0);
1762 vaddr_t va;
1763 int shootall = 0, shootself;
1764 struct pg_to_free empty_ptps;
1765 paddr_t scr3;
1766
1767 TAILQ_INIT(&empty_ptps);
1768
1769 scr3 = pmap_map_ptes(pmap);
1770 shootself = (scr3 == 0);
1771
1772 /*
1773 * removing one page? take shortcut function.
1774 */
1775
1776 if (sva + PAGE_SIZE(1 << 12) == eva) {
1777 if (pmap_pdes_valid(sva, &pde)) {
1778
1779 /* PA of the PTP */
1780 ptppa = pde & PG_FRAME0x000ffffffffff000UL;
1781
1782 /* get PTP if non-kernel mapping */
1783
1784 if (pmap == pmap_kernel()(&kernel_pmap_store)) {
1785 /* we never free kernel PTPs */
1786 ptp = NULL((void *)0);
1787 } else {
1788 ptp = pmap_find_ptp(pmap, sva, ptppa, 1);
1789#ifdef DIAGNOSTIC1
1790 if (ptp == NULL((void *)0))
1791 panic("%s: unmanaged PTP detected",
1792 __func__);
1793#endif
1794 }
1795
1796 /* do it! */
1797 result = pmap_remove_pte(pmap, ptp,
1798 &PTE_BASE[pl1_i(sva)], sva, flags, &free_pvs);
1799
1800 /*
1801 * if mapping removed and the PTP is no longer
1802 * being used, free it!
1803 */
1804
1805 if (result && ptp && ptp->wire_count <= 1)
1806 pmap_free_ptp(pmap, ptp, sva, &empty_ptps);
1807 pmap_tlb_shootpage(pmap, sva, shootself);
1808 pmap_unmap_ptes(pmap, scr3);
1809 pmap_tlb_shootwait();
1810 } else {
1811 pmap_unmap_ptes(pmap, scr3);
1812 }
1813
1814 goto cleanup;
1815 }
1816
1817 if ((eva - sva > 32 * PAGE_SIZE(1 << 12)) && sva < VM_MIN_KERNEL_ADDRESS0xffff800000000000)
1818 shootall = 1;
1819
1820 for (va = sva; va < eva; va = blkendva) {
1821 /* determine range of block */
1822 blkendva = x86_round_pdr(va + 1);
1823 if (blkendva > eva)
1824 blkendva = eva;
1825
1826 /*
1827 * XXXCDC: our PTE mappings should never be removed
1828 * with pmap_remove! if we allow this (and why would
1829 * we?) then we end up freeing the pmap's page
1830 * directory page (PDP) before we are finished using
1831 * it when we hit it in the recursive mapping. this
1832 * is BAD.
1833 *
1834 * long term solution is to move the PTEs out of user
1835 * address space. and into kernel address space (up
1836 * with APTE). then we can set VM_MAXUSER_ADDRESS to
1837 * be VM_MAX_ADDRESS.
1838 */
1839
1840 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE)
1841 /* XXXCDC: ugly hack to avoid freeing PDP here */
1842 continue;
1843
1844 if (!pmap_pdes_valid(va, &pde))
1845 continue;
1846
1847 /* PA of the PTP */
1848 ptppa = pde & PG_FRAME0x000ffffffffff000UL;
1849
1850 /* get PTP if non-kernel mapping */
1851 if (pmap == pmap_kernel()(&kernel_pmap_store)) {
1852 /* we never free kernel PTPs */
1853 ptp = NULL((void *)0);
1854 } else {
1855 ptp = pmap_find_ptp(pmap, va, ptppa, 1);
1856#ifdef DIAGNOSTIC1
1857 if (ptp == NULL((void *)0))
1858 panic("%s: unmanaged PTP detected", __func__);
1859#endif
1860 }
1861 pmap_remove_ptes(pmap, ptp, (vaddr_t)&PTE_BASE[pl1_i(va)],
1862 va, blkendva, flags, &free_pvs);
1863
1864 /* if PTP is no longer being used, free it! */
1865 if (ptp && ptp->wire_count <= 1) {
1866 pmap_free_ptp(pmap, ptp, va, &empty_ptps);
1867 }
1868 }
1869
1870 if (shootall)
1871 pmap_tlb_shoottlb(pmap, shootself);
1872 else
1873 pmap_tlb_shootrange(pmap, sva, eva, shootself);
1874
1875 pmap_unmap_ptes(pmap, scr3);
1876 pmap_tlb_shootwait();
1877
1878cleanup:
1879 while ((pve = free_pvs) != NULL((void *)0)) {
1880 free_pvs = pve->pv_next;
1881 pool_put(&pmap_pv_pool, pve);
1882 }
1883
1884 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
1885 TAILQ_REMOVE(&empty_ptps, ptp, pageq);
1886 uvm_pagefree(ptp);
1887 }
1888}
1889
1890/*
1891 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
1892 *
1893 * => R/M bits are sync'd back to attrs
1894 */
1895
1896void
1897pmap_page_remove(struct vm_page *pg)
1898{
1899 struct pv_entry *pve;
1900 struct pmap *pm;
1901 pt_entry_t opte;
1902#ifdef DIAGNOSTIC1
1903 pd_entry_t pde;
1904#endif
1905 struct pg_to_free empty_ptps;
1906 struct vm_page *ptp;
1907 paddr_t scr3;
1908 int shootself;
1909
1910 TAILQ_INIT(&empty_ptps);
1911
1912 mtx_enter(&pg->mdpage.pv_mtx);
1913 while ((pve = pg->mdpage.pv_list) != NULL((void *)0)) {
1914 pmap_reference(pve->pv_pmap);
1915 pm = pve->pv_pmap;
1916 mtx_leave(&pg->mdpage.pv_mtx);
1917
1918 /* XXX use direct map? */
1919 scr3 = pmap_map_ptes(pm); /* locks pmap */
1920 shootself = (scr3 == 0);
1921
1922 /*
1923 * We dropped the pvlist lock before grabbing the pmap
1924 * lock to avoid lock ordering problems. This means
1925 * we have to check the pvlist again since somebody
1926 * else might have modified it. All we care about is
1927 * that the pvlist entry matches the pmap we just
1928 * locked. If it doesn't, unlock the pmap and try
1929 * again.
1930 */
1931 mtx_enter(&pg->mdpage.pv_mtx);
1932 if ((pve = pg->mdpage.pv_list) == NULL((void *)0) ||
1933 pve->pv_pmap != pm) {
1934 mtx_leave(&pg->mdpage.pv_mtx);
1935 pmap_unmap_ptes(pm, scr3); /* unlocks pmap */
1936 pmap_destroy(pm);
1937 mtx_enter(&pg->mdpage.pv_mtx);
1938 continue;
1939 }
1940
1941 pg->mdpage.pv_list = pve->pv_next;
1942 mtx_leave(&pg->mdpage.pv_mtx);
1943
1944#ifdef DIAGNOSTIC1
1945 if (pve->pv_ptp != NULL((void *)0) && pmap_pdes_valid(pve->pv_va, &pde) &&
1946 (pde & PG_FRAME0x000ffffffffff000UL) != VM_PAGE_TO_PHYS(pve->pv_ptp)((pve->pv_ptp)->phys_addr)) {
1947 printf("%s: pg=%p: va=%lx, pv_ptp=%p\n", __func__,
1948 pg, pve->pv_va, pve->pv_ptp);
1949 printf("%s: PTP's phys addr: "
1950 "actual=%lx, recorded=%lx\n", __func__,
1951 (unsigned long)(pde & PG_FRAME0x000ffffffffff000UL),
1952 VM_PAGE_TO_PHYS(pve->pv_ptp)((pve->pv_ptp)->phys_addr));
1953 panic("%s: mapped managed page has "
1954 "invalid pv_ptp field", __func__);
1955 }
1956#endif
1957
1958 /* atomically save the old PTE and zap it */
1959 opte = pmap_pte_set(&PTE_BASE[pl1_i(pve->pv_va)], 0);
1960
1961 if (opte & PG_W0x0000000000000200UL)
1962 pve->pv_pmap->pm_stats.wired_count--;
1963 pve->pv_pmap->pm_stats.resident_count--;
1964
1965 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va, shootself);
1966
1967 pmap_sync_flags_pte(pg, opte);
1968
1969 /* update the PTP reference count. free if last reference. */
1970 if (pve->pv_ptp != NULL((void *)0)) {
1971 pve->pv_ptp->wire_count--;
1972 if (pve->pv_ptp->wire_count <= 1) {
1973 pmap_free_ptp(pve->pv_pmap, pve->pv_ptp,
1974 pve->pv_va, &empty_ptps);
1975 }
1976 }
1977 pmap_unmap_ptes(pve->pv_pmap, scr3); /* unlocks pmap */
1978 pmap_destroy(pve->pv_pmap);
1979 pool_put(&pmap_pv_pool, pve);
1980 mtx_enter(&pg->mdpage.pv_mtx);
1981 }
1982 mtx_leave(&pg->mdpage.pv_mtx);
1983
1984 pmap_tlb_shootwait();
1985
1986 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
1987 TAILQ_REMOVE(&empty_ptps, ptp, pageq);
1988 uvm_pagefree(ptp);
1989 }
1990}
1991
1992/*
1993 * p m a p a t t r i b u t e f u n c t i o n s
1994 * functions that test/change managed page's attributes
1995 * since a page can be mapped multiple times we must check each PTE that
1996 * maps it by going down the pv lists.
1997 */
1998
1999/*
2000 * pmap_test_attrs: test a page's attributes
2001 */
2002
2003int
2004pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
2005{
2006 struct pv_entry *pve;
2007 pt_entry_t *ptes;
2008 int level, offs;
2009 u_long mybits, testflags;
2010
2011 testflags = pmap_pte2flags(testbits);
2012
2013 if (pg->pg_flags & testflags)
2014 return 1;
2015
2016 mybits = 0;
2017 mtx_enter(&pg->mdpage.pv_mtx);
2018 for (pve = pg->mdpage.pv_list; pve != NULL((void *)0) && mybits == 0;
2019 pve = pve->pv_next) {
2020 level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
2021 &offs);
2022 mybits |= (ptes[offs] & testbits);
2023 }
2024 mtx_leave(&pg->mdpage.pv_mtx);
2025
2026 if (mybits == 0)
2027 return 0;
2028
2029 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits));
2030
2031 return 1;
2032}
2033
2034/*
2035 * pmap_clear_attrs: change a page's attributes
2036 *
2037 * => we return 1 if we cleared one of the bits we were asked to
2038 */
2039
2040int
2041pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
2042{
2043 struct pv_entry *pve;
2044 pt_entry_t *ptes, opte;
2045 u_long clearflags;
2046 int result, level, offs;
2047
2048 clearflags = pmap_pte2flags(clearbits);
2049
2050 result = pg->pg_flags & clearflags;
2051 if (result)
2052 atomic_clearbits_int(&pg->pg_flags, clearflags);
2053
2054 mtx_enter(&pg->mdpage.pv_mtx);
2055 for (pve = pg->mdpage.pv_list; pve != NULL((void *)0); pve = pve->pv_next) {
2056 level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
Value stored to 'level' is never read
2057 &offs);
2058 opte = ptes[offs];
2059 if (opte & clearbits) {
2060 result = 1;
2061 pmap_pte_clearbits(&ptes[offs], (opte & clearbits));
2062 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va,
2063 pmap_is_curpmap(pve->pv_pmap));
2064 }
2065 }
2066 mtx_leave(&pg->mdpage.pv_mtx);
2067
2068 pmap_tlb_shootwait();
2069
2070 return (result != 0);
2071}
2072
2073/*
2074 * p m a p p r o t e c t i o n f u n c t i o n s
2075 */
2076
2077/*
2078 * pmap_page_protect: change the protection of all recorded mappings
2079 * of a managed page
2080 *
2081 * => NOTE: this is an inline function in pmap.h
2082 */
2083
2084/* see pmap.h */
2085
2086/*
2087 * pmap_protect: set the protection of the pages in a pmap
2088 *
2089 * => NOTE: this is an inline function in pmap.h
2090 */
2091
2092/* see pmap.h */
2093
2094/*
2095 * pmap_write_protect: write-protect pages in a pmap
2096 */
2097
2098void
2099pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2100{
2101 pt_entry_t nx, *spte, *epte;
2102 vaddr_t blockend;
2103 int shootall = 0, shootself;
2104 vaddr_t va;
2105 paddr_t scr3;
2106
2107 scr3 = pmap_map_ptes(pmap);
2108 shootself = (scr3 == 0);
2109
2110 /* should be ok, but just in case ... */
2111 sva &= PG_FRAME0x000ffffffffff000UL;
2112 eva &= PG_FRAME0x000ffffffffff000UL;
2113
2114 nx = 0;
2115 if (!(prot & PROT_EXEC0x04))
2116 nx = pg_nx;
2117
2118 if ((eva - sva > 32 * PAGE_SIZE(1 << 12)) && sva < VM_MIN_KERNEL_ADDRESS0xffff800000000000)
2119 shootall = 1;
2120
2121 for (va = sva; va < eva ; va = blockend) {
2122 blockend = (va & L2_FRAME) + NBPD_L2;
2123 if (blockend > eva)
2124 blockend = eva;
2125
2126 /*
2127 * XXXCDC: our PTE mappings should never be write-protected!
2128 *
2129 * long term solution is to move the PTEs out of user
2130 * address space. and into kernel address space (up
2131 * with APTE). then we can set VM_MAXUSER_ADDRESS to
2132 * be VM_MAX_ADDRESS.
2133 */
2134
2135 /* XXXCDC: ugly hack to avoid freeing PDP here */
2136 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE)
2137 continue;
2138
2139 /* empty block? */
2140 if (!pmap_pdes_valid(va, NULL((void *)0)))
2141 continue;
2142
2143#ifdef DIAGNOSTIC1
2144 if (va >= VM_MAXUSER_ADDRESS0x00007f7fffffc000 && va < VM_MAX_ADDRESS0x00007fbfdfeff000)
2145 panic("%s: PTE space", __func__);
2146#endif
2147
2148 spte = &PTE_BASE[pl1_i(va)];
2149 epte = &PTE_BASE[pl1_i(blockend)];
2150
2151 for (/*null */; spte < epte ; spte++) {
2152 if (!pmap_valid_entry(*spte))
2153 continue;
2154 pmap_pte_clearbits(spte, PG_RW);
2155 pmap_pte_setbits(spte, nx);
2156 }
2157 }
2158
2159 if (shootall)
2160 pmap_tlb_shoottlb(pmap, shootself);
2161 else
2162 pmap_tlb_shootrange(pmap, sva, eva, shootself);
2163
2164 pmap_unmap_ptes(pmap, scr3);
2165 pmap_tlb_shootwait();
2166}
2167
2168/*
2169 * end of protection functions
2170 */
2171
2172/*
2173 * pmap_unwire: clear the wired bit in the PTE
2174 *
2175 * => mapping should already be in map
2176 */
2177
2178void
2179pmap_unwire(struct pmap *pmap, vaddr_t va)
2180{
2181 pt_entry_t *ptes;
2182 int level, offs;
2183
2184 level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
2185
2186 if (level == 0) {
2187
2188#ifdef DIAGNOSTIC1
2189 if (!pmap_valid_entry(ptes[offs])((ptes[offs]) & 0x0000000000000001UL))
2190 panic("%s: invalid (unmapped) va 0x%lx", __func__, va);
2191#endif
2192 if (__predict_true((ptes[offs] & PG_W) != 0)) {
2193 pmap_pte_clearbits(&ptes[offs], PG_W);
2194 pmap->pm_stats.wired_count--;
2195 }
2196#ifdef DIAGNOSTIC1
2197 else {
2198 printf("%s: wiring for pmap %p va 0x%lx "
2199 "didn't change!\n", __func__, pmap, va);
2200 }
2201#endif
2202 }
2203#ifdef DIAGNOSTIC1
2204 else {
2205 panic("%s: invalid PDE", __func__);
2206 }
2207#endif
2208}
2209
2210/*
2211 * pmap_collect: free resources held by a pmap
2212 *
2213 * => optional function.
2214 * => called when a process is swapped out to free memory.
2215 */
2216
2217void
2218pmap_collect(struct pmap *pmap)
2219{
2220 /*
2221 * free all of the pt pages by removing the physical mappings
2222 * for its entire address space.
2223 */
2224
2225/* pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
2226 PMAP_REMOVE_SKIPWIRED);
2227*/
2228}
2229
2230/*
2231 * pmap_copy: copy mappings from one pmap to another
2232 *
2233 * => optional function
2234 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2235 */
2236
2237/*
2238 * defined as macro in pmap.h
2239 */
2240
2241void
2242pmap_enter_special(vaddr_t va, paddr_t pa, vm_prot_t prot)
2243{
2244 uint64_t l4idx, l3idx, l2idx, l1idx;
2245 pd_entry_t *pd, *ptp;
2246 paddr_t npa;
2247 struct pmap *pmap = pmap_kernel()(&kernel_pmap_store);
2248 pt_entry_t *ptes;
2249 int level, offs;
2250
2251 /* If CPU is secure, no need to do anything */
2252 if (!cpu_meltdown)
2253 return;
2254
2255 /* Must be kernel VA */
2256 if (va < VM_MIN_KERNEL_ADDRESS0xffff800000000000)
2257 panic("%s: invalid special mapping va 0x%lx requested",
2258 __func__, va);
2259
2260 if (pmap->pm_pdir_intel == NULL((void *)0))
2261 pmap->pm_pdir_intel = pool_get(&pmap_pdp_pool,
2262 PR_WAITOK0x0001 | PR_ZERO0x0008);
2263
2264 l4idx = (va & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */
2265 l3idx = (va & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */
2266 l2idx = (va & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */
2267 l1idx = (va & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */
2268
2269 DPRINTF("%s: va=0x%llx pa=0x%llx l4idx=%lld l3idx=%lld "
2270 "l2idx=%lld l1idx=%lld\n", __func__, (uint64_t)va,
2271 (uint64_t)pa, l4idx, l3idx, l2idx, l1idx);
2272
2273 /* Start at PML4 / top level */
2274 pd = pmap->pm_pdir_intel;
2275
2276 if (pd == NULL((void *)0))
2277 panic("%s: PML4 not initialized for pmap @ %p", __func__,
2278 pmap);
2279
2280 /* npa = physaddr of PDPT */
2281 npa = pd[l4idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2282
2283 /* Valid PML4e for the 512GB region containing va? */
2284 if (!npa) {
2285 /* No valid PML4E - allocate PDPT page and set PML4E */
2286
2287 ptp = pool_get(&pmap_pdp_pool, PR_WAITOK0x0001 | PR_ZERO0x0008);
2288
2289 if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
2290 panic("%s: can't locate PDPT page", __func__);
2291
2292 pd[l4idx] = (npa | PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL);
2293
2294 DPRINTF("%s: allocated new PDPT page at phys 0x%llx, "
2295 "setting PML4e[%lld] = 0x%llx\n", __func__,
2296 (uint64_t)npa, l4idx, pd[l4idx]);
2297 }
2298
2299 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2300 if (pd == NULL((void *)0))
2301 panic("%s: can't locate PDPT @ pa=0x%llx", __func__,
2302 (uint64_t)npa);
2303
2304 /* npa = physaddr of PD page */
2305 npa = pd[l3idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2306
2307 /* Valid PDPTe for the 1GB region containing va? */
2308 if (!npa) {
2309 /* No valid PDPTe - allocate PD page and set PDPTe */
2310
2311 ptp = pool_get(&pmap_pdp_pool, PR_WAITOK0x0001 | PR_ZERO0x0008);
2312
2313 if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
2314 panic("%s: can't locate PD page", __func__);
2315
2316 pd[l3idx] = (npa | PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL);
2317
2318 DPRINTF("%s: allocated new PD page at phys 0x%llx, "
2319 "setting PDPTe[%lld] = 0x%llx\n", __func__,
2320 (uint64_t)npa, l3idx, pd[l3idx]);
2321 }
2322
2323 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2324 if (pd == NULL((void *)0))
2325 panic("%s: can't locate PD page @ pa=0x%llx", __func__,
2326 (uint64_t)npa);
2327
2328 /* npa = physaddr of PT page */
2329 npa = pd[l2idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2330
2331 /* Valid PDE for the 2MB region containing va? */
2332 if (!npa) {
2333 /* No valid PDE - allocate PT page and set PDE */
2334
2335 ptp = pool_get(&pmap_pdp_pool, PR_WAITOK0x0001 | PR_ZERO0x0008);
2336
2337 if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
2338 panic("%s: can't locate PT page", __func__);
2339
2340 pd[l2idx] = (npa | PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL);
2341
2342 DPRINTF("%s: allocated new PT page at phys 0x%llx, "
2343 "setting PDE[%lld] = 0x%llx\n", __func__,
2344 (uint64_t)npa, l2idx, pd[l2idx]);
2345 }
2346
2347 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2348 if (pd == NULL((void *)0))
2349 panic("%s: can't locate PT page @ pa=0x%llx", __func__,
2350 (uint64_t)npa);
2351
2352 DPRINTF("%s: setting PTE, PT page @ phys 0x%llx virt 0x%llx prot "
2353 "0x%llx was 0x%llx\n", __func__, (uint64_t)npa, (uint64_t)pd,
2354 (uint64_t)prot, (uint64_t)pd[l1idx]);
2355
2356 pd[l1idx] = pa | protection_codes[prot] | PG_V0x0000000000000001UL | PG_W0x0000000000000200UL;
2357
2358 /*
2359 * Look up the corresponding U+K entry. If we're installing the
2360 * same PA into the U-K map then set the PG_G bit on both and copy
2361 * the cache-control bits from the U+K entry to the U-K entry.
2362 */
2363 level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
2364 if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs]))) {
2365 if (((pd[l1idx] ^ ptes[offs]) & PG_FRAME0x000ffffffffff000UL) == 0) {
2366 pd[l1idx] |= PG_G0x0000000000000100UL | (ptes[offs] & (PG_N0x0000000000000010UL | PG_WT0x0000000000000008UL));
2367 ptes[offs] |= PG_G0x0000000000000100UL;
2368 } else {
2369 DPRINTF("%s: special diffing mapping at %llx\n",
2370 __func__, (long long)va);
2371 }
2372 } else
2373 DPRINTF("%s: no U+K mapping for special mapping?\n", __func__);
2374
2375 DPRINTF("%s: setting PTE[%lld] = 0x%llx\n", __func__, l1idx, pd[l1idx]);
2376}
2377
2378void
2379pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
2380{
2381 vaddr_t v;
2382#if NVMM > 0
2383 struct vmx_invept_descriptor vid;
2384#endif /* NVMM > 0 */
2385
2386 DPRINTF("%s: sgpa=0x%llx egpa=0x%llx\n", __func__, (uint64_t)sgpa,
2387 (uint64_t)egpa);
2388 for (v = sgpa; v < egpa + PAGE_SIZE(1 << 12); v += PAGE_SIZE(1 << 12))
2389 pmap_do_remove_ept(pmap, v);
2390
2391#if NVMM > 0
2392 if (pmap->eptp != 0) {
2393 memset(&vid, 0, sizeof(vid))__builtin_memset((&vid), (0), (sizeof(vid)));
2394 vid.vid_eptp = pmap->eptp;
2395 DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
2396 vid.vid_eptp);
2397 invept(IA32_VMX_INVEPT_SINGLE_CTX0x1, &vid);
2398 }
2399#endif /* NVMM > 0 */
2400}
2401
2402void
2403pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
2404{
2405 uint64_t l4idx, l3idx, l2idx, l1idx;
2406 struct vm_page *pg3, *pg2, *pg1;
2407 paddr_t npa3, npa2, npa1;
2408 pd_entry_t *pd4, *pd3, *pd2, *pd1;
2409 pd_entry_t *pptes;
2410
2411 l4idx = (gpa & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */
2412 l3idx = (gpa & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */
2413 l2idx = (gpa & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */
2414 l1idx = (gpa & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */
2415
2416 /* Start at PML4 / top level */
2417 pd4 = (pd_entry_t *)pmap->pm_pdir;
2418
2419 if (pd4 == NULL((void *)0))
2420 return;
2421
2422 /* npa3 = physaddr of PDPT */
2423 npa3 = pd4[l4idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2424 if (!npa3)
2425 return;
2426 pd3 = (pd_entry_t *)PMAP_DIRECT_MAP(npa3);
2427 pg3 = PHYS_TO_VM_PAGE(npa3);
2428
2429 /* npa2 = physaddr of PD page */
2430 npa2 = pd3[l3idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2431 if (!npa2)
2432 return;
2433 pd2 = (pd_entry_t *)PMAP_DIRECT_MAP(npa2);
2434 pg2 = PHYS_TO_VM_PAGE(npa2);
2435
2436 /* npa1 = physaddr of PT page */
2437 npa1 = pd2[l2idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2438 if (!npa1)
2439 return;
2440 pd1 = (pd_entry_t *)PMAP_DIRECT_MAP(npa1);
2441 pg1 = PHYS_TO_VM_PAGE(npa1);
2442
2443 if (pd1[l1idx] == 0)
2444 return;
2445
2446 pd1[l1idx] = 0;
2447 pg1->wire_count--;
2448 pmap->pm_stats.resident_count--;
2449
2450 if (pg1->wire_count > 1)
2451 return;
2452
2453 pg1->wire_count = 0;
2454 pptes = (pd_entry_t *)PMAP_DIRECT_MAP(npa2);
2455 pptes[l2idx] = 0;
2456 uvm_pagefree(pg1);
2457 pmap->pm_stats.resident_count--;
2458
2459 pg2->wire_count--;
2460 if (pg2->wire_count > 1)
2461 return;
2462
2463 pg2->wire_count = 0;
2464 pptes = (pd_entry_t *)PMAP_DIRECT_MAP(npa3);
2465 pptes[l3idx] = 0;
2466 uvm_pagefree(pg2);
2467 pmap->pm_stats.resident_count--;
2468
2469 pg3->wire_count--;
2470 if (pg3->wire_count > 1)
2471 return;
2472
2473 pg3->wire_count = 0;
2474 pptes = pd4;
2475 pptes[l4idx] = 0;
2476 uvm_pagefree(pg3);
2477 pmap->pm_stats.resident_count--;
2478}
2479
2480int
2481pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot)
2482{
2483 uint64_t l4idx, l3idx, l2idx, l1idx;
2484 pd_entry_t *pd, npte;
2485 struct vm_page *ptp, *pptp;
2486 paddr_t npa;
2487 struct uvm_object *obj;
2488
2489 if (gpa > MAXDSIZ((paddr_t)32*1024*1024*1024))
2490 return ENOMEM12;
2491
2492 l4idx = (gpa & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */
2493 l3idx = (gpa & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */
2494 l2idx = (gpa & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */
2495 l1idx = (gpa & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */
2496
2497 /* Start at PML4 / top level */
2498 pd = (pd_entry_t *)pmap->pm_pdir;
2499
2500 if (pd == NULL((void *)0))
2501 return ENOMEM12;
2502
2503 /* npa = physaddr of PDPT */
2504 npa = pd[l4idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2505
2506 /* Valid PML4e for the 512GB region containing gpa? */
2507 if (!npa) {
2508 /* No valid PML4e - allocate PDPT page and set PML4e */
2509 obj = &pmap->pm_obj[2]; /* PML4 UVM object */
2510 ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 3), NULL,
2511 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2512
2513 if (ptp == NULL((void *)0))
2514 return ENOMEM12;
2515
2516 /*
2517 * New PDPT page - we are setting the first entry, so set
2518 * the wired count to 1
2519 */
2520 ptp->wire_count = 1;
2521
2522 /* Calculate phys address of this new PDPT page */
2523 npa = VM_PAGE_TO_PHYS(ptp)((ptp)->phys_addr);
2524
2525 /*
2526 * Higher levels get full perms; specific permissions are
2527 * entered at the lowest level.
2528 */
2529 pd[l4idx] = (npa | EPT_R(1ULL << 0) | EPT_W(1ULL << 1) | EPT_X(1ULL << 2));
2530
2531 pmap->pm_stats.resident_count++;
2532
2533 pptp = ptp;
2534 } else {
2535 /* Already allocated PML4e */
2536 pptp = PHYS_TO_VM_PAGE(npa);
2537 }
2538
2539 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2540 if (pd == NULL((void *)0))
2541 panic("%s: can't locate PDPT @ pa=0x%llx", __func__,
2542 (uint64_t)npa);
2543
2544 /* npa = physaddr of PD page */
2545 npa = pd[l3idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2546
2547 /* Valid PDPTe for the 1GB region containing gpa? */
2548 if (!npa) {
2549 /* No valid PDPTe - allocate PD page and set PDPTe */
2550 obj = &pmap->pm_obj[1]; /* PDPT UVM object */
2551 ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 2), NULL,
2552 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2553
2554 if (ptp == NULL((void *)0))
2555 return ENOMEM12;
2556
2557 /*
2558 * New PD page - we are setting the first entry, so set
2559 * the wired count to 1
2560 */
2561 ptp->wire_count = 1;
2562 pptp->wire_count++;
2563
2564 npa = VM_PAGE_TO_PHYS(ptp)((ptp)->phys_addr);
2565
2566 /*
2567 * Higher levels get full perms; specific permissions are
2568 * entered at the lowest level.
2569 */
2570 pd[l3idx] = (npa | EPT_R(1ULL << 0) | EPT_W(1ULL << 1) | EPT_X(1ULL << 2));
2571
2572 pmap->pm_stats.resident_count++;
2573
2574 pptp = ptp;
2575 } else {
2576 /* Already allocated PDPTe */
2577 pptp = PHYS_TO_VM_PAGE(npa);
2578 }
2579
2580 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2581 if (pd == NULL((void *)0))
2582 panic("%s: can't locate PD page @ pa=0x%llx", __func__,
2583 (uint64_t)npa);
2584
2585 /* npa = physaddr of PT page */
2586 npa = pd[l2idx] & PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2587
2588 /* Valid PDE for the 2MB region containing gpa? */
2589 if (!npa) {
2590 /* No valid PDE - allocate PT page and set PDE */
2591 obj = &pmap->pm_obj[0]; /* PDE UVM object */
2592 ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 1), NULL,
2593 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2594
2595 if (ptp == NULL((void *)0))
2596 return ENOMEM12;
2597
2598 pptp->wire_count++;
2599
2600 npa = VM_PAGE_TO_PHYS(ptp)((ptp)->phys_addr);
2601
2602 /*
2603 * Higher levels get full perms; specific permissions are
2604 * entered at the lowest level.
2605 */
2606 pd[l2idx] = (npa | EPT_R(1ULL << 0) | EPT_W(1ULL << 1) | EPT_X(1ULL << 2));
2607
2608 pmap->pm_stats.resident_count++;
2609
2610 } else {
2611 /* Find final ptp */
2612 ptp = PHYS_TO_VM_PAGE(npa);
2613 if (ptp == NULL((void *)0))
2614 panic("%s: ptp page vanished?", __func__);
2615 }
2616
2617 pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
2618 if (pd == NULL((void *)0))
2619 panic("%s: can't locate PT page @ pa=0x%llx", __func__,
2620 (uint64_t)npa);
2621
2622 npte = hpa | EPT_WB(6ULL << 3);
2623 if (prot & PROT_READ0x01)
2624 npte |= EPT_R(1ULL << 0);
2625 if (prot & PROT_WRITE0x02)
2626 npte |= EPT_W(1ULL << 1);
2627 if (prot & PROT_EXEC0x04)
2628 npte |= EPT_X(1ULL << 2);
2629
2630 if (pd[l1idx] == 0) {
2631 ptp->wire_count++;
2632 pmap->pm_stats.resident_count++;
2633 } else {
2634 /* XXX flush ept */
2635 }
2636
2637 pd[l1idx] = npte;
2638
2639 return 0;
2640}
2641
2642/*
2643 * pmap_enter: enter a mapping into a pmap
2644 *
2645 * => must be done "now" ... no lazy-evaluation
2646 */
2647
2648int
2649pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
2650{
2651 pt_entry_t opte, npte;
2652 struct vm_page *ptp, *pg = NULL((void *)0);
2653 struct pv_entry *pve, *opve = NULL((void *)0);
2654 int ptpdelta, wireddelta, resdelta;
2655 int wired = (flags & PMAP_WIRED0x00000010) != 0;
2656 int nocache = (pa & PMAP_NOCACHE0x1) != 0;
2657 int wc = (pa & PMAP_WC0x2) != 0;
2658 int error, shootself;
2659 paddr_t scr3;
2660
2661 if (pmap->pm_type == PMAP_TYPE_EPT2)
2662 return pmap_enter_ept(pmap, va, pa, prot);
2663
2664 KASSERT(!(wc && nocache));
2665 pa &= PMAP_PA_MASK~((paddr_t)((1 << 12) - 1));
2666
2667#ifdef DIAGNOSTIC1
2668 if (va == (vaddr_t) PDP_BASE)
2669 panic("%s: trying to map over PDP!", __func__);
2670
2671 /* sanity check: kernel PTPs should already have been pre-allocated */
2672 if (va >= VM_MIN_KERNEL_ADDRESS &&
2673 !pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]))
2674 panic("%s: missing kernel PTP for va %lx!", __func__, va);
2675
2676#endif
2677
2678 pve = pool_get(&pmap_pv_pool, PR_NOWAIT0x0002);
2679 if (pve == NULL((void *)0)) {
2680 if (flags & PMAP_CANFAIL0x00000020) {
2681 error = ENOMEM12;
2682 goto out;
2683 }
2684 panic("%s: no pv entries available", __func__);
2685 }
2686
2687 /*
2688 * map in ptes and get a pointer to our PTP (unless we are the kernel)
2689 */
2690
2691 scr3 = pmap_map_ptes(pmap);
2692 shootself = (scr3 == 0);
2693 if (pmap == pmap_kernel()(&kernel_pmap_store)) {
2694 ptp = NULL((void *)0);
2695 } else {
2696 ptp = pmap_get_ptp(pmap, va);
2697 if (ptp == NULL((void *)0)) {
2698 if (flags & PMAP_CANFAIL0x00000020) {
2699 pmap_unmap_ptes(pmap, scr3);
2700 error = ENOMEM12;
2701 goto out;
2702 }
2703 panic("%s: get ptp failed", __func__);
2704 }
2705 }
2706 opte = PTE_BASE[pl1_i(va)]; /* old PTE */
2707
2708 /*
2709 * is there currently a valid mapping at our VA?
2710 */
2711
2712 if (pmap_valid_entry(opte)((opte) & 0x0000000000000001UL)) {
2713 /*
2714 * first, calculate pm_stats updates. resident count will not
2715 * change since we are replacing/changing a valid mapping.
2716 * wired count might change...
2717 */
2718
2719 resdelta = 0;
2720 if (wired && (opte & PG_W0x0000000000000200UL) == 0)
2721 wireddelta = 1;
2722 else if (!wired && (opte & PG_W0x0000000000000200UL) != 0)
2723 wireddelta = -1;
2724 else
2725 wireddelta = 0;
2726 ptpdelta = 0;
2727
2728 /*
2729 * is the currently mapped PA the same as the one we
2730 * want to map?
2731 */
2732
2733 if ((opte & PG_FRAME0x000ffffffffff000UL) == pa) {
2734
2735 /* if this is on the PVLIST, sync R/M bit */
2736 if (opte & PG_PVLIST0x0000000000000400UL) {
2737 pg = PHYS_TO_VM_PAGE(pa);
2738#ifdef DIAGNOSTIC1
2739 if (pg == NULL((void *)0))
2740 panic("%s: same pa, PG_PVLIST "
2741 "mapping with unmanaged page: "
2742 "va 0x%lx, opte 0x%llx, pa 0x%lx",
2743 __func__, va, opte, pa);
2744#endif
2745 pmap_sync_flags_pte(pg, opte);
2746 } else {
2747#ifdef DIAGNOSTIC1
2748 if (PHYS_TO_VM_PAGE(pa) != NULL((void *)0))
2749 panic("%s: same pa, no PG_PVLIST "
2750 "mapping with managed page: "
2751 "va 0x%lx, opte 0x%llx, pa 0x%lx",
2752 __func__, va, opte, pa);
2753#endif
2754 }
2755 goto enter_now;
2756 }
2757
2758 /*
2759 * changing PAs: we must remove the old one first
2760 */
2761
2762 /*
2763 * if current mapping is on a pvlist,
2764 * remove it (sync R/M bits)
2765 */
2766
2767 if (opte & PG_PVLIST0x0000000000000400UL) {
2768 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME0x000ffffffffff000UL);
2769#ifdef DIAGNOSTIC1
2770 if (pg == NULL((void *)0))
2771 panic("%s: PG_PVLIST mapping with unmanaged "
2772 "page: va 0x%lx, opte 0x%llx, pa 0x%lx",
2773 __func__, va, opte, pa);
2774#endif
2775 pmap_sync_flags_pte(pg, opte);
2776 opve = pmap_remove_pv(pg, pmap, va);
2777 pg = NULL((void *)0); /* This is not the page we are looking for */
2778 }
2779 } else { /* opte not valid */
2780 resdelta = 1;
2781 if (wired)
2782 wireddelta = 1;
2783 else
2784 wireddelta = 0;
2785 if (ptp != NULL((void *)0))
2786 ptpdelta = 1;
2787 else
2788 ptpdelta = 0;
2789 }
2790
2791 /*
2792 * pve is either NULL or points to a now-free pv_entry structure
2793 * (the latter case is if we called pmap_remove_pv above).
2794 *
2795 * if this entry is to be on a pvlist, enter it now.
2796 */
2797
2798 if (pmap_initialized)
2799 pg = PHYS_TO_VM_PAGE(pa);
2800
2801 if (pg != NULL((void *)0)) {
2802 pmap_enter_pv(pg, pve, pmap, va, ptp);
2803 pve = NULL((void *)0);
2804 }
2805
2806enter_now:
2807 /*
2808 * at this point pg is !NULL if we want the PG_PVLIST bit set
2809 */
2810
2811 pmap->pm_stats.resident_count += resdelta;
2812 pmap->pm_stats.wired_count += wireddelta;
2813 if (ptp != NULL((void *)0))
2814 ptp->wire_count += ptpdelta;
2815
2816 KASSERT(pg == PHYS_TO_VM_PAGE(pa));
2817
2818 npte = pa | protection_codes[prot] | PG_V0x0000000000000001UL;
2819 if (pg != NULL((void *)0)) {
2820 npte |= PG_PVLIST0x0000000000000400UL;
2821 /*
2822 * make sure that if the page is write combined all
2823 * instances of pmap_enter make it so.
2824 */
2825 if (pg->pg_flags & PG_PMAP_WC0x04000000) {
2826 KASSERT(nocache == 0);
2827 wc = 1;
2828 }
2829 }
2830 if (wc)
2831 npte |= pmap_pg_wc;
2832 if (wired)
2833 npte |= PG_W0x0000000000000200UL;
2834 if (nocache)
2835 npte |= PG_N0x0000000000000010UL;
2836 if (va < VM_MAXUSER_ADDRESS0x00007f7fffffc000)
2837 npte |= PG_u0x0000000000000004UL;
2838 else if (va < VM_MAX_ADDRESS0x00007fbfdfeff000)
2839 npte |= (PG_u0x0000000000000004UL | PG_RW0x0000000000000002UL); /* XXXCDC: no longer needed? */
2840 if (pmap == pmap_kernel()(&kernel_pmap_store))
2841 npte |= pg_g_kern;
2842
2843 PTE_BASE[pl1_i(va)] = npte; /* zap! */
2844
2845 /*
2846 * If we changed anything other than modified/used bits,
2847 * flush the TLB. (is this overkill?)
2848 */
2849 if (pmap_valid_entry(opte)((opte) & 0x0000000000000001UL)) {
2850 if (nocache && (opte & PG_N0x0000000000000010UL) == 0)
2851 wbinvd_on_all_cpus();
2852 pmap_tlb_shootpage(pmap, va, shootself);
2853 }
2854
2855 pmap_unmap_ptes(pmap, scr3);
2856 pmap_tlb_shootwait();
2857
2858 error = 0;
2859
2860out:
2861 if (pve != NULL((void *)0))
2862 pool_put(&pmap_pv_pool, pve);
2863 if (opve != NULL((void *)0))
2864 pool_put(&pmap_pv_pool, opve);
2865
2866 return error;
2867}
2868
2869int
2870pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp)
2871{
2872 struct vm_page *ptp;
2873 struct pmap *kpm = pmap_kernel()(&kernel_pmap_store);
2874
2875 if (uvm.page_init_done == 0) {
2876 vaddr_t va;
2877
2878 /*
2879 * we're growing the kernel pmap early (from
2880 * uvm_pageboot_alloc()). this case must be
2881 * handled a little differently.
2882 */
2883
2884 va = pmap_steal_memory(PAGE_SIZE, NULL, NULL);
2885 *paddrp = PMAP_DIRECT_UNMAP(va);
2886 } else {
2887 ptp = uvm_pagealloc(&kpm->pm_obj[level - 1],
2888 ptp_va2o(va, level), NULL,
2889 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2890 if (ptp == NULL((void *)0))
2891 panic("%s: out of memory", __func__);
2892 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
2893 ptp->wire_count = 1;
2894 *paddrp = VM_PAGE_TO_PHYS(ptp)((ptp)->phys_addr);
2895 }
2896 kpm->pm_stats.resident_count++;
2897 return 1;
2898}
2899
2900/*
2901 * Allocate the amount of specified ptps for a ptp level, and populate
2902 * all levels below accordingly, mapping virtual addresses starting at
2903 * kva.
2904 *
2905 * Used by pmap_growkernel.
2906 */
2907void
2908pmap_alloc_level(vaddr_t kva, int lvl, long *needed_ptps)
2909{
2910 unsigned long i;
2911 vaddr_t va;
2912 paddr_t pa;
2913 unsigned long index, endindex;
2914 int level;
2915 pd_entry_t *pdep;
2916
2917 for (level = lvl; level > 1; level--) {
2918 if (level == PTP_LEVELS)
2919 pdep = pmap_kernel()->pm_pdir;
2920 else
2921 pdep = normal_pdes[level - 2];
2922 va = kva;
2923 index = pl_i(kva, level);
2924 endindex = index + needed_ptps[level - 1];
2925 /*
2926 * XXX special case for first time call.
2927 */
2928 if (nkptp[level - 1] != 0)
2929 index++;
2930 else
2931 endindex--;
2932
2933 for (i = index; i <= endindex; i++) {
2934 pmap_get_physpage(va, level - 1, &pa);
2935 pdep[i] = pa | PG_RW0x0000000000000002UL | PG_V0x0000000000000001UL | pg_nx;
2936 nkptp[level - 1]++;
2937 va += nbpd[level - 1];
2938 }
2939 }
2940}
2941
2942/*
2943 * pmap_growkernel: increase usage of KVM space
2944 *
2945 * => we allocate new PTPs for the kernel and install them in all
2946 * the pmaps on the system.
2947 */
2948
2949static vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS0xffff800000000000;
2950
2951vaddr_t
2952pmap_growkernel(vaddr_t maxkvaddr)
2953{
2954 struct pmap *kpm = pmap_kernel()(&kernel_pmap_store), *pm;
2955 int s, i;
2956 unsigned newpdes;
2957 long needed_kptp[PTP_LEVELS4], target_nptp, old;
2958
2959 if (maxkvaddr <= pmap_maxkvaddr)
2960 return pmap_maxkvaddr;
2961
2962 maxkvaddr = x86_round_pdr(maxkvaddr);
2963 old = nkptp[PTP_LEVELS4 - 1];
2964 /*
2965 * This loop could be optimized more, but pmap_growkernel()
2966 * is called infrequently.
2967 */
2968 for (i = PTP_LEVELS - 1; i >= 1; i--) {
2969 target_nptp = pl_i(maxkvaddr, i + 1) -
2970 pl_i(VM_MIN_KERNEL_ADDRESS, i + 1);
2971 /*
2972 * XXX only need to check toplevel.
2973 */
2974 if (target_nptp > nkptpmax[i])
2975 panic("%s: out of KVA space", __func__);
2976 needed_kptp[i] = target_nptp - nkptp[i] + 1;
2977 }
2978
2979
2980 s = splhigh(); /* to be safe */
2981 pmap_alloc_level(pmap_maxkvaddr, PTP_LEVELS, needed_kptp);
2982
2983 /*
2984 * If the number of top level entries changed, update all
2985 * pmaps.
2986 */
2987 if (needed_kptp[PTP_LEVELS - 1] != 0) {
2988 newpdes = nkptp[PTP_LEVELS - 1] - old;
2989 mtx_enter(&pmaps_lock);
2990 LIST_FOREACH(pm, &pmaps, pm_list) {
2991 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
2992 &kpm->pm_pdir[PDIR_SLOT_KERN + old],
2993 newpdes * sizeof (pd_entry_t));
2994 }
2995 mtx_leave(&pmaps_lock);
2996 }
2997 pmap_maxkvaddr = maxkvaddr;
2998 splx(s);
2999
3000 return maxkvaddr;
3001}
3002
3003vaddr_t
3004pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
3005{
3006 int segno;
3007 u_int npg;
3008 vaddr_t va;
3009 paddr_t pa;
3010 struct vm_physseg *seg;
3011
3012 size = round_page(size);
3013 npg = atop(size);
3014
3015 for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
3016 if (seg->avail_end - seg->avail_start < npg)
3017 continue;
3018 /*
3019 * We can only steal at an ``unused'' segment boundary,
3020 * i.e. either at the start or at the end.
3021 */
3022 if (seg->avail_start == seg->start ||
3023 seg->avail_end == seg->end)
3024 break;
3025 }
3026 if (segno == vm_nphysseg) {
3027 panic("%s: out of memory", __func__);
3028 } else {
3029 if (seg->avail_start == seg->start) {
3030 pa = ptoa(seg->avail_start)((paddr_t)(seg->avail_start) << 12);
3031 seg->avail_start += npg;
3032 seg->start += npg;
3033 } else {
3034 pa = ptoa(seg->avail_end)((paddr_t)(seg->avail_end) << 12) - size;
3035 seg->avail_end -= npg;
3036 seg->end -= npg;
3037 }
3038 /*
3039 * If all the segment has been consumed now, remove it.
3040 * Note that the crash dump code still knows about it
3041 * and will dump it correctly.
3042 */
3043 if (seg->start == seg->end) {
3044 if (vm_nphysseg-- == 1)
3045 panic("%s: out of memory", __func__);
3046 while (segno < vm_nphysseg) {
3047 seg[0] = seg[1]; /* struct copy */
3048 seg++;
3049 segno++;
3050 }
3051 }
3052
3053 va = PMAP_DIRECT_MAP(pa);
3054 memset((void *)va, 0, size);
3055 }
3056
3057 if (start != NULL((void *)0))
3058 *start = virtual_avail;
3059 if (end != NULL((void *)0))
3060 *end = VM_MAX_KERNEL_ADDRESS0xffff800100000000;
3061
3062 return (va);
3063}
3064
3065void
3066pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
3067{
3068 *vstartp = virtual_avail;
3069 *vendp = VM_MAX_KERNEL_ADDRESS0xffff800100000000;
3070}
3071
3072/*
3073 * pmap_convert
3074 *
3075 * Converts 'pmap' to the new 'mode'.
3076 *
3077 * Parameters:
3078 * pmap: the pmap to convert
3079 * mode: the new mode (see pmap.h, PMAP_TYPE_xxx)
3080 *
3081 * Return value:
3082 * always 0
3083 */
3084int
3085pmap_convert(struct pmap *pmap, int mode)
3086{
3087 pt_entry_t *pte;
3088
3089 pmap->pm_type = mode;
3090
3091 if (mode == PMAP_TYPE_EPT2) {
3092 /* Clear PML4 */
3093 pte = (pt_entry_t *)pmap->pm_pdir;
3094 memset(pte, 0, PAGE_SIZE)__builtin_memset((pte), (0), ((1 << 12)));
3095
3096 /* Give back the meltdown pdir */
3097 if (pmap->pm_pdir_intel != NULL((void *)0)) {
3098 pool_put(&pmap_pdp_pool, pmap->pm_pdir_intel);
3099 pmap->pm_pdir_intel = NULL((void *)0);
3100 }
3101 }
3102
3103 return (0);
3104}
3105
3106#ifdef MULTIPROCESSOR1
3107/*
3108 * Locking for tlb shootdown.
3109 *
3110 * We lock by setting tlb_shoot_wait to the number of cpus that will
3111 * receive our tlb shootdown. After sending the IPIs, we don't need to
3112 * worry about locking order or interrupts spinning for the lock because
3113 * the call that grabs the "lock" isn't the one that releases it. And
3114 * there is nothing that can block the IPI that releases the lock.
3115 *
3116 * The functions are organized so that we first count the number of
3117 * cpus we need to send the IPI to, then we grab the counter, then
3118 * we send the IPIs, then we finally do our own shootdown.
3119 *
3120 * Our shootdown is last to make it parallel with the other cpus
3121 * to shorten the spin time.
3122 *
3123 * Notice that we depend on failures to send IPIs only being able to
3124 * happen during boot. If they happen later, the above assumption
3125 * doesn't hold since we can end up in situations where no one will
3126 * release the lock if we get an interrupt in a bad moment.
3127 */
3128#ifdef MP_LOCKDEBUG
3129#include <ddb/db_output.h>
3130extern int __mp_lock_spinout;
3131#endif
3132
3133volatile long tlb_shoot_wait __attribute__((section(".kudata")));
3134
3135volatile vaddr_t tlb_shoot_addr1 __attribute__((section(".kudata")));
3136volatile vaddr_t tlb_shoot_addr2 __attribute__((section(".kudata")));
3137volatile int tlb_shoot_first_pcid __attribute__((section(".kudata")));
3138
3139
3140/* Obtain the "lock" for TLB shooting */
3141static inline int
3142pmap_start_tlb_shoot(long wait, const char *func)
3143{
3144 int s = splvm();
3145
3146 while (atomic_cas_ulong(&tlb_shoot_wait, 0, wait) != 0) {
3147#ifdef MP_LOCKDEBUG
3148 int nticks = __mp_lock_spinout;
3149#endif
3150 while (tlb_shoot_wait != 0) {
3151 CPU_BUSY_CYCLE();
3152#ifdef MP_LOCKDEBUG
3153 if (--nticks <= 0) {
3154 db_printf("%s: spun out", func);
3155 db_enter();
3156 nticks = __mp_lock_spinout;
3157 }
3158#endif
3159 }
3160 }
3161
3162 return s;
3163}
3164
3165void
3166pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
3167{
3168 struct cpu_info *ci, *self = curcpu();
3169 CPU_INFO_ITERATOR cii;
3170 long wait = 0;
3171 u_int64_t mask = 0;
3172 int is_kva = va >= VM_MIN_KERNEL_ADDRESS;
3173
3174 CPU_INFO_FOREACH(cii, ci) {
3175 if (ci == self || !(ci->ci_flags & CPUF_RUNNING0x2000))
3176 continue;
3177 if (!is_kva && !pmap_is_active(pm, ci))
3178 continue;
3179 mask |= (1ULL << ci->ci_cpuid);
3180 wait++;
3181 }
3182
3183 if (wait > 0) {
3184 int s = pmap_start_tlb_shoot(wait, __func__);
3185
3186 tlb_shoot_first_pcid = is_kva ? PCID_KERN0 : PCID_PROC1;
3187 tlb_shoot_addr1 = va;
3188 CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci->
ci_next)
{
3189 if ((mask & (1ULL << ci->ci_cpuid)) == 0)
3190 continue;
3191 if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG(0xf0 + 1)) != 0)
3192 panic("%s: ipi failed", __func__);
3193 }
3194 splx(s)spllower(s);
3195 }
3196
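        /*
         * Local shootdown: without PCID a plain invlpg is enough; with
         * PCID a kernel VA must be flushed in both the kernel and the
         * process PCID, while a user VA is flushed in the process PCID
         * and, on Meltdown-affected CPUs, in the separate Intel user
         * PCID as well.
         */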
3197         if (!pmap_use_pcid) {
3198                 if (shootself)
3199                         pmap_update_pg(va);
3200         } else if (is_kva) {
3201                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3202                 invpcid(INVPCID_ADDR, PCID_KERN, va);
3203         } else if (shootself) {
3204                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3205                 if (cpu_meltdown)
3206                         invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
3207         }
3208 }
3209
3210 void
3211 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
3212 {
3213         struct cpu_info *ci, *self = curcpu();
3214         CPU_INFO_ITERATOR cii;
3215         long wait = 0;
3216         u_int64_t mask = 0;
3217         int is_kva = sva >= VM_MIN_KERNEL_ADDRESS;
3218         vaddr_t va;
3219 
3220         CPU_INFO_FOREACH(cii, ci) {
3221                 if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
3222                         continue;
3223                 if (!is_kva && !pmap_is_active(pm, ci))
3224                         continue;
3225                 mask |= (1ULL << ci->ci_cpuid);
3226                 wait++;
3227         }
3228 
3229         if (wait > 0) {
3230                 int s = pmap_start_tlb_shoot(wait, __func__);
3231 
3232                 tlb_shoot_first_pcid = is_kva ? PCID_KERN : PCID_PROC;
3233                 tlb_shoot_addr1 = sva;
3234                 tlb_shoot_addr2 = eva;
3235                 CPU_INFO_FOREACH(cii, ci) {
3236                         if ((mask & (1ULL << ci->ci_cpuid)) == 0)
3237                                 continue;
3238                         if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
3239                                 panic("%s: ipi failed", __func__);
3240                 }
3241                 splx(s);
3242         }
3243 
3244         if (!pmap_use_pcid) {
3245                 if (shootself) {
3246                         for (va = sva; va < eva; va += PAGE_SIZE)
3247                                 pmap_update_pg(va);
3248                 }
3249         } else if (is_kva) {
3250                 for (va = sva; va < eva; va += PAGE_SIZE) {
3251                         invpcid(INVPCID_ADDR, PCID_PROC, va);
3252                         invpcid(INVPCID_ADDR, PCID_KERN, va);
3253                 }
3254         } else if (shootself) {
3255                 if (cpu_meltdown) {
3256                         for (va = sva; va < eva; va += PAGE_SIZE) {
3257                                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3258                                 invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
3259                         }
3260                 } else {
3261                         for (va = sva; va < eva; va += PAGE_SIZE)
3262                                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3263                 }
3264         }
3265 }
3266
3267 void
3268 pmap_tlb_shoottlb(struct pmap *pm, int shootself)
3269 {
3270         struct cpu_info *ci, *self = curcpu();
3271         CPU_INFO_ITERATOR cii;
3272         long wait = 0;
3273         u_int64_t mask = 0;
3274 
3275         KASSERT(pm != pmap_kernel());
3276 
3277         CPU_INFO_FOREACH(cii, ci) {
3278                 if (ci == self || !pmap_is_active(pm, ci) ||
3279                     !(ci->ci_flags & CPUF_RUNNING))
3280                         continue;
3281                 mask |= (1ULL << ci->ci_cpuid);
3282                 wait++;
3283         }
3284 
3285         if (wait) {
3286                 int s = pmap_start_tlb_shoot(wait, __func__);
3287 
3288                 CPU_INFO_FOREACH(cii, ci) {
3289                         if ((mask & (1ULL << ci->ci_cpuid)) == 0)
3290                                 continue;
3291                         if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
3292                                 panic("%s: ipi failed", __func__);
3293                 }
3294                 splx(s);
3295         }
3296 
3297         if (shootself) {
3298                 if (!pmap_use_pcid)
3299                         tlbflush();
3300                 else {
3301                         invpcid(INVPCID_PCID, PCID_PROC, 0);
3302                         if (cpu_meltdown)
3303                                 invpcid(INVPCID_PCID, PCID_PROC_INTEL, 0);
3304                 }
3305         }
3306 }
3307
3308 void
3309 pmap_tlb_shootwait(void)
3310 {
3311 #ifdef MP_LOCKDEBUG
3312         int nticks = __mp_lock_spinout;
3313 #endif
3314         while (tlb_shoot_wait != 0) {
3315                 CPU_BUSY_CYCLE();
3316 #ifdef MP_LOCKDEBUG
3317                 if (--nticks <= 0) {
3318                         db_printf("%s: spun out", __func__);
3319                         db_enter();
3320                         nticks = __mp_lock_spinout;
3321                 }
3322 #endif
3323         }
3324 }
3325
3326 #else /* MULTIPROCESSOR */
3327
3328 void
3329 pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
3330 {
3331         if (!pmap_use_pcid) {
3332                 if (shootself)
3333                         pmap_update_pg(va);
3334         } else if (va >= VM_MIN_KERNEL_ADDRESS) {
3335                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3336                 invpcid(INVPCID_ADDR, PCID_KERN, va);
3337         } else if (shootself) {
3338                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3339                 if (cpu_meltdown)
3340                         invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
3341         }
3342 }
3343
3344 void
3345 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
3346 {
3347         vaddr_t va;
3348 
3349         if (!pmap_use_pcid) {
3350                 if (shootself) {
3351                         for (va = sva; va < eva; va += PAGE_SIZE)
3352                                 pmap_update_pg(va);
3353                 }
3354         } else if (sva >= VM_MIN_KERNEL_ADDRESS) {
3355                 for (va = sva; va < eva; va += PAGE_SIZE) {
3356                         invpcid(INVPCID_ADDR, PCID_PROC, va);
3357                         invpcid(INVPCID_ADDR, PCID_KERN, va);
3358                 }
3359         } else if (shootself) {
3360                 if (cpu_meltdown) {
3361                         for (va = sva; va < eva; va += PAGE_SIZE) {
3362                                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3363                                 invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
3364                         }
3365                 } else {
3366                         for (va = sva; va < eva; va += PAGE_SIZE)
3367                                 invpcid(INVPCID_ADDR, PCID_PROC, va);
3368                 }
3369         }
3370 }
3371
3372 void
3373 pmap_tlb_shoottlb(struct pmap *pm, int shootself)
3374 {
3375         if (shootself) {
3376                 if (!pmap_use_pcid)
3377                         tlbflush();
3378                 else {
3379                         invpcid(INVPCID_PCID, PCID_PROC, 0);
3380                         if (cpu_meltdown)
3381                                 invpcid(INVPCID_PCID, PCID_PROC_INTEL, 0);
3382                 }
3383         }
3384 }
3385 #endif /* MULTIPROCESSOR */