File: | arch/amd64/amd64/machdep.c |
Warning: | line 1616, column 8 Access to field 'avail_end' results in a dereference of a null pointer (loaded from variable 'vps') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
/*	$OpenBSD: machdep.c,v 1.275 2021/10/06 15:46:03 claudio Exp $	*/
/*	$NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
 */
67 | ||||
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/exec.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/msgbuf.h>
#include <sys/mount.h>
#include <sys/extent.h>
#include <sys/core.h>
#include <sys/kcore.h>
#include <sys/syscallargs.h>

#include <dev/cons.h>
#include <stand/boot/bootarg.h>

#include <net/if.h>
#include <uvm/uvm_extern.h>

#include <sys/sysctl.h>

#include <machine/cpu_full.h>
#include <machine/cpufunc.h>
#include <machine/pio.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/fpu.h>
#include <machine/biosvar.h>
#include <machine/mpbiosvar.h>
#include <machine/kcore.h>
#include <machine/tss.h>

#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
extern int db_console;
#endif

#include "isa.h"
#include "isadma.h"
#include "ksyms.h"

#include "acpi.h"
#if NACPI > 0
#include <dev/acpi/acpivar.h>
#endif

#include "com.h"
#if NCOM > 0
#include <sys/tty.h>
#include <dev/ic/comvar.h>
#include <dev/ic/comreg.h>
#endif

#include "softraid.h"
#if NSOFTRAID > 0
#include <dev/softraidvar.h>
#endif

#ifdef HIBERNATE
#include <machine/hibernate_var.h>
#endif /* HIBERNATE */

#include "ukbd.h"
#include "pckbc.h"
#if NPCKBC > 0 && NUKBD > 0
#include <dev/ic/pckbcvar.h>
#endif
144 | /* #define MACHDEP_DEBUG */ | |||
145 | ||||
146 | #ifdef MACHDEP_DEBUG | |||
147 | #define DPRINTF(x...) do { printf(x); } while(0) | |||
148 | #else | |||
149 | #define DPRINTF(x...) | |||
150 | #endif /* MACHDEP_DEBUG */ | |||
151 | ||||
152 | /* the following is used externally (sysctl_hw) */ | |||
153 | char machine[] = MACHINE"amd64"; | |||
154 | ||||
155 | /* | |||
156 | * switchto vectors | |||
157 | */ | |||
158 | void (*cpu_idle_cycle_fcn)(void) = NULL((void *)0); | |||
159 | ||||
160 | /* the following is used externally for concurrent handlers */ | |||
161 | int setperf_prio = 0; | |||
162 | ||||
163 | #ifdef CPURESET_DELAY | |||
164 | int cpureset_delay = CPURESET_DELAY; | |||
165 | #else | |||
166 | int cpureset_delay = 0; | |||
167 | #endif | |||
168 | ||||
169 | int physmem; | |||
170 | u_int64_t dumpmem_low; | |||
171 | u_int64_t dumpmem_high; | |||
172 | extern int boothowto; | |||
173 | int cpu_class; | |||
174 | ||||
175 | paddr_t dumpmem_paddr; | |||
176 | vaddr_t dumpmem_vaddr; | |||
177 | psize_t dumpmem_sz; | |||
178 | ||||
179 | vaddr_t kern_end; | |||
180 | ||||
181 | vaddr_t msgbuf_vaddr; | |||
182 | paddr_t msgbuf_paddr; | |||
183 | ||||
184 | vaddr_t idt_vaddr; | |||
185 | paddr_t idt_paddr; | |||
186 | ||||
187 | vaddr_t lo32_vaddr; | |||
188 | paddr_t lo32_paddr; | |||
189 | paddr_t tramp_pdirpa; | |||
190 | ||||
191 | int kbd_reset; | |||
192 | int lid_action = 1; | |||
193 | int pwr_action = 1; | |||
194 | int forceukbd; | |||
195 | ||||
196 | /* | |||
197 | * safepri is a safe priority for sleep to set for a spin-wait | |||
198 | * during autoconfiguration or after a panic. | |||
199 | */ | |||
200 | int safepri = 0; | |||
201 | ||||
202 | struct vm_map *exec_map = NULL((void *)0); | |||
203 | struct vm_map *phys_map = NULL((void *)0); | |||
204 | ||||
205 | /* UVM constraint ranges. */ | |||
206 | struct uvm_constraint_range isa_constraint = { 0x0, 0x00ffffffUL }; | |||
207 | struct uvm_constraint_range dma_constraint = { 0x0, 0xffffffffUL }; | |||
208 | struct uvm_constraint_range *uvm_md_constraints[] = { | |||
209 | &isa_constraint, | |||
210 | &dma_constraint, | |||
211 | NULL((void *)0), | |||
212 | }; | |||
213 | ||||
214 | paddr_t avail_start; | |||
215 | paddr_t avail_end; | |||
216 | ||||
217 | void (*delay_func)(int) = i8254_delay; | |||
218 | void (*initclock_func)(void) = i8254_initclocks; | |||
219 | ||||
220 | /* | |||
221 | * Format of boot information passed to us by 32-bit /boot | |||
222 | */ | |||
223 | typedef struct _boot_args32 { | |||
224 | int ba_type; | |||
225 | int ba_size; | |||
226 | int ba_nextX; /* a ptr in 32-bit world, but not here */ | |||
227 | char ba_arg[1]; | |||
228 | } bootarg32_t; | |||
229 | ||||
230 | #define BOOTARGC_MAX(1 << 12) NBPG(1 << 12) /* one page */ | |||
231 | ||||
232 | bios_bootmac_t *bios_bootmac; | |||
233 | ||||
234 | /* locore copies the arguments from /boot to here for us */ | |||
235 | char bootinfo[BOOTARGC_MAX(1 << 12)]; | |||
236 | int bootinfo_size = BOOTARGC_MAX(1 << 12); | |||
237 | ||||
238 | void getbootinfo(char *, int); | |||
239 | ||||
240 | /* Data passed to us by /boot, filled in by getbootinfo() */ | |||
241 | bios_diskinfo_t *bios_diskinfo; | |||
242 | bios_memmap_t *bios_memmap; | |||
243 | u_int32_t bios_cksumlen; | |||
244 | bios_efiinfo_t *bios_efiinfo; | |||
245 | bios_ucode_t *bios_ucode; | |||
246 | ||||
247 | /* | |||
248 | * Size of memory segments, before any memory is stolen. | |||
249 | */ | |||
250 | phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX16]; | |||
251 | int mem_cluster_cnt; | |||
252 | ||||
253 | int cpu_dump(void); | |||
254 | int cpu_dumpsize(void); | |||
255 | u_long cpu_dump_mempagecnt(void); | |||
256 | void dumpsys(void); | |||
257 | void cpu_init_extents(void); | |||
258 | void map_tramps(void); | |||
259 | void init_x86_64(paddr_t); | |||
260 | void (*cpuresetfn)(void); | |||
261 | void enter_shared_special_pages(void); | |||
262 | ||||
263 | #ifdef APERTURE1 | |||
264 | int allowaperture = 0; | |||
265 | #endif | |||
266 | ||||
267 | /* | |||
268 | * Machine-dependent startup code | |||
269 | */ | |||
270 | void | |||
271 | cpu_startup(void) | |||
272 | { | |||
273 | vaddr_t minaddr, maxaddr; | |||
274 | ||||
275 | msgbuf_vaddr = PMAP_DIRECT_MAP(msgbuf_paddr)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (msgbuf_paddr)); | |||
276 | initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE)((((32 * (1 << 12))) + ((1 << 12) - 1)) & ~(( 1 << 12) - 1))); | |||
277 | ||||
278 | printf("%s", version); | |||
279 | startclocks(); | |||
280 | rtcinit(); | |||
281 | ||||
282 | printf("real mem = %lu (%luMB)\n", ptoa((psize_t)physmem)((paddr_t)((psize_t)physmem) << 12), | |||
283 | ptoa((psize_t)physmem)((paddr_t)((psize_t)physmem) << 12)/1024/1024); | |||
284 | ||||
285 | /* | |||
286 | * Allocate a submap for exec arguments. This map effectively | |||
287 | * limits the number of processes exec'ing at any time. | |||
288 | */ | |||
289 | minaddr = vm_map_min(kernel_map)((kernel_map)->min_offset); | |||
290 | exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | |||
291 | 16*NCARGS(512 * 1024), VM_MAP_PAGEABLE0x01, FALSE0, NULL((void *)0)); | |||
292 | ||||
293 | /* | |||
294 | * Allocate a submap for physio | |||
295 | */ | |||
296 | minaddr = vm_map_min(kernel_map)((kernel_map)->min_offset); | |||
297 | phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | |||
298 | VM_PHYS_SIZE(300*(1 << 12)), 0, FALSE0, NULL((void *)0)); | |||
299 | ||||
300 | printf("avail mem = %lu (%luMB)\n", ptoa((psize_t)uvmexp.free)((paddr_t)((psize_t)uvmexp.free) << 12), | |||
301 | ptoa((psize_t)uvmexp.free)((paddr_t)((psize_t)uvmexp.free) << 12)/1024/1024); | |||
302 | ||||
303 | bufinit(); | |||
304 | ||||
305 | if (boothowto & RB_CONFIG0x00400) { | |||
306 | #ifdef BOOT_CONFIG1 | |||
307 | user_config(); | |||
308 | #else | |||
309 | printf("kernel does not support -c; continuing..\n"); | |||
310 | #endif | |||
311 | } | |||
312 | ||||
313 | /* Safe for i/o port / memory space allocation to use malloc now. */ | |||
314 | x86_bus_space_mallocok(); | |||
315 | ||||
316 | #ifndef SMALL_KERNEL | |||
317 | cpu_ucode_setup(); | |||
318 | cpu_ucode_apply(&cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev)))); | |||
319 | #endif | |||
320 | cpu_tsx_disable(&cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev)))); | |||
321 | ||||
322 | /* enter the IDT and trampoline code in the u-k maps */ | |||
323 | enter_shared_special_pages(); | |||
324 | ||||
325 | /* initialize CPU0's TSS and GDT and put them in the u-k maps */ | |||
326 | cpu_enter_pages(&cpu_info_full_primary); | |||
327 | } | |||
328 | ||||
329 | /* | |||
330 | * enter_shared_special_pages | |||
331 | * | |||
332 | * Requests mapping of various special pages required in the Intel Meltdown | |||
333 | * case (to be entered into the U-K page table): | |||
334 | * | |||
335 | * 1 IDT page | |||
336 | * Various number of pages covering the U-K ".kutext" section. This section | |||
337 | * contains code needed during trampoline operation | |||
338 | * Various number of pages covering the U-K ".kudata" section. This section | |||
339 | * contains data accessed by the trampoline, before switching to U+K | |||
340 | * (for example, various shared global variables used by IPIs, etc) | |||
341 | * | |||
342 | * The linker script places the required symbols in the sections above. | |||
343 | * | |||
344 | * On CPUs not affected by Meltdown, the calls to pmap_enter_special below | |||
345 | * become no-ops. | |||
346 | */ | |||
347 | void | |||
348 | enter_shared_special_pages(void) | |||
349 | { | |||
350 | extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[]; | |||
351 | extern char __text_page_start[], __text_page_end[]; | |||
352 | extern char __kernel_kutext_page_phys[]; | |||
353 | extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[]; | |||
354 | vaddr_t va; | |||
355 | paddr_t pa; | |||
356 | ||||
357 | /* idt */ | |||
358 | pmap_enter_special(idt_vaddr, idt_paddr, PROT_READ0x01); | |||
359 | DPRINTF("%s: entered idt page va 0x%llx pa 0x%llx\n", __func__, | |||
360 | (uint64_t)idt_vaddr, (uint64_t)idt_paddr); | |||
361 | ||||
362 | /* .kutext section */ | |||
363 | va = (vaddr_t)__kutext_start; | |||
364 | pa = (paddr_t)__kernel_kutext_phys; | |||
365 | while (va < (vaddr_t)__kutext_end) { | |||
366 | pmap_enter_special(va, pa, PROT_READ0x01 | PROT_EXEC0x04); | |||
367 | DPRINTF("%s: entered kutext page va 0x%llx pa 0x%llx\n", | |||
368 | __func__, (uint64_t)va, (uint64_t)pa); | |||
369 | va += PAGE_SIZE(1 << 12); | |||
370 | pa += PAGE_SIZE(1 << 12); | |||
371 | } | |||
372 | ||||
373 | /* .kutext.page section */ | |||
374 | va = (vaddr_t)__text_page_start; | |||
375 | pa = (paddr_t)__kernel_kutext_page_phys; | |||
376 | while (va < (vaddr_t)__text_page_end) { | |||
377 | pmap_enter_special(va, pa, PROT_READ0x01 | PROT_EXEC0x04); | |||
378 | DPRINTF("%s: entered kutext.page va 0x%llx pa 0x%llx\n", | |||
379 | __func__, (uint64_t)va, (uint64_t)pa); | |||
380 | va += PAGE_SIZE(1 << 12); | |||
381 | pa += PAGE_SIZE(1 << 12); | |||
382 | } | |||
383 | ||||
384 | /* .kudata section */ | |||
385 | va = (vaddr_t)__kudata_start; | |||
386 | pa = (paddr_t)__kernel_kudata_phys; | |||
387 | while (va < (vaddr_t)__kudata_end) { | |||
388 | pmap_enter_special(va, pa, PROT_READ0x01 | PROT_WRITE0x02); | |||
389 | DPRINTF("%s: entered kudata page va 0x%llx pa 0x%llx\n", | |||
390 | __func__, (uint64_t)va, (uint64_t)pa); | |||
391 | va += PAGE_SIZE(1 << 12); | |||
392 | pa += PAGE_SIZE(1 << 12); | |||
393 | } | |||
394 | } | |||
395 | ||||
396 | /* | |||
397 | * Set up proc0's PCB and the cpu's TSS. | |||
398 | */ | |||
399 | void | |||
400 | x86_64_proc0_tss_ldt_init(void) | |||
401 | { | |||
402 | struct pcb *pcb; | |||
403 | ||||
404 | cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_curpcb = pcb = &proc0.p_addr->u_pcb; | |||
405 | pcb->pcb_fsbase = 0; | |||
406 | pcb->pcb_kstack = (u_int64_t)proc0.p_addr + USPACE(6 * (1 << 12)) - 16; | |||
407 | proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1; | |||
408 | ||||
409 | ltr(GSYSSEL(GPROC0_SEL, SEL_KPL)((((0) << 4) + (6 << 3)) | 0)); | |||
410 | lldt(0); | |||
411 | } | |||
412 | ||||
413 | bios_diskinfo_t * | |||
414 | bios_getdiskinfo(dev_t dev) | |||
415 | { | |||
416 | bios_diskinfo_t *pdi; | |||
417 | ||||
418 | if (bios_diskinfo == NULL((void *)0)) | |||
419 | return NULL((void *)0); | |||
420 | ||||
421 | for (pdi = bios_diskinfo; pdi->bios_number != -1; pdi++) { | |||
422 | if ((dev & B_MAGICMASK0xf0000000) == B_DEVMAGIC0xa0000000) { /* search by bootdev */ | |||
423 | if (pdi->bsd_dev == dev) | |||
424 | break; | |||
425 | } else { | |||
426 | if (pdi->bios_number == dev) | |||
427 | break; | |||
428 | } | |||
429 | } | |||
430 | ||||
431 | if (pdi->bios_number == -1) | |||
432 | return NULL((void *)0); | |||
433 | else | |||
434 | return pdi; | |||
435 | } | |||
436 | ||||
437 | int | |||
438 | bios_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, | |||
439 | size_t newlen, struct proc *p) | |||
440 | { | |||
441 | bios_diskinfo_t *pdi; | |||
442 | extern dev_t bootdev; | |||
443 | int biosdev; | |||
444 | ||||
445 | /* all sysctl names at this level except diskinfo are terminal */ | |||
446 | if (namelen != 1 && name[0] != BIOS_DISKINFO2) | |||
447 | return (ENOTDIR20); /* overloaded */ | |||
448 | ||||
449 | if (!(bootapiver & BAPIV_VECTOR0x00000002)) | |||
450 | return EOPNOTSUPP45; | |||
451 | ||||
452 | switch (name[0]) { | |||
453 | case BIOS_DEV1: | |||
454 | if ((pdi = bios_getdiskinfo(bootdev)) == NULL((void *)0)) | |||
455 | return ENXIO6; | |||
456 | biosdev = pdi->bios_number; | |||
457 | return sysctl_rdint(oldp, oldlenp, newp, biosdev); | |||
458 | case BIOS_DISKINFO2: | |||
459 | if (namelen != 2) | |||
460 | return ENOTDIR20; | |||
461 | if ((pdi = bios_getdiskinfo(name[1])) == NULL((void *)0)) | |||
462 | return ENXIO6; | |||
463 | return sysctl_rdstruct(oldp, oldlenp, newp, pdi, sizeof(*pdi)); | |||
464 | case BIOS_CKSUMLEN3: | |||
465 | return sysctl_rdint(oldp, oldlenp, newp, bios_cksumlen); | |||
466 | default: | |||
467 | return EOPNOTSUPP45; | |||
468 | } | |||
469 | /* NOTREACHED */ | |||
470 | } | |||
471 | ||||
472 | extern int tsc_is_invariant; | |||
473 | extern int amd64_has_xcrypt; | |||
474 | ||||
475 | const struct sysctl_bounded_args cpuctl_vars[] = { | |||
476 | { CPU_LIDACTION14, &lid_action, 0, 2 }, | |||
477 | { CPU_PWRACTION18, &pwr_action, 0, 2 }, | |||
478 | { CPU_CPUID7, &cpu_id, SYSCTL_INT_READONLY1,0 }, | |||
479 | { CPU_CPUFEATURE8, &cpu_feature, SYSCTL_INT_READONLY1,0 }, | |||
480 | { CPU_XCRYPT12, &amd64_has_xcrypt, SYSCTL_INT_READONLY1,0 }, | |||
481 | { CPU_INVARIANTTSC17, &tsc_is_invariant, SYSCTL_INT_READONLY1,0 }, | |||
482 | }; | |||
483 | ||||
484 | /* | |||
485 | * machine dependent system variables. | |||
486 | */ | |||
487 | int | |||
488 | cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, | |||
489 | size_t newlen, struct proc *p) | |||
490 | { | |||
491 | extern uint64_t tsc_frequency; | |||
492 | dev_t consdev; | |||
493 | dev_t dev; | |||
494 | ||||
495 | switch (name[0]) { | |||
496 | case CPU_CONSDEV1: | |||
497 | if (namelen != 1) | |||
498 | return (ENOTDIR20); /* overloaded */ | |||
499 | if (cn_tab != NULL((void *)0)) | |||
500 | consdev = cn_tab->cn_dev; | |||
501 | else | |||
502 | consdev = NODEV(dev_t)(-1); | |||
503 | return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev, | |||
504 | sizeof consdev)); | |||
505 | case CPU_CHR2BLK4: | |||
506 | if (namelen != 2) | |||
507 | return (ENOTDIR20); /* overloaded */ | |||
508 | dev = chrtoblk((dev_t)name[1]); | |||
509 | return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev)); | |||
510 | case CPU_BIOS2: | |||
511 | return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp, | |||
512 | newp, newlen, p); | |||
513 | case CPU_CPUVENDOR6: | |||
514 | return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor)); | |||
515 | case CPU_KBDRESET10: | |||
516 | if (securelevel > 0) | |||
517 | return (sysctl_rdint(oldp, oldlenp, newp, | |||
518 | kbd_reset)); | |||
519 | else | |||
520 | return (sysctl_int(oldp, oldlenp, newp, newlen, | |||
521 | &kbd_reset)); | |||
522 | case CPU_ALLOWAPERTURE5: | |||
523 | if (namelen != 1) | |||
524 | return (ENOTDIR20); /* overloaded */ | |||
525 | #ifdef APERTURE1 | |||
526 | if (securelevel > 0) | |||
527 | return (sysctl_int_lower(oldp, oldlenp, newp, newlen, | |||
528 | &allowaperture)); | |||
529 | else | |||
530 | return (sysctl_int(oldp, oldlenp, newp, newlen, | |||
531 | &allowaperture)); | |||
532 | #else | |||
533 | return (sysctl_rdint(oldp, oldlenp, newp, 0)); | |||
534 | #endif | |||
535 | #if NPCKBC1 > 0 && NUKBD1 > 0 | |||
536 | case CPU_FORCEUKBD15: | |||
537 | { | |||
538 | int error; | |||
539 | ||||
540 | if (forceukbd) | |||
541 | return (sysctl_rdint(oldp, oldlenp, newp, forceukbd)); | |||
542 | ||||
543 | error = sysctl_int(oldp, oldlenp, newp, newlen, &forceukbd); | |||
544 | if (forceukbd) | |||
545 | pckbc_release_console(); | |||
546 | return (error); | |||
547 | } | |||
548 | #endif | |||
549 | case CPU_TSCFREQ16: | |||
550 | return (sysctl_rdquad(oldp, oldlenp, newp, tsc_frequency)); | |||
551 | default: | |||
552 | return (sysctl_bounded_arr(cpuctl_vars, nitems(cpuctl_vars)(sizeof((cpuctl_vars)) / sizeof((cpuctl_vars)[0])), | |||
553 | name, namelen, oldp, oldlenp, newp, newlen)); | |||
554 | } | |||
555 | /* NOTREACHED */ | |||
556 | } | |||
557 | ||||
558 | /* | |||
559 | * Send an interrupt to process. | |||
560 | * | |||
561 | * Stack is set up to allow sigcode to call routine, followed by | |||
562 | * syscall to sigreturn routine below. After sigreturn resets the | |||
563 | * signal mask, the stack, and the frame pointer, it returns to the | |||
564 | * user specified pc. | |||
565 | */ | |||
566 | int | |||
567 | sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip, | |||
568 | int info, int onstack) | |||
569 | { | |||
570 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
571 | struct trapframe *tf = p->p_md.md_regs; | |||
572 | struct sigcontext ksc; | |||
573 | struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu; | |||
574 | register_t sp, scp, sip; | |||
575 | u_long sss; | |||
576 | ||||
577 | memset(&ksc, 0, sizeof ksc)__builtin_memset((&ksc), (0), (sizeof ksc)); | |||
578 | ksc.sc_rdi = tf->tf_rdi; | |||
579 | ksc.sc_rsi = tf->tf_rsi; | |||
580 | ksc.sc_rdx = tf->tf_rdx; | |||
581 | ksc.sc_rcx = tf->tf_rcx; | |||
582 | ksc.sc_r8 = tf->tf_r8; | |||
583 | ksc.sc_r9 = tf->tf_r9; | |||
584 | ksc.sc_r10 = tf->tf_r10; | |||
585 | ksc.sc_r11 = tf->tf_r11; | |||
586 | ksc.sc_r12 = tf->tf_r12; | |||
587 | ksc.sc_r13 = tf->tf_r13; | |||
588 | ksc.sc_r14 = tf->tf_r14; | |||
589 | ksc.sc_r15 = tf->tf_r15; | |||
590 | ksc.sc_rbx = tf->tf_rbx; | |||
591 | ksc.sc_rax = tf->tf_rax; | |||
592 | ksc.sc_rbp = tf->tf_rbp; | |||
593 | ksc.sc_rip = tf->tf_rip; | |||
594 | ksc.sc_cs = tf->tf_cs; | |||
595 | ksc.sc_rflags = tf->tf_rflags; | |||
596 | ksc.sc_rsp = tf->tf_rsp; | |||
597 | ksc.sc_ss = tf->tf_ss; | |||
598 | ksc.sc_mask = mask; | |||
599 | ||||
600 | /* Allocate space for the signal handler context. */ | |||
601 | if ((p->p_sigstk.ss_flags & SS_DISABLE0x0004) == 0 && | |||
602 | !sigonstack(tf->tf_rsp) && onstack) | |||
603 | sp = trunc_page((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size)(((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size) & ~((1 << 12) - 1)); | |||
604 | else | |||
605 | sp = tf->tf_rsp - 128; | |||
606 | ||||
607 | sp &= ~15ULL; /* just in case */ | |||
608 | sss = (sizeof(ksc) + 15) & ~15; | |||
609 | ||||
610 | /* Save FPU state to PCB if necessary, then copy it out */ | |||
611 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags & CPUF_USERXSTATE0x0200) { | |||
612 | curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags &= ~CPUF_USERXSTATE0x0200; | |||
613 | fpusavereset(&p->p_addr->u_pcb.pcb_savefpu); | |||
614 | } | |||
615 | sp -= fpu_save_len; | |||
616 | ksc.sc_fpstate = (struct fxsave64 *)sp; | |||
617 | if (copyout(sfp, (void *)sp, fpu_save_len)) | |||
618 | return 1; | |||
619 | ||||
620 | /* Now reset the FPU state in PCB */ | |||
621 | memcpy(&p->p_addr->u_pcb.pcb_savefpu,__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)) | |||
622 | &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len)__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)); | |||
623 | ||||
624 | sip = 0; | |||
625 | if (info) { | |||
626 | sip = sp - ((sizeof(*ksip) + 15) & ~15); | |||
627 | sss += (sizeof(*ksip) + 15) & ~15; | |||
628 | ||||
629 | if (copyout(ksip, (void *)sip, sizeof(*ksip))) | |||
630 | return 1; | |||
631 | } | |||
632 | scp = sp - sss; | |||
633 | ||||
634 | ksc.sc_cookie = (long)scp ^ p->p_p->ps_sigcookie; | |||
635 | if (copyout(&ksc, (void *)scp, sizeof(ksc))) | |||
636 | return 1; | |||
637 | ||||
638 | /* | |||
639 | * Build context to run handler in. | |||
640 | */ | |||
641 | tf->tf_rax = (u_int64_t)catcher; | |||
642 | tf->tf_rdi = sig; | |||
643 | tf->tf_rsi = sip; | |||
644 | tf->tf_rdx = scp; | |||
645 | ||||
646 | tf->tf_rip = (u_int64_t)p->p_p->ps_sigcode; | |||
647 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL)(((5) << 3) | 3); | |||
648 | tf->tf_rflags &= ~(PSL_T0x00000100|PSL_D0x00000400|PSL_VM0x00020000|PSL_AC0x00040000); | |||
649 | tf->tf_rsp = scp; | |||
650 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL)(((4) << 3) | 3); | |||
651 | ||||
652 | /* The reset state _is_ the userspace state for this thread now */ | |||
653 | curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags |= CPUF_USERXSTATE0x0200; | |||
654 | ||||
655 | return 0; | |||
656 | } | |||
657 | ||||
658 | /* | |||
659 | * System call to cleanup state after a signal | |||
660 | * has been taken. Reset signal mask and | |||
661 | * stack state from context left by sendsig (above). | |||
662 | * Return to previous pc and psl as specified by | |||
663 | * context left by sendsig. Check carefully to | |||
664 | * make sure that the user has not modified the | |||
665 | * psl to gain improper privileges or to cause | |||
666 | * a machine fault. | |||
667 | */ | |||
668 | int | |||
669 | sys_sigreturn(struct proc *p, void *v, register_t *retval) | |||
670 | { | |||
671 | struct sys_sigreturn_args /* { | |||
672 | syscallarg(struct sigcontext *) sigcntxp; | |||
673 | } */ *uap = v; | |||
674 | struct sigcontext ksc, *scp = SCARG(uap, sigcntxp)((uap)->sigcntxp.le.datum); | |||
675 | struct trapframe *tf = p->p_md.md_regs; | |||
676 | int error; | |||
677 | ||||
678 | if (PROC_PC(p)((p)->p_md.md_regs->tf_rip) != p->p_p->ps_sigcoderet) { | |||
679 | sigexit(p, SIGILL4); | |||
680 | return (EPERM1); | |||
681 | } | |||
682 | ||||
683 | if ((error = copyin((caddr_t)scp, &ksc, sizeof ksc))) | |||
684 | return (error); | |||
685 | ||||
686 | if (ksc.sc_cookie != ((long)scp ^ p->p_p->ps_sigcookie)) { | |||
687 | sigexit(p, SIGILL4); | |||
688 | return (EFAULT14); | |||
689 | } | |||
690 | ||||
691 | /* Prevent reuse of the sigcontext cookie */ | |||
692 | ksc.sc_cookie = 0; | |||
693 | (void)copyout(&ksc.sc_cookie, (caddr_t)scp + | |||
694 | offsetof(struct sigcontext, sc_cookie)__builtin_offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie)); | |||
695 | ||||
696 | if (((ksc.sc_rflags ^ tf->tf_rflags) & PSL_USERSTATIC(0x00000002 | 0xffc08028 | 0x00000200 | 0x00003000 | 0x00004000 | 0x00020000 | 0x00080000 | 0x00100000)) != 0 || | |||
697 | !USERMODE(ksc.sc_cs, ksc.sc_eflags)(((ksc.sc_cs) & 3) == 3)) | |||
698 | return (EINVAL22); | |||
699 | ||||
700 | /* Current state is obsolete; toss it and force a reload */ | |||
701 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags & CPUF_USERXSTATE0x0200) { | |||
702 | curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags &= ~CPUF_USERXSTATE0x0200; | |||
703 | fpureset()xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask ); | |||
704 | } | |||
705 | ||||
706 | /* Copy in the FPU state to restore */ | |||
707 | if (__predict_true(ksc.sc_fpstate != NULL)__builtin_expect(((ksc.sc_fpstate != ((void *)0)) != 0), 1)) { | |||
708 | struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave; | |||
709 | ||||
710 | if ((error = copyin(ksc.sc_fpstate, fx, fpu_save_len))) | |||
711 | return (error); | |||
712 | fx->fx_mxcsr &= fpu_mxcsr_mask; | |||
713 | } else { | |||
714 | /* shouldn't happen, but handle it */ | |||
715 | memcpy(&p->p_addr->u_pcb.pcb_savefpu,__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)) | |||
716 | &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len)__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)); | |||
717 | } | |||
718 | ||||
719 | tf->tf_rdi = ksc.sc_rdi; | |||
720 | tf->tf_rsi = ksc.sc_rsi; | |||
721 | tf->tf_rdx = ksc.sc_rdx; | |||
722 | tf->tf_rcx = ksc.sc_rcx; | |||
723 | tf->tf_r8 = ksc.sc_r8; | |||
724 | tf->tf_r9 = ksc.sc_r9; | |||
725 | tf->tf_r10 = ksc.sc_r10; | |||
726 | tf->tf_r11 = ksc.sc_r11; | |||
727 | tf->tf_r12 = ksc.sc_r12; | |||
728 | tf->tf_r13 = ksc.sc_r13; | |||
729 | tf->tf_r14 = ksc.sc_r14; | |||
730 | tf->tf_r15 = ksc.sc_r15; | |||
731 | tf->tf_rbx = ksc.sc_rbx; | |||
732 | tf->tf_rax = ksc.sc_rax; | |||
733 | tf->tf_rbp = ksc.sc_rbp; | |||
734 | tf->tf_rip = ksc.sc_rip; | |||
735 | tf->tf_cs = ksc.sc_cs; | |||
736 | tf->tf_rflags = ksc.sc_rflags; | |||
737 | tf->tf_rsp = ksc.sc_rsp; | |||
738 | tf->tf_ss = ksc.sc_ss; | |||
739 | ||||
740 | /* Restore signal mask. */ | |||
741 | p->p_sigmask = ksc.sc_mask & ~sigcantmask((1U << ((9)-1)) | (1U << ((17)-1))); | |||
742 | ||||
743 | /* | |||
744 | * sigreturn() needs to return to userspace via the 'iretq' | |||
745 | * method, so that if the process was interrupted (by tick, | |||
746 | * an IPI, whatever) as opposed to already being in the kernel | |||
747 | * when a signal was being delivered, the process will be | |||
748 | * completely restored, including the userland %rcx and %r11 | |||
749 | * registers which the 'sysretq' instruction cannot restore. | |||
750 | * Also need to make sure we can handle faulting on xrstor. | |||
751 | */ | |||
752 | p->p_md.md_flags |= MDP_IRET0x0002; | |||
753 | ||||
754 | return (EJUSTRETURN-2); | |||
755 | } | |||
756 | ||||
#ifdef MULTIPROCESSOR
/* force a CPU into the kernel, whether or not it's idle */
void
cpu_kick(struct cpu_info *ci)
{
	/* only need to kick other CPUs */
	if (ci != curcpu()) {
		if (cpu_mwait_size > 0) {
			/*
			 * If not idling, then send an IPI, else
			 * just clear the "keep idling" bit.
			 */
			if ((ci->ci_mwait & MWAIT_IN_IDLE) == 0)
				x86_send_ipi(ci, X86_IPI_NOP);
			else
				atomic_clearbits_int(&ci->ci_mwait,
				    MWAIT_KEEP_IDLING);
		} else {
			/* no mwait, so need an IPI */
			x86_send_ipi(ci, X86_IPI_NOP);
		}
	}
}
#endif
781 | ||||
782 | /* | |||
783 | * Notify the current process (p) that it has a signal pending, | |||
784 | * process as soon as possible. | |||
785 | */ | |||
786 | void | |||
787 | signotify(struct proc *p) | |||
788 | { | |||
/* set the AST-pending flag so the signal is handled on return to userland */
789 | aston(p)((p)->p_md.md_astpending = 1); | |||
/* then force the CPU p last ran on back into the kernel to notice it */
790 | cpu_kick(p->p_cpu); | |||
791 | } | |||
792 | ||||
793 | #ifdef MULTIPROCESSOR1 | |||
/*
 * cpu_unidle(ci): take CPU 'ci' out of its idle loop.
 * For mwait-only idling it suffices to clear MWAIT_KEEP_IDLING (harmless if
 * the CPU was not idling); otherwise a NOP IPI is sent to any remote CPU.
 */
794 | void | |||
795 | cpu_unidle(struct cpu_info *ci) | |||
796 | { | |||
797 | if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_ONLY0x4)) { | |||
798 | /* | |||
799 | * Just clear the "keep idling" bit; if it wasn't | |||
800 | * idling then we didn't need to do anything anyway. | |||
801 | */ | |||
802 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_mwait, MWAIT_KEEP_IDLING0x2); | |||
803 | return; | |||
804 | } | |||
805 | ||||
806 | if (ci != curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})) | |||
807 | x86_send_ipi(ci, X86_IPI_NOP0x00000002); | |||
808 | } | |||
809 | #endif | |||
810 | ||||
/* -1 until boot() has synced the disks once; guards repeated vfs_shutdown */
811 | int waittime = -1; | |||
/* register save area filled by savectx() in dumpsys() before a crash dump */
812 | struct pcb dumppcb; | |||
813 | ||||
/*
 * boot(howto): machine-dependent system shutdown / reboot / halt.
 * Honors the RB_* flags in 'howto': sync filesystems unless RB_NOSYNC,
 * dump if RB_DUMP, power down or halt if RB_HALT/RB_POWERDOWN, else reset.
 * Never returns.
 */
814 | __dead__attribute__((__noreturn__)) void | |||
815 | boot(int howto) | |||
816 | { | |||
817 | if ((howto & RB_POWERDOWN0x01000) != 0) | |||
818 | lid_action = 0; | |||
819 | ||||
820 | if ((howto & RB_RESET0x08000) != 0) | |||
821 | goto doreset; | |||
822 | ||||
/* if still cold (autoconf-time panic), don't try to sync; halt directly */
823 | if (cold) { | |||
824 | if ((howto & RB_USERREQ0x04000) == 0) | |||
825 | howto |= RB_HALT0x00008; | |||
826 | goto haltsys; | |||
827 | } | |||
828 | ||||
829 | boothowto = howto; | |||
/* sync disks once; waittime guards against recursion via panic in sync */
830 | if ((howto & RB_NOSYNC0x00004) == 0 && waittime < 0) { | |||
831 | waittime = 0; | |||
832 | vfs_shutdown(curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc); | |||
833 | ||||
834 | if ((howto & RB_TIMEBAD0x00800) == 0) { | |||
835 | resettodr(); | |||
836 | } else { | |||
837 | printf("WARNING: not updating battery clock\n"); | |||
838 | } | |||
839 | } | |||
840 | if_downall(); | |||
841 | ||||
842 | uvm_shutdown(); | |||
/* block all interrupts from here on and mark the system cold again */
843 | splhigh()splraise(0xd); | |||
844 | cold = 1; | |||
845 | ||||
846 | if ((howto & RB_DUMP0x00100) != 0) | |||
847 | dumpsys(); | |||
848 | ||||
849 | haltsys: | |||
850 | config_suspend_all(DVACT_POWERDOWN6); | |||
851 | ||||
/* stop the secondary CPUs before halting or powering off */
852 | #ifdef MULTIPROCESSOR1 | |||
853 | x86_broadcast_ipi(X86_IPI_HALT0x00000001); | |||
854 | #endif | |||
855 | ||||
856 | if ((howto & RB_HALT0x00008) != 0) { | |||
857 | #if NACPI1 > 0 && !defined(SMALL_KERNEL) | |||
858 | extern int acpi_enabled; | |||
859 | ||||
860 | if (acpi_enabled) { | |||
861 | delay(500000)(*delay_func)(500000); | |||
862 | if ((howto & RB_POWERDOWN0x01000) != 0) | |||
863 | acpi_powerdown(); | |||
864 | } | |||
865 | #endif | |||
866 | printf("\n"); | |||
867 | printf("The operating system has halted.\n"); | |||
868 | printf("Please press any key to reboot.\n\n"); | |||
869 | cnpollc(1); /* for proper keyboard command handling */ | |||
870 | cngetc(); | |||
871 | cnpollc(0); | |||
872 | } | |||
873 | ||||
874 | doreset: | |||
875 | printf("rebooting...\n"); | |||
876 | if (cpureset_delay > 0) | |||
877 | delay(cpureset_delay * 1000)(*delay_func)(cpureset_delay * 1000); | |||
878 | cpu_reset(); | |||
/* spin forever in case cpu_reset() somehow fails */
879 | for (;;) | |||
880 | continue; | |||
881 | /* NOTREACHED */ | |||
882 | } | |||
883 | ||||
884 | /* | |||
885 | * These variables are needed by /sbin/savecore | |||
886 | */ | |||
887 | u_long dumpmag = 0x8fca0101; /* magic number */ | |||
888 | int dumpsize = 0; /* pages */ | |||
889 | long dumplo = 0; /* blocks */ | |||
890 | ||||
891 | /* | |||
892 | * cpu_dump: dump the machine-dependent kernel core dump headers. | |||
893 | */ | |||
/*
 * Builds one disk block containing the kcore segment header, the
 * cpu_kcore_hdr_t (kernel %cr3 + memory segment count) and the
 * phys_ram_seg_t table, then writes it via the dump device's d_dump.
 * Returns 0 or an errno from the dump routine.
 */
894 | int | |||
895 | cpu_dump(void) | |||
896 | { | |||
897 | int (*dump)(dev_t, daddr_t, caddr_t, size_t); | |||
898 | char buf[dbtob(1)((1) << 9)]; | |||
899 | kcore_seg_t *segp; | |||
900 | cpu_kcore_hdr_t *cpuhdrp; | |||
901 | phys_ram_seg_t *memsegp; | |||
902 | caddr_t va; | |||
903 | int i; | |||
904 | ||||
905 | dump = bdevsw[major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff)].d_dump; | |||
906 | ||||
/* the three headers are packed back-to-back, long-aligned, in one block */
907 | memset(buf, 0, sizeof buf)__builtin_memset((buf), (0), (sizeof buf)); | |||
908 | segp = (kcore_seg_t *)buf; | |||
909 | cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))(((unsigned long)(sizeof(*segp)) + (sizeof(long) - 1)) &~ (sizeof(long) - 1))]; | |||
910 | memsegp = (phys_ram_seg_t *)&buf[ALIGN(sizeof(*segp))(((unsigned long)(sizeof(*segp)) + (sizeof(long) - 1)) &~ (sizeof(long) - 1)) + | |||
911 | ALIGN(sizeof(*cpuhdrp))(((unsigned long)(sizeof(*cpuhdrp)) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))]; | |||
912 | ||||
913 | /* | |||
914 | * Generate a segment header. | |||
915 | */ | |||
916 | CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU)( (*segp).c_midmag = (__uint32_t)(__builtin_constant_p(( ((1) & 0x3f) << 26) | ( ((157) & 0x03ff) << 16 ) | ( ((0x8fca) & 0xffff) )) ? (__uint32_t)(((__uint32_t) (( ((1) & 0x3f) << 26) | ( ((157) & 0x03ff) << 16) | ( ((0x8fca) & 0xffff) )) & 0xff) << 24 | ((__uint32_t)(( ((1) & 0x3f) << 26) | ( ((157) & 0x03ff) << 16) | ( ((0x8fca) & 0xffff) )) & 0xff00 ) << 8 | ((__uint32_t)(( ((1) & 0x3f) << 26) | ( ((157) & 0x03ff) << 16) | ( ((0x8fca) & 0xffff ) )) & 0xff0000) >> 8 | ((__uint32_t)(( ((1) & 0x3f ) << 26) | ( ((157) & 0x03ff) << 16) | ( ((0x8fca ) & 0xffff) )) & 0xff000000) >> 24) : __swap32md (( ((1) & 0x3f) << 26) | ( ((157) & 0x03ff) << 16) | ( ((0x8fca) & 0xffff) ))) ); | |||
917 | segp->c_size = dbtob(1)((1) << 9) - ALIGN(sizeof(*segp))(((unsigned long)(sizeof(*segp)) + (sizeof(long) - 1)) &~ (sizeof(long) - 1)); | |||
918 | ||||
919 | /* | |||
920 | * Add the machine-dependent header info. | |||
921 | */ | |||
922 | cpuhdrp->ptdpaddr = proc0.p_addr->u_pcb.pcb_cr3; | |||
923 | cpuhdrp->nmemsegs = mem_cluster_cnt; | |||
924 | ||||
925 | /* | |||
926 | * Fill in the memory segment descriptors. | |||
927 | */ | |||
928 | for (i = 0; i < mem_cluster_cnt; i++) { | |||
929 | memsegp[i].start = mem_clusters[i].start; | |||
930 | memsegp[i].size = mem_clusters[i].size & ~PAGE_MASK((1 << 12) - 1); | |||
931 | } | |||
932 | ||||
933 | /* | |||
934 | * If we have dump memory then assume the kernel stack is in high | |||
935 | * memory and bounce | |||
936 | */ | |||
937 | if (dumpmem_vaddr != 0) { | |||
938 | memcpy((char *)dumpmem_vaddr, buf, sizeof(buf))__builtin_memcpy(((char *)dumpmem_vaddr), (buf), (sizeof(buf) )); | |||
939 | va = (caddr_t)dumpmem_vaddr; | |||
940 | } else { | |||
941 | va = (caddr_t)buf; | |||
942 | } | |||
943 | return (dump(dumpdev, dumplo, va, dbtob(1)((1) << 9))); | |||
944 | } | |||
945 | ||||
946 | /* | |||
947 | * This is called by main to set dumplo and dumpsize. | |||
948 | * Dumps always skip the first PAGE_SIZE of disk space | |||
949 | * in case there might be a disk label stored there. | |||
950 | * If there is extra space, put dump at the end to | |||
951 | * reduce the chance that swapping trashes it. | |||
952 | */ | |||
/*
 * Silently leaves dumplo/dumpsize unset when no dump device is configured,
 * the partition is too small, or the header size can't be computed.
 */
953 | void | |||
954 | dumpconf(void) | |||
955 | { | |||
956 | int nblks, dumpblks; /* size of dump area */ | |||
957 | ||||
958 | if (dumpdev == NODEV(dev_t)(-1) || | |||
959 | (nblks = (bdevsw[major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff)].d_psize)(dumpdev)) == 0) | |||
960 | return; | |||
961 | if (nblks <= ctod(1)((1) << (12 - 9))) | |||
962 | return; | |||
963 | ||||
/* blocks needed: MD headers plus every dumpable physical page */
964 | dumpblks = cpu_dumpsize(); | |||
965 | if (dumpblks < 0) | |||
966 | return; | |||
967 | dumpblks += ctod(cpu_dump_mempagecnt())((cpu_dump_mempagecnt()) << (12 - 9)); | |||
968 | ||||
969 | /* If dump won't fit (incl. room for possible label), punt. */ | |||
970 | if (dumpblks > (nblks - ctod(1)((1) << (12 - 9)))) | |||
971 | return; | |||
972 | ||||
973 | /* Put dump at end of partition */ | |||
974 | dumplo = nblks - dumpblks; | |||
975 | ||||
976 | /* dumpsize is in page units, and doesn't include headers. */ | |||
977 | dumpsize = cpu_dump_mempagecnt(); | |||
978 | } | |||
979 | ||||
980 | /* | |||
981 | * Doadump comes here after turning off memory management and | |||
982 | * getting on the dump stack, either when called above, or by | |||
983 | * the auto-restart code. | |||
984 | */ | |||
985 | #define BYTES_PER_DUMP(64 * 1024) MAXPHYS(64 * 1024) /* must be a multiple of pagesize */ | |||
986 | ||||
/*
 * dumpsys(): write every mem_clusters[] segment to the dump device in
 * BYTES_PER_DUMP chunks, after the headers written by cpu_dump().
 * Runs with interrupts off at crash time; reports progress and the final
 * status on the console.
 */
987 | void | |||
988 | dumpsys(void) | |||
989 | { | |||
990 | u_long totalbytesleft, bytes, i, n, memseg; | |||
991 | u_long maddr; | |||
992 | daddr_t blkno; | |||
993 | void *va; | |||
994 | int (*dump)(dev_t, daddr_t, caddr_t, size_t); | |||
995 | int error; | |||
996 | ||||
997 | /* Save registers. */ | |||
998 | savectx(&dumppcb); | |||
999 | ||||
1000 | if (dumpdev == NODEV(dev_t)(-1)) | |||
1001 | return; | |||
1002 | ||||
1003 | /* | |||
1004 | * For dumps during autoconfiguration, | |||
1005 | * if dump device has already configured... | |||
1006 | */ | |||
1007 | if (dumpsize == 0) | |||
1008 | dumpconf(); | |||
1009 | if (dumplo <= 0 || dumpsize == 0) { | |||
1010 | printf("\ndump to dev %u,%u not possible\n", major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff), | |||
1011 | minor(dumpdev)((unsigned)((dumpdev) & 0xff) | (((dumpdev) & 0xffff0000 ) >> 8))); | |||
1012 | return; | |||
1013 | } | |||
1014 | printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff), | |||
1015 | minor(dumpdev)((unsigned)((dumpdev) & 0xff) | (((dumpdev) & 0xffff0000 ) >> 8)), dumplo); | |||
1016 | ||||
1017 | error = (*bdevsw[major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff)].d_psize)(dumpdev); | |||
1018 | printf("dump "); | |||
1019 | if (error == -1) { | |||
1020 | printf("area unavailable\n"); | |||
1021 | return; | |||
1022 | } | |||
1023 | ||||
/* write the kcore/cpu headers first; data blocks follow them on disk */
1024 | if ((error = cpu_dump()) != 0) | |||
1025 | goto err; | |||
1026 | ||||
1027 | totalbytesleft = ptoa(cpu_dump_mempagecnt())((paddr_t)(cpu_dump_mempagecnt()) << 12); | |||
1028 | blkno = dumplo + cpu_dumpsize(); | |||
1029 | dump = bdevsw[major(dumpdev)(((unsigned)(dumpdev) >> 8) & 0xff)].d_dump; | |||
1030 | error = 0; | |||
1031 | ||||
1032 | for (memseg = 0; memseg < mem_cluster_cnt; memseg++) { | |||
1033 | maddr = mem_clusters[memseg].start; | |||
1034 | bytes = mem_clusters[memseg].size; | |||
1035 | ||||
1036 | for (i = 0; i < bytes; i += n, totalbytesleft -= n) { | |||
1037 | /* Print out how many MBs we have left to go. */ | |||
1038 | if ((totalbytesleft % (1024*1024)) < BYTES_PER_DUMP(64 * 1024)) | |||
1039 | printf("%ld ", totalbytesleft / (1024 * 1024)); | |||
1040 | ||||
1041 | /* Limit size for next transfer. */ | |||
1042 | n = bytes - i; | |||
1043 | if (n > BYTES_PER_DUMP(64 * 1024)) | |||
1044 | n = BYTES_PER_DUMP(64 * 1024); | |||
/* pages above 4GB are bounced through dumpmem_vaddr for 32-bit-limited dump devices */
1045 | if (maddr > 0xffffffff) { | |||
1046 | va = (void *)dumpmem_vaddr; | |||
1047 | if (n > dumpmem_sz) | |||
1048 | n = dumpmem_sz; | |||
1049 | memcpy(va, (void *)PMAP_DIRECT_MAP(maddr), n)__builtin_memcpy((va), ((void *)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000)) + (maddr))), (n)); | |||
1050 | } else { | |||
1051 | va = (void *)PMAP_DIRECT_MAP(maddr)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (maddr)); | |||
1052 | } | |||
1053 | ||||
1054 | error = (*dump)(dumpdev, blkno, va, n); | |||
1055 | if (error) | |||
1056 | goto err; | |||
1057 | maddr += n; | |||
1058 | blkno += btodb(n)((n) >> 9); /* XXX? */ | |||
1059 | ||||
1060 | #if 0 /* XXX this doesn't work. grr. */ | |||
1061 | /* operator aborting dump? */ | |||
1062 | if (sget() != NULL((void *)0)) { | |||
1063 | error = EINTR4; | |||
1064 | break; | |||
1065 | } | |||
1066 | #endif | |||
1067 | } | |||
1068 | } | |||
1069 | ||||
1070 | err: | |||
1071 | switch (error) { | |||
1072 | ||||
1073 | case ENXIO6: | |||
1074 | printf("device bad\n"); | |||
1075 | break; | |||
1076 | ||||
1077 | case EFAULT14: | |||
1078 | printf("device not ready\n"); | |||
1079 | break; | |||
1080 | ||||
1081 | case EINVAL22: | |||
1082 | printf("area improper\n"); | |||
1083 | break; | |||
1084 | ||||
1085 | case EIO5: | |||
1086 | printf("i/o error\n"); | |||
1087 | break; | |||
1088 | ||||
1089 | case EINTR4: | |||
1090 | printf("aborted from console\n"); | |||
1091 | break; | |||
1092 | ||||
1093 | case 0: | |||
1094 | printf("succeeded\n"); | |||
1095 | break; | |||
1096 | ||||
1097 | default: | |||
1098 | printf("error %d\n", error); | |||
1099 | break; | |||
1100 | } | |||
1101 | printf("\n\n"); | |||
1102 | delay(5000000)(*delay_func)(5000000); /* 5 seconds */ | |||
1103 | } | |||
1104 | ||||
1105 | /* | |||
1106 | * Force the userspace FS.base to be reloaded from the PCB on return from | |||
1107 | * the kernel, and reset the segment registers (%ds, %es, %fs, and %gs) | |||
1108 | * to their expected userspace value. | |||
1109 | */ | |||
1110 | void | |||
1111 | reset_segs(void) | |||
1112 | { | |||
1113 | /* | |||
1114 | * This operates like the cpu_switchto() sequence: if we | |||
1115 | * haven't reset %[defg]s already, do so now. | |||
1116 | */ | |||
1117 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags & CPUF_USERSEGS0x0080) { | |||
1118 | curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags &= ~CPUF_USERSEGS0x0080; | |||
/* interrupts are blocked across the swapgs pair so the kernel never runs on the user GS.base */
1119 | __asm volatile( | |||
1120 | "movw %%ax,%%ds\n\t" | |||
1121 | "movw %%ax,%%es\n\t" | |||
1122 | "movw %%ax,%%fs\n\t" | |||
1123 | "cli\n\t" /* block intr when on user GS.base */ | |||
1124 | "swapgs\n\t" /* swap from kernel to user GS.base */ | |||
1125 | "movw %%ax,%%gs\n\t"/* set %gs to UDATA and GS.base to 0 */ | |||
1126 | "swapgs\n\t" /* back to kernel GS.base */ | |||
1127 | "sti" : : "a"(GSEL(GUDATA_SEL, SEL_UPL)(((4) << 3) | 3))); | |||
1128 | } | |||
1129 | } | |||
1130 | ||||
1131 | /* | |||
1132 | * Clear registers on exec | |||
1133 | */ | |||
/*
 * setregs(p, pack, stack, retval): reset the process's FPU state and
 * trapframe for a fresh execve() image: all GPRs zeroed, %rip at the
 * image entry point, %rsp at 'stack', user code/data selectors loaded.
 */
1134 | void | |||
1135 | setregs(struct proc *p, struct exec_package *pack, u_long stack, | |||
1136 | register_t *retval) | |||
1137 | { | |||
1138 | struct trapframe *tf; | |||
1139 | ||||
1140 | /* Reset FPU state in PCB */ | |||
1141 | memcpy(&p->p_addr->u_pcb.pcb_savefpu,__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)) | |||
1142 | &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len)__builtin_memcpy((&p->p_addr->u_pcb.pcb_savefpu), ( &proc0.p_addr->u_pcb.pcb_savefpu), (fpu_save_len)); | |||
1143 | ||||
1144 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags & CPUF_USERXSTATE0x0200) { | |||
1145 | /* state in CPU is obsolete; reset it */ | |||
1146 | fpureset()xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask ); | |||
1147 | } else { | |||
1148 | /* the reset state _is_ the userspace state now */ | |||
1149 | curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags |= CPUF_USERXSTATE0x0200; | |||
1150 | } | |||
1151 | ||||
1152 | /* To reset all registers we have to return via iretq */ | |||
1153 | p->p_md.md_flags |= MDP_IRET0x0002; | |||
1154 | ||||
1155 | reset_segs(); | |||
1156 | p->p_addr->u_pcb.pcb_fsbase = 0; | |||
1157 | ||||
/* wipe every user-visible register in the trapframe */
1158 | tf = p->p_md.md_regs; | |||
1159 | tf->tf_rdi = 0; | |||
1160 | tf->tf_rsi = 0; | |||
1161 | tf->tf_rbp = 0; | |||
1162 | tf->tf_rbx = 0; | |||
1163 | tf->tf_rdx = 0; | |||
1164 | tf->tf_rcx = 0; | |||
1165 | tf->tf_rax = 0; | |||
1166 | tf->tf_r8 = 0; | |||
1167 | tf->tf_r9 = 0; | |||
1168 | tf->tf_r10 = 0; | |||
1169 | tf->tf_r11 = 0; | |||
1170 | tf->tf_r12 = 0; | |||
1171 | tf->tf_r13 = 0; | |||
1172 | tf->tf_r14 = 0; | |||
1173 | tf->tf_r15 = 0; | |||
1174 | tf->tf_rip = pack->ep_entry; | |||
1175 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL)(((5) << 3) | 3); | |||
1176 | tf->tf_rflags = PSL_USERSET(0x00000002 | 0x00000200); | |||
1177 | tf->tf_rsp = stack; | |||
1178 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL)(((4) << 3) | 3); | |||
1179 | ||||
1180 | retval[1] = 0; | |||
1181 | } | |||
1182 | ||||
1183 | /* | |||
1184 | * Initialize segments and descriptor tables | |||
1185 | */ | |||
1186 | ||||
/* the interrupt descriptor table and a per-vector allocation bitmap */
1187 | struct gate_descriptor *idt; | |||
1188 | char idt_allocmap[NIDT256]; | |||
1189 | extern struct user *proc0paddr; | |||
1190 | ||||
/*
 * setgate(gd, func, ist, type, dpl, sel): fill in a long-mode IDT gate
 * descriptor: handler address split into lo/hi offset fields, interrupt
 * stack table index, gate type, privilege level and code selector;
 * present bit set, reserved fields zeroed.
 */
1191 | void | |||
1192 | setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, | |||
1193 | int sel) | |||
1194 | { | |||
1195 | gd->gd_looffset = (u_int64_t)func & 0xffff; | |||
1196 | gd->gd_selector = sel; | |||
1197 | gd->gd_ist = ist; | |||
1198 | gd->gd_type = type; | |||
1199 | gd->gd_dpl = dpl; | |||
1200 | gd->gd_p = 1; | |||
1201 | gd->gd_hioffset = (u_int64_t)func >> 16; | |||
1202 | gd->gd_zero = 0; | |||
1203 | gd->gd_xx1 = 0; | |||
1204 | gd->gd_xx2 = 0; | |||
1205 | gd->gd_xx3 = 0; | |||
1206 | } | |||
1207 | ||||
/* unsetgate(gd): clear a gate descriptor (present bit included) */
1208 | void | |||
1209 | unsetgate(struct gate_descriptor *gd) | |||
1210 | { | |||
1211 | memset(gd, 0, sizeof (*gd))__builtin_memset((gd), (0), (sizeof (*gd))); | |||
1212 | } | |||
1213 | ||||
/* setregion(rd, base, limit): build a descriptor-table region for lidt/lgdt */
1214 | void | |||
1215 | setregion(struct region_descriptor *rd, void *base, u_int16_t limit) | |||
1216 | { | |||
1217 | rd->rd_limit = limit; | |||
1218 | rd->rd_base = (u_int64_t)base; | |||
1219 | } | |||
1220 | ||||
1221 | /* | |||
1222 | * Note that the base and limit fields are ignored in long mode. | |||
1223 | */ | |||
/*
 * set_mem_segment(sd, base, limit, type, dpl, gran, def32, is64):
 * fill in a memory segment descriptor (base/limit split across the
 * lo/hi fields, present bit set, AVL cleared).
 */
1224 | void | |||
1225 | set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit, | |||
1226 | int type, int dpl, int gran, int def32, int is64) | |||
1227 | { | |||
1228 | sd->sd_lolimit = (unsigned)limit; | |||
1229 | sd->sd_lobase = (unsigned long)base; | |||
1230 | sd->sd_type = type; | |||
1231 | sd->sd_dpl = dpl; | |||
1232 | sd->sd_p = 1; | |||
1233 | sd->sd_hilimit = (unsigned)limit >> 16; | |||
1234 | sd->sd_avl = 0; | |||
1235 | sd->sd_long = is64; | |||
1236 | sd->sd_def32 = def32; | |||
1237 | sd->sd_gran = gran; | |||
1238 | sd->sd_hibase = (unsigned long)base >> 24; | |||
1239 | } | |||
1240 | ||||
/*
 * set_sys_segment(sd, base, limit, type, dpl, gran): fill in a 16-byte
 * long-mode system segment descriptor (e.g. a TSS); zeroed first since
 * it has reserved fields the memory-segment variant does not.
 */
1241 | void | |||
1242 | set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit, | |||
1243 | int type, int dpl, int gran) | |||
1244 | { | |||
1245 | memset(sd, 0, sizeof *sd)__builtin_memset((sd), (0), (sizeof *sd)); | |||
1246 | sd->sd_lolimit = (unsigned)limit; | |||
1247 | sd->sd_lobase = (u_int64_t)base; | |||
1248 | sd->sd_type = type; | |||
1249 | sd->sd_dpl = dpl; | |||
1250 | sd->sd_p = 1; | |||
1251 | sd->sd_hilimit = (unsigned)limit >> 16; | |||
1252 | sd->sd_gran = gran; | |||
1253 | sd->sd_hibase = (u_int64_t)base >> 24; | |||
1254 | } | |||
1255 | ||||
/* load the (shared) IDT into this CPU's IDTR */
1256 | void cpu_init_idt(void) | |||
1257 | { | |||
1258 | struct region_descriptor region; | |||
1259 | ||||
1260 | setregion(&region, idt, NIDT256 * sizeof(idt[0]) - 1); | |||
1261 | lidt(&region); | |||
1262 | } | |||
1263 | ||||
/*
 * cpu_init_extents(): reserve all RAM ranges in the iomem extent map so
 * device resource allocation cannot hand out physical memory. Runs once;
 * subsequent per-CPU calls return immediately.
 */
1264 | void | |||
1265 | cpu_init_extents(void) | |||
1266 | { | |||
1267 | extern struct extent *iomem_ex; | |||
1268 | static int already_done; | |||
1269 | int i; | |||
1270 | ||||
1271 | /* We get called for each CPU, only first should do this */ | |||
1272 | if (already_done) | |||
1273 | return; | |||
1274 | ||||
1275 | /* | |||
1276 | * Allocate the physical addresses used by RAM from the iomem | |||
1277 | * extent map. | |||
1278 | */ | |||
1279 | for (i = 0; i < mem_cluster_cnt; i++) { | |||
1280 | if (extent_alloc_region(iomem_ex, mem_clusters[i].start, | |||
1281 | mem_clusters[i].size, EX_NOWAIT0x0000)) { | |||
1282 | /* XXX What should we do? */ | |||
1283 | printf("WARNING: CAN'T ALLOCATE RAM (%llx-%llx)" | |||
1284 | " FROM IOMEM EXTENT MAP!\n", mem_clusters[i].start, | |||
1285 | mem_clusters[i].start + mem_clusters[i].size - 1); | |||
1286 | } | |||
1287 | } | |||
1288 | ||||
1289 | already_done = 1; | |||
1290 | } | |||
1291 | ||||
/*
 * map_tramps(): set up the low-memory trampolines used to start secondary
 * CPUs (and, with ACPI, resume). Ensures the PML4 the trampolines load is
 * below 4GB (bouncing through lo32_paddr if needed), copies the MP spin-up
 * code/data into their fixed sub-1MB pages, and patches in the PML4 pa.
 */
1292 | void | |||
1293 | map_tramps(void) | |||
1294 | { | |||
1295 | #if defined(MULTIPROCESSOR1) || \ | |||
1296 | (NACPI1 > 0 && !defined(SMALL_KERNEL)) | |||
1297 | struct pmap *kmp = pmap_kernel()(&kernel_pmap_store); | |||
1298 | extern paddr_t tramp_pdirpa; | |||
1299 | #ifdef MULTIPROCESSOR1 | |||
1300 | extern u_char cpu_spinup_trampoline[]; | |||
1301 | extern u_char cpu_spinup_trampoline_end[]; | |||
1302 | extern u_char mp_tramp_data_start[]; | |||
1303 | extern u_char mp_tramp_data_end[]; | |||
1304 | extern u_int32_t mp_pdirpa; | |||
1305 | #endif | |||
1306 | ||||
1307 | /* | |||
1308 | * The initial PML4 pointer must be below 4G, so if the | |||
1309 | * current one isn't, use a "bounce buffer" and save it | |||
1310 | * for tramps to use. | |||
1311 | */ | |||
1312 | if (kmp->pm_pdirpa > 0xffffffff) { | |||
1313 | pmap_kenter_pa(lo32_vaddr, lo32_paddr, PROT_READ0x01 | PROT_WRITE0x02); | |||
1314 | memcpy((void *)lo32_vaddr, kmp->pm_pdir, PAGE_SIZE)__builtin_memcpy(((void *)lo32_vaddr), (kmp->pm_pdir), ((1 << 12))); | |||
1315 | tramp_pdirpa = lo32_paddr; | |||
1316 | pmap_kremove(lo32_vaddr, PAGE_SIZE(1 << 12)); | |||
1317 | } else | |||
1318 | tramp_pdirpa = kmp->pm_pdirpa; | |||
1319 | ||||
1320 | ||||
1321 | #ifdef MULTIPROCESSOR1 | |||
1322 | /* Map MP tramp code and data pages RW for copy */ | |||
1323 | pmap_kenter_pa(MP_TRAMPOLINE(16 * (1 << 12)), MP_TRAMPOLINE(16 * (1 << 12)), | |||
1324 | PROT_READ0x01 | PROT_WRITE0x02); | |||
1325 | ||||
1326 | pmap_kenter_pa(MP_TRAMP_DATA(17 * (1 << 12)), MP_TRAMP_DATA(17 * (1 << 12)), | |||
1327 | PROT_READ0x01 | PROT_WRITE0x02); | |||
1328 | ||||
/* fill with int3 (0xcc) first so stray execution traps instead of running garbage */
1329 | memset((caddr_t)MP_TRAMPOLINE, 0xcc, PAGE_SIZE)__builtin_memset(((caddr_t)(16 * (1 << 12))), (0xcc), ( (1 << 12))); | |||
1330 | memset((caddr_t)MP_TRAMP_DATA, 0xcc, PAGE_SIZE)__builtin_memset(((caddr_t)(17 * (1 << 12))), (0xcc), ( (1 << 12))); | |||
1331 | ||||
1332 | memcpy((caddr_t)MP_TRAMPOLINE,__builtin_memcpy(((caddr_t)(16 * (1 << 12))), (cpu_spinup_trampoline ), (cpu_spinup_trampoline_end-cpu_spinup_trampoline)) | |||
1333 | cpu_spinup_trampoline,__builtin_memcpy(((caddr_t)(16 * (1 << 12))), (cpu_spinup_trampoline ), (cpu_spinup_trampoline_end-cpu_spinup_trampoline)) | |||
1334 | cpu_spinup_trampoline_end-cpu_spinup_trampoline)__builtin_memcpy(((caddr_t)(16 * (1 << 12))), (cpu_spinup_trampoline ), (cpu_spinup_trampoline_end-cpu_spinup_trampoline)); | |||
1335 | ||||
1336 | memcpy((caddr_t)MP_TRAMP_DATA,__builtin_memcpy(((caddr_t)(17 * (1 << 12))), (mp_tramp_data_start ), (mp_tramp_data_end - mp_tramp_data_start)) | |||
1337 | mp_tramp_data_start,__builtin_memcpy(((caddr_t)(17 * (1 << 12))), (mp_tramp_data_start ), (mp_tramp_data_end - mp_tramp_data_start)) | |||
1338 | mp_tramp_data_end - mp_tramp_data_start)__builtin_memcpy(((caddr_t)(17 * (1 << 12))), (mp_tramp_data_start ), (mp_tramp_data_end - mp_tramp_data_start)); | |||
1339 | ||||
1340 | /* | |||
1341 | * We need to patch this after we copy the tramp data, | |||
1342 | * the symbol points into the copied tramp data page. | |||
1343 | */ | |||
1344 | mp_pdirpa = tramp_pdirpa; | |||
1345 | ||||
1346 | /* Unmap, will be remapped in cpu_start_secondary */ | |||
1347 | pmap_kremove(MP_TRAMPOLINE(16 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
1348 | pmap_kremove(MP_TRAMP_DATA(17 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
1349 | #endif /* MULTIPROCESSOR */ | |||
1350 | #endif | |||
1351 | } | |||
1352 | ||||
/* IDTVEC(name) expands to the exception stub symbol Xname from vector.S */
1353 | #define IDTVEC(name)Xname __CONCAT(X, name)Xname | |||
1354 | typedef void (vector)(void); | |||
1355 | extern vector *IDTVEC(exceptions)Xexceptions[]; | |||
1356 | ||||
/* pa of the 3 locore0 bootstrap PTE pages, consumed early by efifb */
1357 | paddr_t early_pte_pages; | |||
1358 | ||||
1359 | void | |||
1360 | init_x86_64(paddr_t first_avail) | |||
1361 | { | |||
1362 | struct region_descriptor region; | |||
1363 | bios_memmap_t *bmp; | |||
1364 | int x, ist; | |||
1365 | uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT4) << 30; | |||
1366 | ||||
1367 | /* | |||
1368 | * locore0 mapped 3 pages for use before the pmap is initialized | |||
1369 | * starting at first_avail. These pages are currently used by | |||
1370 | * efifb to create early-use VAs for the framebuffer before efifb | |||
1371 | * is attached. | |||
1372 | */ | |||
1373 | early_pte_pages = first_avail; | |||
1374 | first_avail += 3 * NBPG(1 << 12); | |||
1375 | ||||
1376 | cpu_init_msrs(&cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev)))); | |||
1377 | ||||
1378 | proc0.p_addr = proc0paddr; | |||
1379 | cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_curpcb = &proc0.p_addr->u_pcb; | |||
1380 | ||||
1381 | x86_bus_space_init(); | |||
1382 | ||||
1383 | i8254_startclock(); | |||
1384 | ||||
1385 | /* | |||
1386 | * Initialize PAGE_SIZE-dependent variables. | |||
1387 | */ | |||
1388 | uvm_setpagesize(); | |||
1389 | ||||
1390 | /* | |||
1391 | * Boot arguments are in a single page specified by /boot. | |||
1392 | * | |||
1393 | * We require the "new" vector form, as well as memory ranges | |||
1394 | * to be given in bytes rather than KB. | |||
1395 | * | |||
1396 | * locore copies the data into bootinfo[] for us. | |||
1397 | */ | |||
1398 | if ((bootapiver & (BAPIV_VECTOR0x00000002 | BAPIV_BMEMMAP0x00000008)) == | |||
| ||||
1399 | (BAPIV_VECTOR0x00000002 | BAPIV_BMEMMAP0x00000008)) { | |||
1400 | if (bootinfo_size >= sizeof(bootinfo)) | |||
1401 | panic("boot args too big"); | |||
1402 | ||||
1403 | getbootinfo(bootinfo, bootinfo_size); | |||
1404 | } else | |||
1405 | panic("invalid /boot"); | |||
1406 | ||||
1407 | cninit(); | |||
1408 | ||||
1409 | /* | |||
1410 | * Memory on the AMD64 port is described by three different things. | |||
1411 | * | |||
1412 | * 1. biosbasemem - This is outdated, and should really only be used to | |||
1413 | * sanitize the other values. This is what we get back from the BIOS | |||
1414 | * using the legacy routines, describing memory below 640KB. | |||
1415 | * | |||
1416 | * 2. bios_memmap[] - This is the memory map as the bios has returned | |||
1417 | * it to us. It includes memory the kernel occupies, etc. | |||
1418 | * | |||
1419 | * 3. mem_cluster[] - This is the massaged free memory segments after | |||
1420 | * taking into account the contents of bios_memmap, biosbasemem, | |||
1421 | * and locore/machdep/pmap kernel allocations of physical | |||
1422 | * pages. | |||
1423 | * | |||
1424 | * The other thing is that the physical page *RANGE* is described by | |||
1425 | * three more variables: | |||
1426 | * | |||
1427 | * avail_start - This is a physical address of the start of available | |||
1428 | * pages, until IOM_BEGIN. This is basically the start | |||
1429 | * of the UVM managed range of memory, with some holes... | |||
1430 | * | |||
1431 | * avail_end - This is the end of physical pages. All physical pages | |||
1432 | * that UVM manages are between avail_start and avail_end. | |||
1433 | * There are holes... | |||
1434 | * | |||
1435 | * first_avail - This is the first available physical page after the | |||
1436 | * kernel, page tables, etc. | |||
1437 | * | |||
1438 | * We skip the first few pages for trampolines, hibernate, and to avoid | |||
1439 | * buggy SMI implementations that could corrupt the first 64KB. | |||
1440 | */ | |||
1441 | avail_start = 16*PAGE_SIZE(1 << 12); | |||
1442 | ||||
1443 | #ifdef MULTIPROCESSOR1 | |||
1444 | if (avail_start < MP_TRAMPOLINE(16 * (1 << 12)) + PAGE_SIZE(1 << 12)) | |||
1445 | avail_start = MP_TRAMPOLINE(16 * (1 << 12)) + PAGE_SIZE(1 << 12); | |||
1446 | if (avail_start < MP_TRAMP_DATA(17 * (1 << 12)) + PAGE_SIZE(1 << 12)) | |||
1447 | avail_start = MP_TRAMP_DATA(17 * (1 << 12)) + PAGE_SIZE(1 << 12); | |||
1448 | #endif | |||
1449 | ||||
1450 | #if (NACPI1 > 0 && !defined(SMALL_KERNEL)) | |||
1451 | if (avail_start < ACPI_TRAMPOLINE(19 * (1 << 12)) + PAGE_SIZE(1 << 12)) | |||
1452 | avail_start = ACPI_TRAMPOLINE(19 * (1 << 12)) + PAGE_SIZE(1 << 12); | |||
1453 | if (avail_start < ACPI_TRAMP_DATA(20 * (1 << 12)) + PAGE_SIZE(1 << 12)) | |||
1454 | avail_start = ACPI_TRAMP_DATA(20 * (1 << 12)) + PAGE_SIZE(1 << 12); | |||
1455 | #endif | |||
1456 | ||||
1457 | #ifdef HIBERNATE1 | |||
1458 | if (avail_start < HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34) + PAGE_SIZE(1 << 12)) | |||
1459 | avail_start = HIBERNATE_HIBALLOC_PAGE((1 << 12) * 34) + PAGE_SIZE(1 << 12); | |||
1460 | #endif /* HIBERNATE */ | |||
1461 | ||||
1462 | /* | |||
1463 | * We need to go through the BIOS memory map given, and | |||
1464 | * fill out mem_clusters and mem_cluster_cnt stuff, taking | |||
1465 | * into account all the points listed above. | |||
1466 | */ | |||
1467 | avail_end = mem_cluster_cnt = 0; | |||
1468 | for (bmp = bios_memmap; bmp->type != BIOS_MAP_END0x00; bmp++) { | |||
1469 | paddr_t s1, s2, e1, e2; | |||
1470 | ||||
1471 | /* Ignore non-free memory */ | |||
1472 | if (bmp->type != BIOS_MAP_FREE0x01) | |||
1473 | continue; | |||
1474 | if (bmp->size < PAGE_SIZE(1 << 12)) | |||
1475 | continue; | |||
1476 | ||||
1477 | /* Init our segment(s), round/trunc to pages */ | |||
1478 | s1 = round_page(bmp->addr)(((bmp->addr) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
1479 | e1 = trunc_page(bmp->addr + bmp->size)((bmp->addr + bmp->size) & ~((1 << 12) - 1)); | |||
1480 | s2 = e2 = 0; | |||
1481 | ||||
1482 | /* | |||
1483 | * XXX Some buggy ACPI BIOSes use memory that they | |||
1484 | * declare as free. Typically the affected memory | |||
1485 | * areas are small blocks between areas reserved for | |||
1486 | * ACPI and other BIOS goo. So skip areas smaller | |||
1487 | * than 1 MB above the 16 MB boundary (to avoid | |||
1488 | * affecting legacy stuff). | |||
1489 | */ | |||
1490 | if (s1 > 16*1024*1024 && (e1 - s1) < 1*1024*1024) | |||
1491 | continue; | |||
1492 | ||||
1493 | /* Check and adjust our segment(s) */ | |||
1494 | /* Nuke low pages */ | |||
1495 | if (s1 < avail_start) { | |||
1496 | s1 = avail_start; | |||
1497 | if (s1 > e1) | |||
1498 | continue; | |||
1499 | } | |||
1500 | ||||
1501 | /* | |||
1502 | * The direct map is limited to 512GB * NUM_L4_SLOT_DIRECT of | |||
1503 | * memory, so discard anything above that. | |||
1504 | */ | |||
1505 | if (e1 >= max_dm_size) { | |||
1506 | e1 = max_dm_size; | |||
1507 | if (s1 > e1) | |||
1508 | continue; | |||
1509 | } | |||
1510 | ||||
1511 | /* Crop stuff into "640K hole" */ | |||
1512 | if (s1 < IOM_BEGIN0x0a0000 && e1 > IOM_BEGIN0x0a0000) | |||
1513 | e1 = IOM_BEGIN0x0a0000; | |||
1514 | if (s1 < biosbasemem && e1 > biosbasemem) | |||
1515 | e1 = biosbasemem; | |||
1516 | ||||
1517 | /* Split any segments straddling the 16MB boundary */ | |||
1518 | if (s1 < 16*1024*1024 && e1 > 16*1024*1024) { | |||
1519 | e2 = e1; | |||
1520 | s2 = e1 = 16*1024*1024; | |||
1521 | } | |||
1522 | ||||
1523 | /* Store segment(s) */ | |||
1524 | if (e1 - s1 >= PAGE_SIZE(1 << 12)) { | |||
1525 | mem_clusters[mem_cluster_cnt].start = s1; | |||
1526 | mem_clusters[mem_cluster_cnt].size = e1 - s1; | |||
1527 | mem_cluster_cnt++; | |||
1528 | } | |||
1529 | if (e2 - s2 >= PAGE_SIZE(1 << 12)) { | |||
1530 | mem_clusters[mem_cluster_cnt].start = s2; | |||
1531 | mem_clusters[mem_cluster_cnt].size = e2 - s2; | |||
1532 | mem_cluster_cnt++; | |||
1533 | } | |||
1534 | if (avail_end < e1) avail_end = e1; | |||
1535 | if (avail_end < e2) avail_end = e2; | |||
1536 | } | |||
1537 | ||||
1538 | /* | |||
1539 | * Call pmap initialization to make new kernel address space. | |||
1540 | * We must do this before loading pages into the VM system. | |||
1541 | */ | |||
1542 | first_avail = pmap_bootstrap(first_avail, trunc_page(avail_end)((avail_end) & ~((1 << 12) - 1))); | |||
1543 | ||||
1544 | /* Allocate these out of the 640KB base memory */ | |||
1545 | if (avail_start != PAGE_SIZE(1 << 12)) | |||
1546 | avail_start = pmap_prealloc_lowmem_ptps(avail_start); | |||
1547 | ||||
1548 | cpu_init_extents(); | |||
1549 | ||||
1550 | /* Make sure the end of the space used by the kernel is rounded. */ | |||
1551 | first_avail = round_page(first_avail)(((first_avail) + ((1 << 12) - 1)) & ~((1 << 12 ) - 1)); | |||
1552 | kern_end = KERNBASE0xffffffff80000000 + first_avail; | |||
1553 | ||||
1554 | /* | |||
1555 | * Now, load the memory clusters (which have already been | |||
1556 | * flensed) into the VM system. | |||
1557 | */ | |||
1558 | for (x = 0; x < mem_cluster_cnt; x++) { | |||
1559 | paddr_t seg_start = mem_clusters[x].start; | |||
1560 | paddr_t seg_end = seg_start + mem_clusters[x].size; | |||
1561 | ||||
1562 | if (seg_start < first_avail) seg_start = first_avail; | |||
1563 | if (seg_start > seg_end) continue; | |||
1564 | if (seg_end - seg_start < PAGE_SIZE(1 << 12)) continue; | |||
1565 | ||||
1566 | physmem += atop(mem_clusters[x].size)((mem_clusters[x].size) >> 12); | |||
1567 | ||||
1568 | #if DEBUG_MEMLOAD | |||
1569 | printf("loading 0x%lx-0x%lx (0x%lx-0x%lx)\n", | |||
1570 | seg_start, seg_end, atop(seg_start)((seg_start) >> 12), atop(seg_end)((seg_end) >> 12)); | |||
1571 | #endif | |||
1572 | uvm_page_physload(atop(seg_start)((seg_start) >> 12), atop(seg_end)((seg_end) >> 12), | |||
1573 | atop(seg_start)((seg_start) >> 12), atop(seg_end)((seg_end) >> 12), 0); | |||
1574 | } | |||
1575 | ||||
1576 | /* | |||
1577 | * Now, load the memory between the end of I/O memory "hole" | |||
1578 | * and the kernel. | |||
1579 | */ | |||
1580 | { | |||
1581 | paddr_t seg_start = round_page(IOM_END)(((0x100000) + ((1 << 12) - 1)) & ~((1 << 12) - 1)); | |||
1582 | paddr_t seg_end = trunc_page(KERNTEXTOFF - KERNBASE)(((0xffffffff80000000 +0x1000000) - 0xffffffff80000000) & ~((1 << 12) - 1)); | |||
1583 | ||||
1584 | if (seg_start
| |||
1585 | #if DEBUG_MEMLOAD | |||
1586 | printf("loading 0x%lx-0x%lx\n", seg_start, seg_end); | |||
1587 | #endif | |||
1588 | uvm_page_physload(atop(seg_start)((seg_start) >> 12), atop(seg_end)((seg_end) >> 12), | |||
1589 | atop(seg_start)((seg_start) >> 12), atop(seg_end)((seg_end) >> 12), 0); | |||
1590 | } | |||
1591 | } | |||
1592 | ||||
1593 | #if DEBUG_MEMLOAD | |||
1594 | printf("avail_start = 0x%lx\n", avail_start); | |||
1595 | printf("avail_end = 0x%lx\n", avail_end); | |||
1596 | printf("first_avail = 0x%lx\n", first_avail); | |||
1597 | #endif | |||
1598 | ||||
	/*
	 * Steal memory for the message buffer (at end of core):
	 * find the physical segment that ends at avail_end, shave the
	 * (page-rounded) msgbuf off its tail, then recompute avail_end.
	 */
	{
		struct vm_physseg *vps = NULL;
		psize_t sz = round_page(MSGBUFSIZE);
		psize_t reqsz = sz;	/* remember request to warn on shrink */

		/* Locate the segment whose end coincides with avail_end. */
		for (x = 0; x < vm_nphysseg; x++) {
			vps = &vm_physmem[x];
			if (ptoa(vps->avail_end) == avail_end)
				break;
		}
		if (x == vm_nphysseg)
			panic("init_x86_64: can't find end of memory");

		/*
		 * NOTE(review): the analyzer's "NULL dereference of vps"
		 * warning below is a false positive: vps can only still be
		 * NULL when vm_nphysseg == 0, in which case x == vm_nphysseg
		 * and panic() above fires.  panic() does not return
		 * (declared __dead), so vps points at a valid segment here.
		 */

		/* Shrink so it'll fit in the last segment. */
		if ((vps->avail_end - vps->avail_start) < atop(sz))
			sz = ptoa(vps->avail_end - vps->avail_start);

		/* Carve the msgbuf pages off the tail of the segment. */
		vps->avail_end -= atop(sz);
		vps->end -= atop(sz);
		msgbuf_paddr = ptoa(vps->avail_end);

		/* Remove the last segment if it now has no pages. */
		if (vps->start == vps->end) {
			for (vm_nphysseg--; x < vm_nphysseg; x++)
				vm_physmem[x] = vm_physmem[x + 1];
		}

		/* Now find where the new avail_end is. */
		for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
			if (vm_physmem[x].avail_end > avail_end)
				avail_end = vm_physmem[x].avail_end;
		avail_end = ptoa(avail_end);

		/* Warn if the message buffer had to be shrunk. */
		if (sz != reqsz)
			printf("WARNING: %ld bytes not available for msgbuf "
			    "in last cluster (%ld used)\n", reqsz, sz);
	}
1640 | ||||
1641 | /* | |||
1642 | * Steal some memory for a dump bouncebuffer if we have memory over | |||
1643 | * the 32-bit barrier. | |||
1644 | */ | |||
1645 | if (avail_end > 0xffffffff) { | |||
1646 | struct vm_physseg *vps = NULL((void *)0); | |||
1647 | psize_t sz = round_page(MAX(BYTES_PER_DUMP, dbtob(1)))(((((((64 * 1024))>(((1) << 9)))?((64 * 1024)):(((1) << 9)))) + ((1 << 12) - 1)) & ~((1 << 12 ) - 1)); | |||
1648 | ||||
1649 | /* XXX assumes segments are ordered */ | |||
1650 | for (x = 0; x < vm_nphysseg; x++) { | |||
1651 | vps = &vm_physmem[x]; | |||
1652 | /* Find something between 16meg and 4gig */ | |||
1653 | if (ptoa(vps->avail_end)((paddr_t)(vps->avail_end) << 12) <= 0xffffffff && | |||
1654 | ptoa(vps->avail_start)((paddr_t)(vps->avail_start) << 12) >= 0xffffff) | |||
1655 | break; | |||
1656 | } | |||
1657 | if (x == vm_nphysseg) | |||
1658 | panic("init_x86_64: no memory between " | |||
1659 | "0xffffff-0xffffffff"); | |||
1660 | ||||
1661 | /* Shrink so it'll fit in the segment. */ | |||
1662 | if ((vps->avail_end - vps->avail_start) < atop(sz)((sz) >> 12)) | |||
1663 | sz = ptoa(vps->avail_end - vps->avail_start)((paddr_t)(vps->avail_end - vps->avail_start) << 12 ); | |||
1664 | ||||
1665 | vps->avail_end -= atop(sz)((sz) >> 12); | |||
1666 | vps->end -= atop(sz)((sz) >> 12); | |||
1667 | dumpmem_paddr = ptoa(vps->avail_end)((paddr_t)(vps->avail_end) << 12); | |||
1668 | dumpmem_vaddr = PMAP_DIRECT_MAP(dumpmem_paddr)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (dumpmem_paddr)); | |||
1669 | dumpmem_sz = sz; | |||
1670 | ||||
1671 | /* Remove the last segment if it now has no pages. */ | |||
1672 | if (vps->start == vps->end) { | |||
1673 | for (vm_nphysseg--; x < vm_nphysseg; x++) | |||
1674 | vm_physmem[x] = vm_physmem[x + 1]; | |||
1675 | } | |||
1676 | } | |||
1677 | ||||
1678 | pmap_growkernel(VM_MIN_KERNEL_ADDRESS0xffff800000000000 + 32 * 1024 * 1024); | |||
1679 | ||||
1680 | pmap_kenter_pa(idt_vaddr, idt_paddr, PROT_READ0x01 | PROT_WRITE0x02); | |||
1681 | ||||
1682 | idt = (struct gate_descriptor *)idt_vaddr; | |||
1683 | cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_tss = &cpu_info_full_primary.cif_tsscif_RO.u_tssgdt.uu_tss; | |||
1684 | cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_gdt = &cpu_info_full_primary.cif_gdtcif_RO.u_tssgdt.uu_gdt; | |||
1685 | ||||
1686 | /* make gdt gates and memory segments */ | |||
1687 | set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GCODE_SEL)((struct mem_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((1) << 3))), 0, | |||
1688 | 0xfffff, SDT_MEMERA27, SEL_KPL0, 1, 0, 1); | |||
1689 | ||||
1690 | set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GDATA_SEL)((struct mem_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((2) << 3))), 0, | |||
1691 | 0xfffff, SDT_MEMRWA19, SEL_KPL0, 1, 0, 1); | |||
1692 | ||||
1693 | set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUCODE32_SEL)((struct mem_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((3) << 3))), 0, | |||
1694 | atop(VM_MAXUSER_ADDRESS32)((0xffffc000) >> 12) - 1, SDT_MEMERA27, SEL_UPL3, 1, 1, 0); | |||
1695 | ||||
1696 | set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUDATA_SEL)((struct mem_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((4) << 3))), 0, | |||
1697 | atop(VM_MAXUSER_ADDRESS)((0x00007f7fffffc000) >> 12) - 1, SDT_MEMRWA19, SEL_UPL3, 1, 0, 1); | |||
1698 | ||||
1699 | set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUCODE_SEL)((struct mem_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((5) << 3))), 0, | |||
1700 | atop(VM_MAXUSER_ADDRESS)((0x00007f7fffffc000) >> 12) - 1, SDT_MEMERA27, SEL_UPL3, 1, 0, 1); | |||
1701 | ||||
1702 | set_sys_segment(GDT_ADDR_SYS(cpu_info_primary.ci_gdt, GPROC0_SEL)((struct sys_segment_descriptor *)((char *)((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof (struct cpu_info, ci_dev))).ci_gdt) + ((0) << 4) + (6 << 3))), | |||
1703 | cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_tss, sizeof (struct x86_64_tss)-1, | |||
1704 | SDT_SYS386TSS9, SEL_KPL0, 0); | |||
1705 | ||||
1706 | /* exceptions */ | |||
1707 | for (x = 0; x < 32; x++) { | |||
1708 | /* trap2 == NMI, trap8 == double fault */ | |||
1709 | ist = (x == 2) ? 2 : (x == 8) ? 1 : 0; | |||
1710 | setgate(&idt[x], IDTVEC(exceptions)Xexceptions[x], ist, SDT_SYS386IGT14, | |||
1711 | (x == 3) ? SEL_UPL3 : SEL_KPL0, | |||
1712 | GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0)); | |||
1713 | idt_allocmap[x] = 1; | |||
1714 | } | |||
1715 | ||||
1716 | setregion(®ion, cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1); | |||
1717 | lgdt(®ion); | |||
1718 | ||||
1719 | cpu_init_idt(); | |||
1720 | ||||
1721 | intr_default_setup(); | |||
1722 | ||||
1723 | fpuinit(&cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev)))); | |||
1724 | ||||
1725 | softintr_init(); | |||
1726 | splraise(IPL_IPI0xe); | |||
1727 | intr_enable(); | |||
1728 | ||||
1729 | #ifdef DDB1 | |||
1730 | db_machine_init(); | |||
1731 | ddb_init(); | |||
1732 | if (boothowto & RB_KDB0x00040) | |||
1733 | db_enter(); | |||
1734 | #endif | |||
1735 | } | |||
1736 | ||||
/*
 * Reset the machine: try a registered machine-specific reset hook first,
 * then the classic keyboard-controller pulse, then a forced triple fault.
 */
void
cpu_reset(void)
{
	intr_disable();

	/* Prefer a platform/firmware-specific reset routine if present. */
	if (cpuresetfn)
		(*cpuresetfn)();

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Try to cause a triple fault and watchdog reset by making the IDT
	 * invalid and causing a fault.
	 */
	memset((caddr_t)idt, 0, NIDT * sizeof(idt[0]));
	__asm volatile("divl %0,%1" : : "q" (0), "a" (0));

	/* If even the triple fault did not reset us, spin forever. */
	for (;;)
		continue;
	/* NOTREACHED */
}
1766 | ||||
1767 | /* | |||
1768 | * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers. | |||
1769 | */ | |||
1770 | int | |||
1771 | cpu_dumpsize(void) | |||
1772 | { | |||
1773 | int size; | |||
1774 | ||||
1775 | size = ALIGN(sizeof(kcore_seg_t))(((unsigned long)(sizeof(kcore_seg_t)) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) + | |||
1776 | ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t))(((unsigned long)(mem_cluster_cnt * sizeof(phys_ram_seg_t)) + (sizeof(long) - 1)) &~(sizeof(long) - 1)); | |||
1777 | if (roundup(size, dbtob(1))((((size)+((((1) << 9))-1))/(((1) << 9)))*(((1) << 9))) != dbtob(1)((1) << 9)) | |||
1778 | return (-1); | |||
1779 | ||||
1780 | return (1); | |||
1781 | } | |||
1782 | ||||
1783 | /* | |||
1784 | * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped. | |||
1785 | */ | |||
1786 | u_long | |||
1787 | cpu_dump_mempagecnt(void) | |||
1788 | { | |||
1789 | u_long i, n; | |||
1790 | ||||
1791 | n = 0; | |||
1792 | for (i = 0; i < mem_cluster_cnt; i++) | |||
1793 | n += atop(mem_clusters[i].size)((mem_clusters[i].size) >> 12); | |||
1794 | return (n); | |||
1795 | } | |||
1796 | ||||
1797 | /* | |||
1798 | * Figure out which portions of memory are used by the kernel/system. | |||
1799 | */ | |||
1800 | int | |||
1801 | amd64_pa_used(paddr_t addr) | |||
1802 | { | |||
1803 | struct vm_page *pg; | |||
1804 | ||||
1805 | /* Kernel manages these */ | |||
1806 | if ((pg = PHYS_TO_VM_PAGE(addr)) && (pg->pg_flags & PG_DEV0x00000200) == 0) | |||
1807 | return 1; | |||
1808 | ||||
1809 | /* Kernel is loaded here */ | |||
1810 | if (addr > IOM_END0x100000 && addr < (kern_end - KERNBASE0xffffffff80000000)) | |||
1811 | return 1; | |||
1812 | ||||
1813 | /* Low memory used for various bootstrap things */ | |||
1814 | if (addr < avail_start) | |||
1815 | return 1; | |||
1816 | ||||
1817 | /* | |||
1818 | * The only regions I can think of that are left are the things | |||
1819 | * we steal away from UVM. The message buffer? | |||
1820 | * XXX - ignore these for now. | |||
1821 | */ | |||
1822 | ||||
1823 | return 0; | |||
1824 | } | |||
1825 | ||||
/*
 * Start the configured clock implementation via the initclock_func hook.
 */
void
cpu_initclocks(void)
{
	initclock_func();
}
1831 | ||||
1832 | void | |||
1833 | need_resched(struct cpu_info *ci) | |||
1834 | { | |||
1835 | ci->ci_want_resched = 1; | |||
1836 | ||||
1837 | /* There's a risk we'll be called before the idle threads start */ | |||
1838 | if (ci->ci_curproc) { | |||
1839 | aston(ci->ci_curproc)((ci->ci_curproc)->p_md.md_astpending = 1); | |||
1840 | cpu_kick(ci); | |||
1841 | } | |||
1842 | } | |||
1843 | ||||
1844 | /* | |||
1845 | * Allocate an IDT vector slot within the given range. | |||
1846 | * XXX needs locking to avoid MP allocation races. | |||
1847 | */ | |||
1848 | ||||
1849 | int | |||
1850 | idt_vec_alloc(int low, int high) | |||
1851 | { | |||
1852 | int vec; | |||
1853 | ||||
1854 | for (vec = low; vec <= high; vec++) { | |||
1855 | if (idt_allocmap[vec] == 0) { | |||
1856 | idt_allocmap[vec] = 1; | |||
1857 | return vec; | |||
1858 | } | |||
1859 | } | |||
1860 | return 0; | |||
1861 | } | |||
1862 | ||||
/*
 * Point an already-allocated IDT vector at the given handler, installing a
 * kernel-privilege interrupt gate.
 */
void
idt_vec_set(int vec, void (*function)(void))
{
	/*
	 * Vector should be allocated, so no locking needed.
	 */
	KASSERT(idt_allocmap[vec] == 1);
	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
}
1873 | ||||
/*
 * Release an IDT vector: clear the gate first, then mark the slot free
 * for reuse by idt_vec_alloc().
 */
void
idt_vec_free(int vec)
{
	unsetgate(&idt[vec]);
	idt_allocmap[vec] = 0;
}
1880 | ||||
#ifdef DIAGNOSTIC
/*
 * Verify that the current interrupt priority level satisfies the caller's
 * splassert(): the CPU's IPL must be at least wantipl, and we must not be
 * handling an interrupt above that level.
 */
void
splassert_check(int wantipl, const char *func)
{
	int cpl = curcpu()->ci_ilevel;
	int floor = curcpu()->ci_handled_intr_level;

	if (cpl < wantipl)
		splassert_fail(wantipl, cpl, func);

	if (floor > wantipl)
		splassert_fail(wantipl, floor, func);
}
#endif
1897 | ||||
1898 | int | |||
1899 | copyin32(const uint32_t *uaddr, uint32_t *kaddr) | |||
1900 | { | |||
1901 | if ((vaddr_t)uaddr & 0x3) | |||
1902 | return EFAULT14; | |||
1903 | ||||
1904 | /* copyin(9) is atomic */ | |||
1905 | return copyin(uaddr, kaddr, sizeof(uint32_t)); | |||
1906 | } | |||
1907 | ||||
/*
 * Walk the boot-loader argument list handed to the kernel and record the
 * pieces we care about (memory map, disk info, console device, DUID,
 * softraid boot keys, EFI info, microcode).  The list is a sequence of
 * variable-sized bootarg32_t records terminated by BOOTARG_END or by
 * running off the end of the buffer.
 */
void
getbootinfo(char *bootinfo, int bootinfo_size)
{
	bootarg32_t *q;
	bios_ddb_t *bios_ddb;
	bios_bootduid_t *bios_bootduid;
	bios_bootsr_t *bios_bootsr;
#undef BOOTINFO_DEBUG
#ifdef BOOTINFO_DEBUG
	printf("bootargv:");
#endif

	/* Advance by each record's self-reported size; stop at END/overrun. */
	for (q = (bootarg32_t *)bootinfo;
	    (q->ba_type != BOOTARG_END) &&
	    ((((char *)q) - bootinfo) < bootinfo_size);
	    q = (bootarg32_t *)(((char *)q) + q->ba_size)) {

		switch (q->ba_type) {
		case BOOTARG_MEMMAP:
			bios_memmap = (bios_memmap_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" memmap %p", bios_memmap);
#endif
			break;
		case BOOTARG_DISKINFO:
			bios_diskinfo = (bios_diskinfo_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" diskinfo %p", bios_diskinfo);
#endif
			break;
		case BOOTARG_APMINFO:
			/* generated by i386 boot loader */
			break;
		case BOOTARG_CKSUMLEN:
			bios_cksumlen = *(u_int32_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" cksumlen %d", bios_cksumlen);
#endif
			break;
		case BOOTARG_PCIINFO:
			/* generated by i386 boot loader */
			break;
		case BOOTARG_CONSDEV:
			/* Only trust the record if it is large enough. */
			if (q->ba_size >= sizeof(bios_consdev_t) +
			    offsetof(struct _boot_args32, ba_arg)) {
#if NCOM > 0
				bios_consdev_t *cdp =
				    (bios_consdev_t*)q->ba_arg;
				static const int ports[] =
				    { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
				int unit = minor(cdp->consdev);
				int consaddr = cdp->consaddr;
				/*
				 * Fall back to the legacy COM port I/O
				 * address for the given unit when the boot
				 * loader didn't supply one.
				 */
				if (consaddr == -1 && unit >= 0 &&
				    unit < nitems(ports))
					consaddr = ports[unit];
				/* major 8 is the com(4) console device */
				if (major(cdp->consdev) == 8 &&
				    consaddr != -1) {
					comconsunit = unit;
					comconsaddr = consaddr;
					comconsrate = cdp->conspeed;
					comconsiot = X86_BUS_SPACE_IO;
				}
#endif
#ifdef BOOTINFO_DEBUG
				printf(" console 0x%x:%d",
				    cdp->consdev, cdp->conspeed);
#endif
			}
			break;
		case BOOTARG_BOOTMAC:
			bios_bootmac = (bios_bootmac_t *)q->ba_arg;
			break;

		case BOOTARG_DDB:
			bios_ddb = (bios_ddb_t *)q->ba_arg;
#ifdef DDB
			db_console = bios_ddb->db_console;
#endif
			break;

		case BOOTARG_BOOTDUID:
			bios_bootduid = (bios_bootduid_t *)q->ba_arg;
			memcpy(bootduid, bios_bootduid, sizeof(bootduid));
			break;

		case BOOTARG_BOOTSR:
			bios_bootsr = (bios_bootsr_t *)q->ba_arg;
#if NSOFTRAID > 0
			memcpy(&sr_bootuuid, &bios_bootsr->uuid,
			    sizeof(sr_bootuuid));
			memcpy(&sr_bootkey, &bios_bootsr->maskkey,
			    sizeof(sr_bootkey));
#endif
			/* Scrub the key material from the bootarg area. */
			explicit_bzero(bios_bootsr, sizeof(bios_bootsr_t));
			break;

		case BOOTARG_EFIINFO:
			bios_efiinfo = (bios_efiinfo_t *)q->ba_arg;
			break;

		case BOOTARG_UCODE:
			bios_ucode = (bios_ucode_t *)q->ba_arg;
			break;

		default:
#ifdef BOOTINFO_DEBUG
			printf(" unsupported arg (%d) %p", q->ba_type,
			    q->ba_arg);
#endif
			break;
		}
	}
#ifdef BOOTINFO_DEBUG
	printf("\n");
#endif
}
2024 | ||||
2025 | int | |||
2026 | check_context(const struct reg *regs, struct trapframe *tf) | |||
2027 | { | |||
2028 | uint16_t sel; | |||
2029 | ||||
2030 | if (((regs->r_rflags ^ tf->tf_rflags) & PSL_USERSTATIC(0x00000002 | 0xffc08028 | 0x00000200 | 0x00003000 | 0x00004000 | 0x00020000 | 0x00080000 | 0x00100000)) != 0) | |||
2031 | return EINVAL22; | |||
2032 | ||||
2033 | sel = regs->r_ss & 0xffff; | |||
2034 | if (!VALID_USER_DSEL(sel)((sel) == (((4) << 3) | 3))) | |||
2035 | return EINVAL22; | |||
2036 | ||||
2037 | sel = regs->r_cs & 0xffff; | |||
2038 | if (!VALID_USER_CSEL(sel)((sel) == (((5) << 3) | 3))) | |||
2039 | return EINVAL22; | |||
2040 | ||||
2041 | if (regs->r_rip >= VM_MAXUSER_ADDRESS0x00007f7fffffc000) | |||
2042 | return EINVAL22; | |||
2043 | ||||
2044 | return 0; | |||
2045 | } |