File: | sys/syscall_mi.h |
Warning: | line 123, column 2: 4th function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: trap.c,v 1.102 2023/12/12 15:30:55 deraadt Exp $ */ | |||
2 | /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ | |||
3 | ||||
4 | /*- | |||
5 | * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. | |||
6 | * All rights reserved. | |||
7 | * | |||
8 | * This code is derived from software contributed to The NetBSD Foundation | |||
9 | * by Charles M. Hannum. | |||
10 | * | |||
11 | * Redistribution and use in source and binary forms, with or without | |||
12 | * modification, are permitted provided that the following conditions | |||
13 | * are met: | |||
14 | * 1. Redistributions of source code must retain the above copyright | |||
15 | * notice, this list of conditions and the following disclaimer. | |||
16 | * 2. Redistributions in binary form must reproduce the above copyright | |||
17 | * notice, this list of conditions and the following disclaimer in the | |||
18 | * documentation and/or other materials provided with the distribution. | |||
19 | * | |||
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
30 | * POSSIBILITY OF SUCH DAMAGE. | |||
31 | */ | |||
32 | ||||
33 | /*- | |||
34 | * Copyright (c) 1990 The Regents of the University of California. | |||
35 | * All rights reserved. | |||
36 | * | |||
37 | * This code is derived from software contributed to Berkeley by | |||
38 | * the University of Utah, and William Jolitz. | |||
39 | * | |||
40 | * Redistribution and use in source and binary forms, with or without | |||
41 | * modification, are permitted provided that the following conditions | |||
42 | * are met: | |||
43 | * 1. Redistributions of source code must retain the above copyright | |||
44 | * notice, this list of conditions and the following disclaimer. | |||
45 | * 2. Redistributions in binary form must reproduce the above copyright | |||
46 | * notice, this list of conditions and the following disclaimer in the | |||
47 | * documentation and/or other materials provided with the distribution. | |||
48 | * 3. Neither the name of the University nor the names of its contributors | |||
49 | * may be used to endorse or promote products derived from this software | |||
50 | * without specific prior written permission. | |||
51 | * | |||
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
62 | * SUCH DAMAGE. | |||
63 | * | |||
64 | * @(#)trap.c 7.4 (Berkeley) 5/13/91 | |||
65 | */ | |||
66 | ||||
67 | /* | |||
68 | * amd64 Trap and System call handling | |||
69 | */ | |||
70 | #undef TRAP_SIGDEBUG | |||
71 | ||||
72 | #include <sys/param.h> | |||
73 | #include <sys/systm.h> | |||
74 | #include <sys/proc.h> | |||
75 | #include <sys/signalvar.h> | |||
76 | #include <sys/user.h> | |||
77 | #include <sys/signal.h> | |||
78 | #include <sys/syscall.h> | |||
79 | #include <sys/syscall_mi.h> | |||
80 | #include <sys/stdarg.h> | |||
81 | ||||
82 | #include <uvm/uvm_extern.h> | |||
83 | ||||
84 | #include <machine/cpu.h> | |||
85 | #include <machine/cpufunc.h> | |||
86 | #include <machine/fpu.h> | |||
87 | #include <machine/psl.h> | |||
88 | #include <machine/trap.h> | |||
89 | #ifdef DDB1 | |||
90 | #include <ddb/db_output.h> | |||
91 | #include <machine/db_machdep.h> | |||
92 | #endif | |||
93 | ||||
94 | #include "isa.h" | |||
95 | ||||
96 | int upageflttrap(struct trapframe *, uint64_t); | |||
97 | int kpageflttrap(struct trapframe *, uint64_t); | |||
98 | void kerntrap(struct trapframe *); | |||
99 | void usertrap(struct trapframe *); | |||
100 | void ast(struct trapframe *); | |||
101 | void syscall(struct trapframe *); | |||
102 | ||||
103 | const char * const trap_type[] = { | |||
104 | "privileged instruction fault", /* 0 T_PRIVINFLT */ | |||
105 | "breakpoint trap", /* 1 T_BPTFLT */ | |||
106 | "arithmetic trap", /* 2 T_ARITHTRAP */ | |||
107 | "reserved trap", /* 3 T_RESERVED */ | |||
108 | "protection fault", /* 4 T_PROTFLT */ | |||
109 | "trace trap", /* 5 T_TRCTRAP */ | |||
110 | "page fault", /* 6 T_PAGEFLT */ | |||
111 | "alignment fault", /* 7 T_ALIGNFLT */ | |||
112 | "integer divide fault", /* 8 T_DIVIDE */ | |||
113 | "non-maskable interrupt", /* 9 T_NMI */ | |||
114 | "overflow trap", /* 10 T_OFLOW */ | |||
115 | "bounds check fault", /* 11 T_BOUND */ | |||
116 | "FPU not available fault", /* 12 T_DNA */ | |||
117 | "double fault", /* 13 T_DOUBLEFLT */ | |||
118 | "FPU operand fetch fault", /* 14 T_FPOPFLT */ | |||
119 | "invalid TSS fault", /* 15 T_TSSFLT */ | |||
120 | "segment not present fault", /* 16 T_SEGNPFLT */ | |||
121 | "stack fault", /* 17 T_STKFLT */ | |||
122 | "machine check", /* 18 T_MCA */ | |||
123 | "SSE FP exception", /* 19 T_XMM */ | |||
124 | "virtualization exception", /* 20 T_VE */ | |||
125 | "control protection exception", /* 21 T_CP */ | |||
126 | }; | |||
127 | const int trap_types = nitems(trap_type)(sizeof((trap_type)) / sizeof((trap_type)[0])); | |||
128 | ||||
129 | #ifdef DEBUG | |||
130 | int trapdebug = 0; | |||
131 | #endif | |||
132 | ||||
133 | static void trap_print(struct trapframe *, int _type); | |||
134 | static inline void frame_dump(struct trapframe *_tf, struct proc *_p, | |||
135 | const char *_sig, uint64_t _cr2); | |||
136 | static inline void verify_smap(const char *_func); | |||
137 | static inline int verify_pkru(struct proc *); | |||
138 | static inline void debug_trap(struct trapframe *_frame, struct proc *_p, | |||
139 | long _type); | |||
140 | ||||
141 | static inline void | |||
142 | fault(const char *fmt, ...) | |||
143 | { | |||
144 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); | |||
145 | va_list ap; | |||
146 | ||||
147 | atomic_cas_ptr(&panicstr, NULL, ci->ci_panicbuf)_atomic_cas_ptr((&panicstr), (((void *)0)), (ci->ci_panicbuf )); | |||
148 | ||||
149 | va_start(ap, fmt)__builtin_va_start((ap), fmt); | |||
150 | vsnprintf(ci->ci_panicbuf, sizeof(ci->ci_panicbuf), fmt, ap); | |||
151 | va_end(ap)__builtin_va_end((ap)); | |||
152 | #ifdef DDB1 | |||
153 | db_printf("%s\n", ci->ci_panicbuf); | |||
154 | #else | |||
155 | printf("%s\n", ci->ci_panicbuf); | |||
156 | #endif | |||
157 | } | |||
158 | ||||
159 | static inline int | |||
160 | pgex2access(int pgex) | |||
161 | { | |||
162 | if (pgex & PGEX_W0x02) | |||
163 | return PROT_WRITE0x02; | |||
164 | else if (pgex & PGEX_I0x10) | |||
165 | return PROT_EXEC0x04; | |||
166 | return PROT_READ0x01; | |||
167 | } | |||
168 | ||||
169 | /* | |||
170 | * upageflttrap(frame, usermode): page fault handler | |||
171 | * Returns non-zero if the fault was handled (possibly by generating | |||
172 | * a signal). Returns zero, possibly still holding the kernel lock, | |||
173 | * if something was so broken that we should panic. | |||
174 | */ | |||
175 | int | |||
176 | upageflttrap(struct trapframe *frame, uint64_t cr2) | |||
177 | { | |||
178 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
179 | vaddr_t va = trunc_page((vaddr_t)cr2)(((vaddr_t)cr2) & ~((1 << 12) - 1)); | |||
180 | vm_prot_t access_type = pgex2access(frame->tf_err); | |||
181 | union sigval sv; | |||
182 | int signal, sicode, error; | |||
183 | ||||
184 | /* | |||
185 | * If NX is not enabled, we cant distinguish between PROT_READ | |||
186 | * and PROT_EXEC access, so try both. | |||
187 | */ | |||
188 | error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); | |||
189 | if (pg_nx == 0 && error == EACCES13 && access_type == PROT_READ0x01) | |||
190 | error = uvm_fault(&p->p_vmspace->vm_map, va, 0, PROT_EXEC0x04); | |||
191 | if (error == 0) { | |||
192 | uvm_grow(p, va); | |||
193 | return 1; | |||
194 | } | |||
195 | ||||
196 | signal = SIGSEGV11; | |||
197 | sicode = SEGV_MAPERR1; | |||
198 | if (error == ENOMEM12) { | |||
199 | printf("UVM: pid %d (%s), uid %d killed:" | |||
200 | " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm, | |||
201 | p->p_ucred ? (int)p->p_ucred->cr_uid : -1); | |||
202 | signal = SIGKILL9; | |||
203 | } else { | |||
204 | if (error == EACCES13) | |||
205 | sicode = SEGV_ACCERR2; | |||
206 | else if (error == EIO5) { | |||
207 | signal = SIGBUS10; | |||
208 | sicode = BUS_OBJERR3; | |||
209 | } | |||
210 | } | |||
211 | sv.sival_ptr = (void *)cr2; | |||
212 | trapsignal(p, signal, T_PAGEFLT6, sicode, sv); | |||
213 | return 1; | |||
214 | } | |||
215 | ||||
216 | ||||
217 | /* | |||
218 | * kpageflttrap(frame, usermode): page fault handler | |||
219 | * Returns non-zero if the fault was handled (possibly by generating a signal). | |||
220 | * Returns zero if something was so broken that we should panic. | |||
221 | */ | |||
222 | int | |||
223 | kpageflttrap(struct trapframe *frame, uint64_t cr2) | |||
224 | { | |||
225 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
226 | struct pcb *pcb; | |||
227 | vaddr_t va = trunc_page((vaddr_t)cr2)(((vaddr_t)cr2) & ~((1 << 12) - 1)); | |||
228 | struct vm_map *map; | |||
229 | vm_prot_t access_type = pgex2access(frame->tf_err); | |||
230 | caddr_t onfault; | |||
231 | int error; | |||
232 | ||||
233 | if (p == NULL((void *)0) || p->p_addr == NULL((void *)0) || p->p_vmspace == NULL((void *)0)) | |||
234 | return 0; | |||
235 | ||||
236 | pcb = &p->p_addr->u_pcb; | |||
237 | if (pcb->pcb_onfault != NULL((void *)0)) { | |||
238 | extern caddr_t __nofault_start[], __nofault_end[]; | |||
239 | caddr_t *nf = __nofault_start; | |||
240 | while (*nf++ != pcb->pcb_onfault) { | |||
241 | if (nf >= __nofault_end) { | |||
242 | fault("invalid pcb_nofault=%lx", | |||
243 | (long)pcb->pcb_onfault); | |||
244 | return 0; | |||
245 | } | |||
246 | } | |||
247 | } | |||
248 | ||||
249 | /* This will only trigger if SMEP is enabled */ | |||
250 | if (pcb->pcb_onfault == NULL((void *)0) && cr2 <= VM_MAXUSER_ADDRESS0x00007f7fffffc000 && | |||
251 | frame->tf_err & PGEX_I0x10) { | |||
252 | fault("attempt to execute user address %p " | |||
253 | "in supervisor mode", (void *)cr2); | |||
254 | return 0; | |||
255 | } | |||
256 | /* This will only trigger if SMAP is enabled */ | |||
257 | if (pcb->pcb_onfault == NULL((void *)0) && cr2 <= VM_MAXUSER_ADDRESS0x00007f7fffffc000 && | |||
258 | frame->tf_err & PGEX_P0x01) { | |||
259 | fault("attempt to access user address %p " | |||
260 | "in supervisor mode", (void *)cr2); | |||
261 | return 0; | |||
262 | } | |||
263 | ||||
264 | /* | |||
265 | * It is only a kernel address space fault iff: | |||
266 | * 1. when running in ring 0 and | |||
267 | * 2. pcb_onfault not set or | |||
268 | * 3. pcb_onfault set but supervisor space fault | |||
269 | * The last can occur during an exec() copyin where the | |||
270 | * argument space is lazy-allocated. | |||
271 | */ | |||
272 | map = &p->p_vmspace->vm_map; | |||
273 | if (va >= VM_MIN_KERNEL_ADDRESS0xffff800000000000) | |||
274 | map = kernel_map; | |||
275 | ||||
276 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_inatomic == 0 || map == kernel_map) { | |||
277 | onfault = pcb->pcb_onfault; | |||
278 | pcb->pcb_onfault = NULL((void *)0); | |||
279 | error = uvm_fault(map, va, 0, access_type); | |||
280 | pcb->pcb_onfault = onfault; | |||
281 | ||||
282 | if (error == 0 && map != kernel_map) | |||
283 | uvm_grow(p, va); | |||
284 | } else | |||
285 | error = EFAULT14; | |||
286 | ||||
287 | if (error) { | |||
288 | if (pcb->pcb_onfault == NULL((void *)0)) { | |||
289 | /* bad memory access in the kernel */ | |||
290 | fault("uvm_fault(%p, 0x%llx, 0, %d) -> %x", | |||
291 | map, cr2, access_type, error); | |||
292 | return 0; | |||
293 | } | |||
294 | frame->tf_rip = (u_int64_t)pcb->pcb_onfault; | |||
295 | } | |||
296 | ||||
297 | return 1; | |||
298 | } | |||
299 | ||||
300 | ||||
301 | /* | |||
302 | * kerntrap(frame): | |||
303 | * Exception, fault, and trap interface to BSD kernel. This | |||
304 | * common code is called from assembly language IDT gate entry | |||
305 | * routines that prepare a suitable stack frame, and restore this | |||
306 | * frame after the exception has been processed. | |||
307 | */ | |||
308 | void | |||
309 | kerntrap(struct trapframe *frame) | |||
310 | { | |||
311 | int type = (int)frame->tf_trapno; | |||
312 | uint64_t cr2 = rcr2(); | |||
313 | ||||
314 | verify_smap(__func__); | |||
315 | uvmexp.traps++; | |||
316 | debug_trap(frame, curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc, type); | |||
317 | ||||
318 | switch (type) { | |||
319 | ||||
320 | default: | |||
321 | we_re_toast: | |||
322 | #ifdef DDB1 | |||
323 | if (db_ktrap(type, frame->tf_err, frame)) | |||
324 | return; | |||
325 | #endif | |||
326 | trap_print(frame, type); | |||
327 | panic("trap type %d, code=%llx, pc=%llx", | |||
328 | type, frame->tf_err, frame->tf_rip); | |||
329 | /*NOTREACHED*/ | |||
330 | ||||
331 | case T_PAGEFLT6: /* allow page faults in kernel mode */ | |||
332 | if (kpageflttrap(frame, cr2)) | |||
333 | return; | |||
334 | goto we_re_toast; | |||
335 | ||||
336 | #if NISA1 > 0 | |||
337 | case T_NMI9: | |||
338 | #ifdef DDB1 | |||
339 | /* NMI can be hooked up to a pushbutton for debugging */ | |||
340 | printf ("NMI ... going to debugger\n"); | |||
341 | if (db_ktrap(type, 0, frame)) | |||
342 | return; | |||
343 | #endif | |||
344 | /* machine/parity/power fail/"kitchen sink" faults */ | |||
345 | ||||
346 | if (x86_nmi() != 0) | |||
347 | goto we_re_toast; | |||
348 | else | |||
349 | return; | |||
350 | #endif /* NISA > 0 */ | |||
351 | } | |||
352 | } | |||
353 | ||||
354 | /* If we find out userland changed the pkru register, punish the process */ | |||
355 | static inline int | |||
356 | verify_pkru(struct proc *p) | |||
357 | { | |||
358 | if (pg_xo == 0 || rdpkru(0) == PGK_VALUE0xfffffffc) | |||
359 | return 0; | |||
360 | KERNEL_LOCK()_kernel_lock(); | |||
361 | sigabort(p); | |||
362 | KERNEL_UNLOCK()_kernel_unlock(); | |||
363 | return 1; | |||
364 | } | |||
365 | ||||
366 | /* | |||
367 | * usertrap(frame): handler for exceptions, faults, and traps from userspace | |||
368 | * This is called from the assembly language IDT gate entries | |||
369 | * which prepare a suitable stack frame and restores the CPU state | |||
370 | * after the fault has been processed. | |||
371 | */ | |||
372 | void | |||
373 | usertrap(struct trapframe *frame) | |||
374 | { | |||
375 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
376 | int type = (int)frame->tf_trapno; | |||
377 | uint64_t cr2 = rcr2(); | |||
378 | union sigval sv; | |||
379 | int sig, code; | |||
380 | ||||
381 | verify_smap(__func__); | |||
382 | uvmexp.traps++; | |||
383 | debug_trap(frame, p, type); | |||
384 | ||||
385 | p->p_md.md_regs = frame; | |||
386 | refreshcreds(p); | |||
387 | ||||
388 | if (verify_pkru(p)) | |||
389 | goto out; | |||
390 | ||||
391 | switch (type) { | |||
392 | case T_TSSFLT15: | |||
393 | sig = SIGBUS10; | |||
394 | code = BUS_OBJERR3; | |||
395 | break; | |||
396 | case T_PROTFLT4: /* protection fault */ | |||
397 | case T_SEGNPFLT16: | |||
398 | case T_STKFLT17: | |||
399 | frame_dump(frame, p, "SEGV", 0); | |||
400 | sig = SIGSEGV11; | |||
401 | code = SEGV_MAPERR1; | |||
402 | break; | |||
403 | case T_ALIGNFLT7: | |||
404 | sig = SIGBUS10; | |||
405 | code = BUS_ADRALN1; | |||
406 | break; | |||
407 | case T_PRIVINFLT0: /* privileged instruction fault */ | |||
408 | sig = SIGILL4; | |||
409 | code = ILL_PRVOPC5; | |||
410 | break; | |||
411 | case T_DIVIDE8: | |||
412 | sig = SIGFPE8; | |||
413 | code = FPE_INTDIV1; | |||
414 | break; | |||
415 | case T_ARITHTRAP2: | |||
416 | case T_XMM19: /* real arithmetic exceptions */ | |||
417 | sig = SIGFPE8; | |||
418 | code = fputrap(type); | |||
419 | break; | |||
420 | case T_BPTFLT1: /* bpt instruction fault */ | |||
421 | case T_TRCTRAP5: /* trace trap */ | |||
422 | sig = SIGTRAP5; | |||
423 | code = TRAP_BRKPT1; | |||
424 | break; | |||
425 | case T_CP21: | |||
426 | sig = SIGILL4; | |||
427 | code = (frame->tf_err & 0x7fff) < 4 ? ILL_ILLOPC1 | |||
428 | : ILL_BADSTK8; | |||
429 | break; | |||
430 | ||||
431 | case T_PAGEFLT6: /* page fault */ | |||
432 | if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p)((p)->p_md.md_regs->tf_rsp), | |||
433 | "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n", | |||
434 | uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) | |||
435 | goto out; | |||
436 | if (upageflttrap(frame, cr2)) | |||
437 | goto out; | |||
438 | /* FALLTHROUGH */ | |||
439 | ||||
440 | default: | |||
441 | trap_print(frame, type); | |||
442 | panic("impossible trap"); | |||
443 | } | |||
444 | ||||
445 | sv.sival_ptr = (void *)frame->tf_rip; | |||
446 | trapsignal(p, sig, type, code, sv); | |||
447 | ||||
448 | out: | |||
449 | userret(p); | |||
450 | } | |||
451 | ||||
452 | ||||
453 | static void | |||
454 | trap_print(struct trapframe *frame, int type) | |||
455 | { | |||
456 | if (type < trap_types) | |||
457 | printf("fatal %s", trap_type[type]); | |||
458 | else | |||
459 | printf("unknown trap %d", type); | |||
460 | printf(" in %s mode\n", KERNELMODE(frame->tf_cs, frame->tf_rflags)(((frame->tf_cs) & 3) == 0) ? | |||
461 | "supervisor" : "user"); | |||
462 | printf("trap type %d code %llx rip %llx cs %llx rflags %llx cr2 " | |||
463 | "%llx cpl %x rsp %llx\n", | |||
464 | type, frame->tf_err, frame->tf_rip, frame->tf_cs, | |||
465 | frame->tf_rflags, rcr2(), curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_ilevel, frame->tf_rsp); | |||
466 | printf("gsbase %p kgsbase %p\n", | |||
467 | (void *)rdmsr(MSR_GSBASE0xc0000101), (void *)rdmsr(MSR_KERNELGSBASE0xc0000102)); | |||
468 | } | |||
469 | ||||
470 | ||||
471 | static inline void | |||
472 | frame_dump(struct trapframe *tf, struct proc *p, const char *sig, uint64_t cr2) | |||
473 | { | |||
474 | #ifdef TRAP_SIGDEBUG | |||
475 | printf("pid %d (%s): %s at rip %llx addr %llx\n", | |||
476 | p->p_p->ps_pid, p->p_p->ps_comm, sig, tf->tf_rip, cr2); | |||
477 | printf("rip %p cs 0x%x rfl %p rsp %p ss 0x%x\n", | |||
478 | (void *)tf->tf_rip, (unsigned)tf->tf_cs & 0xffff, | |||
479 | (void *)tf->tf_rflags, | |||
480 | (void *)tf->tf_rsp, (unsigned)tf->tf_ss & 0xffff); | |||
481 | printf("err 0x%llx trapno 0x%llx\n", | |||
482 | tf->tf_err, tf->tf_trapno); | |||
483 | printf("rdi %p rsi %p rdx %p\n", | |||
484 | (void *)tf->tf_rdi, (void *)tf->tf_rsi, (void *)tf->tf_rdx); | |||
485 | printf("rcx %p r8 %p r9 %p\n", | |||
486 | (void *)tf->tf_rcx, (void *)tf->tf_r8, (void *)tf->tf_r9); | |||
487 | printf("r10 %p r11 %p r12 %p\n", | |||
488 | (void *)tf->tf_r10, (void *)tf->tf_r11, (void *)tf->tf_r12); | |||
489 | printf("r13 %p r14 %p r15 %p\n", | |||
490 | (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15); | |||
491 | printf("rbp %p rbx %p rax %p\n", | |||
492 | (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax); | |||
493 | #endif | |||
494 | } | |||
495 | ||||
/*
 * verify_smap(func):
 *	DIAGNOSTIC-only sanity check performed on kernel entry.  If the
 *	CPU advertises SMAP and PSL_AC is found set in rflags, the flag is
 *	cleared and the kernel panics, naming the entry point `func'.
 */
static inline void
verify_smap(const char *func)
{
#ifdef DIAGNOSTIC
	u_long rflags;

	if ((curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) == 0)
		return;

	rflags = read_rflags();
	if ((rflags & PSL_AC) == 0)
		return;

	/* clear AC first, then report */
	write_rflags(rflags & ~PSL_AC);
	panic("%s: AC set on entry", func);
#endif
}
509 | ||||
/*
 * debug_trap(frame, p, type):
 *	DEBUG-only trace of every trap.  When the trapdebug variable is
 *	non-zero, print the trap number, selected frame registers, %cr2,
 *	the interrupt level and the current process (which may be NULL).
 */
static inline void
debug_trap(struct trapframe *frame, struct proc *p, long type)
{
#ifdef DEBUG
	if (!trapdebug)
		return;

	printf("trap %ld code %llx rip %llx cs %llx rflags %llx "
	    "cr2 %llx cpl %x\n",
	    type, frame->tf_err, frame->tf_rip, frame->tf_cs,
	    frame->tf_rflags, rcr2(), curcpu()->ci_ilevel);
	printf("curproc %p\n", (void *)p);
	if (p != NULL)
		printf("pid %d\n", p->p_p->ps_pid);
#endif
}
525 | ||||
526 | /* | |||
527 | * ast(frame): | |||
528 | * AST handler. This is called from assembly language stubs when | |||
529 | * returning to userspace after a syscall or interrupt. | |||
530 | */ | |||
531 | void | |||
532 | ast(struct trapframe *frame) | |||
533 | { | |||
534 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
535 | ||||
536 | uvmexp.traps++; | |||
537 | KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_rflags))((!(((frame->tf_cs) & 3) == 0)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/trap.c", 537, "!KERNELMODE(frame->tf_cs, frame->tf_rflags)" )); | |||
538 | p->p_md.md_regs = frame; | |||
539 | refreshcreds(p); | |||
540 | uvmexp.softs++; | |||
541 | mi_ast(p, curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_want_resched); | |||
542 | userret(p); | |||
543 | } | |||
544 | ||||
545 | ||||
546 | /* | |||
547 | * syscall(frame): | |||
548 | * System call request from POSIX system call gate interface to kernel. | |||
549 | */ | |||
550 | void | |||
551 | syscall(struct trapframe *frame) | |||
552 | { | |||
553 | caddr_t params; | |||
554 | const struct sysent *callp; | |||
555 | struct proc *p; | |||
556 | int error = ENOSYS78; | |||
557 | size_t argsize, argoff; | |||
558 | register_t code, args[9], rval[2], *argp; | |||
559 | ||||
560 | verify_smap(__func__); | |||
561 | uvmexp.syscalls++; | |||
562 | p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; | |||
563 | ||||
564 | if (verify_pkru(p)) { | |||
| ||||
565 | userret(p); | |||
566 | return; | |||
567 | } | |||
568 | ||||
569 | code = frame->tf_rax; | |||
570 | argp = &args[0]; | |||
571 | argoff = 0; | |||
572 | ||||
573 | if (code <= 0 || code >= SYS_MAXSYSCALL331) | |||
574 | goto bad; | |||
575 | callp = sysent + code; | |||
576 | argsize = (callp->sy_argsize >> 3) + argoff; | |||
577 | if (argsize) { | |||
578 | switch (MIN(argsize, 6)(((argsize)<(6))?(argsize):(6))) { | |||
579 | case 6: | |||
580 | args[5] = frame->tf_r9; | |||
581 | case 5: | |||
582 | args[4] = frame->tf_r8; | |||
583 | case 4: | |||
584 | args[3] = frame->tf_r10; | |||
585 | case 3: | |||
586 | args[2] = frame->tf_rdx; | |||
587 | case 2: | |||
588 | args[1] = frame->tf_rsi; | |||
589 | case 1: | |||
590 | args[0] = frame->tf_rdi; | |||
591 | break; | |||
592 | default: | |||
593 | panic("impossible syscall argsize"); | |||
594 | } | |||
595 | if (argsize > 6) { | |||
596 | argsize -= 6; | |||
597 | params = (caddr_t)frame->tf_rsp + sizeof(register_t); | |||
598 | if ((error = copyin(params, &args[6], argsize << 3))) | |||
599 | goto bad; | |||
600 | } | |||
601 | } | |||
602 | ||||
603 | rval[0] = 0; | |||
604 | rval[1] = 0; | |||
605 | ||||
606 | error = mi_syscall(p, code, callp, argp, rval); | |||
607 | ||||
608 | switch (error) { | |||
609 | case 0: | |||
610 | frame->tf_rax = rval[0]; | |||
611 | frame->tf_rflags &= ~PSL_C0x00000001; /* carry bit */ | |||
612 | break; | |||
613 | case ERESTART-1: | |||
614 | /* Back up over the syscall instruction (2 bytes) */ | |||
615 | frame->tf_rip -= 2; | |||
616 | break; | |||
617 | case EJUSTRETURN-2: | |||
618 | /* nothing to do */ | |||
619 | break; | |||
620 | default: | |||
621 | bad: | |||
622 | frame->tf_rax = error; | |||
623 | frame->tf_rflags |= PSL_C0x00000001; /* carry bit */ | |||
624 | break; | |||
625 | } | |||
626 | ||||
627 | mi_syscall_return(p, code, error, rval); | |||
628 | } | |||
629 | ||||
630 | void | |||
631 | child_return(void *arg) | |||
632 | { | |||
633 | struct proc *p = arg; | |||
634 | struct trapframe *tf = p->p_md.md_regs; | |||
635 | ||||
636 | tf->tf_rax = 0; | |||
637 | tf->tf_rflags &= ~PSL_C0x00000001; | |||
638 | ||||
639 | KERNEL_UNLOCK()_kernel_unlock(); | |||
640 | ||||
641 | mi_child_return(p); | |||
642 | } | |||
643 |
1 | /* $OpenBSD: syscall_mi.h,v 1.29 2023/12/12 15:30:55 deraadt Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 1982, 1986, 1989, 1993 | |||
5 | * The Regents of the University of California. All rights reserved. | |||
6 | * | |||
7 | * Redistribution and use in source and binary forms, with or without | |||
8 | * modification, are permitted provided that the following conditions | |||
9 | * are met: | |||
10 | * 1. Redistributions of source code must retain the above copyright | |||
11 | * notice, this list of conditions and the following disclaimer. | |||
12 | * 2. Redistributions in binary form must reproduce the above copyright | |||
13 | * notice, this list of conditions and the following disclaimer in the | |||
14 | * documentation and/or other materials provided with the distribution. | |||
15 | * 3. Neither the name of the University nor the names of its contributors | |||
16 | * may be used to endorse or promote products derived from this software | |||
17 | * without specific prior written permission. | |||
18 | * | |||
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
29 | * SUCH DAMAGE. | |||
30 | * | |||
31 | * @(#)kern_xxx.c 8.2 (Berkeley) 11/14/93 | |||
32 | */ | |||
33 | ||||
34 | #include <sys/param.h> | |||
35 | #include <sys/pledge.h> | |||
36 | #include <sys/tracepoint.h> | |||
37 | #include <sys/syscall.h> | |||
38 | #include <uvm/uvm_extern.h> | |||
39 | ||||
40 | #ifdef KTRACE1 | |||
41 | #include <sys/ktrace.h> | |||
42 | #endif | |||
43 | ||||
44 | #include "dt.h" | |||
45 | #if NDT1 > 0 | |||
46 | #include <dev/dt/dtvar.h> | |||
47 | #endif | |||
48 | ||||
49 | ||||
50 | /* | |||
51 | * The MD setup for a system call has been done; here's the MI part. | |||
52 | */ | |||
53 | static inline int | |||
54 | mi_syscall(struct proc *p, register_t code, const struct sysent *callp, | |||
55 | register_t *argp, register_t retval[2]) | |||
56 | { | |||
57 | uint64_t tval; | |||
58 | int lock = !(callp->sy_flags & SY_NOLOCK0x01); | |||
59 | int error, pledged; | |||
60 | ||||
61 | /* refresh the thread's cache of the process's creds */ | |||
62 | refreshcreds(p); | |||
63 | ||||
64 | #ifdef SYSCALL_DEBUG | |||
65 | KERNEL_LOCK()_kernel_lock(); | |||
66 | scdebug_call(p, code, argp); | |||
67 | KERNEL_UNLOCK()_kernel_unlock(); | |||
68 | #endif | |||
69 | TRACEPOINT(raw_syscalls, sys_enter, code, NULL)do { extern struct dt_probe (dt_static_raw_syscalls_sys_enter ); struct dt_probe *dtp = &(dt_static_raw_syscalls_sys_enter ); if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect (((dtp->dtp_recording) != 0), 0)) { struct dt_provider *dtpv = dtp->dtp_prov; dtpv->dtpv_enter(dtpv, dtp, code, ((void *)0)); } } while (0); | |||
70 | #if NDT1 > 0 | |||
71 | DT_ENTER(syscall, code, callp->sy_argsize, argp)do { extern struct dt_provider dt_prov_syscall ; struct dt_provider *dtpv = &dt_prov_syscall ; if (__builtin_expect(((dt_tracing ) != 0), 0) && __builtin_expect(((dtpv->dtpv_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, code, callp->sy_argsize , argp); } } while (0); | |||
72 | #endif | |||
73 | #ifdef KTRACE1 | |||
74 | if (KTRPOINT(p, KTR_SYSCALL)((p)->p_p->ps_traceflag & (1<<(1)) && ((p)->p_flag & 0x00000001) == 0)) { | |||
75 | /* convert to mask, then include with code */ | |||
76 | KERNEL_LOCK()_kernel_lock(); | |||
77 | ktrsyscall(p, code, callp->sy_argsize, argp); | |||
78 | KERNEL_UNLOCK()_kernel_unlock(); | |||
79 | } | |||
80 | #endif | |||
81 | ||||
82 | /* SP must be within MAP_STACK space */ | |||
83 | if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p)((p)->p_md.md_regs->tf_rsp), | |||
84 | "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n", | |||
85 | uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) | |||
86 | return (EPERM1); | |||
87 | ||||
88 | /* PC must be in un-writeable permitted text (sigtramp, libc, ld.so) */ | |||
89 | if (!uvm_map_inentry(p, &p->p_pcinentry, PROC_PC(p)((p)->p_md.md_regs->tf_rip), | |||
90 | "[%s]%d/%d pc=%lx inside %lx-%lx: bogus syscall\n", | |||
91 | uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial)) | |||
92 | return (EPERM1); | |||
93 | ||||
94 | pledged = (p->p_p->ps_flags & PS_PLEDGE0x00100000); | |||
95 | if (pledged && (error = pledge_syscall(p, code, &tval))) { | |||
96 | KERNEL_LOCK()_kernel_lock(); | |||
97 | error = pledge_fail(p, error, tval); | |||
98 | KERNEL_UNLOCK()_kernel_unlock(); | |||
99 | return (error); | |||
100 | } | |||
101 | if (lock) | |||
102 | KERNEL_LOCK()_kernel_lock(); | |||
103 | error = (*callp->sy_call)(p, argp, retval); | |||
104 | if (lock) | |||
105 | KERNEL_UNLOCK()_kernel_unlock(); | |||
106 | ||||
107 | return (error); | |||
108 | } | |||
109 | ||||
110 | /* | |||
111 | * Finish MI stuff on return, after the registers have been set | |||
 *
 * p is the returning thread, code the syscall number, error the
 * syscall's result, and retval the two-element return value array.
 *
 * Steps, in the order they appear below:
 *  - under SYSCALL_DEBUG, scdebug_ret() is called with the kernel
 *    lock held;
 *  - the DT_LEAVE syscall probe is handed code, error, retval[0] and
 *    retval[1];
 *  - the raw_syscalls sys_exit tracepoint is handed code and NULL;
 *  - userret(p) is called;
 *  - if KTRPOINT(p, KTR_SYSRET) is true, ktrsysret() is called with
 *    the kernel lock held.
 *
 * NOTE(review): DT_LEAVE reads both retval[0] and retval[1] by value,
 * so the caller needs to have initialized both elements even when the
 * syscall failed.  The analyzer warning attached to this listing
 * (4th function call argument is an uninitialized value) points at
 * retval[0] in that call; the fix presumably belongs in the caller,
 * which is not visible here -- confirm.
112 | */ | |||
113 | static inline void | |||
114 | mi_syscall_return(struct proc *p, register_t code, int error, | |||
115 | const register_t retval[2]) | |||
116 | { | |||
117 | #ifdef SYSCALL_DEBUG | |||
118 | KERNEL_LOCK()_kernel_lock(); | |||
119 | scdebug_ret(p, code, error, retval); | |||
120 | KERNEL_UNLOCK()_kernel_unlock(); | |||
121 | #endif | |||
122 | #if NDT1 > 0 | |||
/* Passes retval[0] and retval[1] by value; both must be initialized. */
123 | DT_LEAVE(syscall, code, error, retval[0], retval[1])do { extern struct dt_provider dt_prov_syscall ; struct dt_provider *dtpv = &dt_prov_syscall ; if (__builtin_expect(((dt_tracing ) != 0), 0) && __builtin_expect(((dtpv->dtpv_recording ) != 0), 0)) { dtpv->dtpv_leave(dtpv, code, error, retval[ 0], retval[1]); } } while (0); | |||
| ||||
124 | #endif | |||
125 | TRACEPOINT(raw_syscalls, sys_exit, code, NULL)do { extern struct dt_probe (dt_static_raw_syscalls_sys_exit) ; struct dt_probe *dtp = &(dt_static_raw_syscalls_sys_exit ); if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect (((dtp->dtp_recording) != 0), 0)) { struct dt_provider *dtpv = dtp->dtp_prov; dtpv->dtpv_enter(dtpv, dtp, code, ((void *)0)); } } while (0); | |||
126 | ||||
127 | userret(p); | |||
128 | ||||
129 | #ifdef KTRACE1 
/* The ktrace record is written after userret(), under the kernel lock. */
130 | if (KTRPOINT(p, KTR_SYSRET)((p)->p_p->ps_traceflag & (1<<(2)) && ((p)->p_flag & 0x00000001) == 0)) { | |||
131 | KERNEL_LOCK()_kernel_lock(); | |||
132 | ktrsysret(p, code, error, retval); | |||
133 | KERNEL_UNLOCK()_kernel_unlock(); | |||
134 | } | |||
135 | #endif | |||
136 | } | |||
137 | ||||
138 | /* | |||
139 | * Finish MI stuff for a new process/thread to return | |||
 *
 * Runs the same return sequence as mi_syscall_return(), but with
 * values made up locally instead of passed in:
 *  - code is SYS___tfork when P_THREAD is set in p->p_flag, otherwise
 *    SYS_vfork when PS_PPWAIT is set in p->p_p->ps_flags, otherwise
 *    SYS_fork;
 *  - the error is always reported as 0;
 *  - the return value pair is the constant { 0, 1 }.
 *
 * The sched on__cpu tracepoint fires first, then scdebug_ret() (under
 * SYSCALL_DEBUG), the DT_LEAVE syscall probe, the raw_syscalls
 * sys_exit tracepoint, userret(p), and finally ktrsysret() when
 * KTRPOINT(p, KTR_SYSRET) is true.
 *
 * NOTE(review): code and child_retval are declared only inside the
 * #if below, yet the sys_exit TRACEPOINT that uses code sits outside
 * any conditional.  Presumably TRACEPOINT expands to nothing when the
 * probes are not configured -- confirm against its definition.
140 | */ | |||
141 | static inline void | |||
142 | mi_child_return(struct proc *p) | |||
143 | { | |||
144 | #if defined(SYSCALL_DEBUG) || defined(KTRACE1) || NDT1 > 0 | |||
145 | int code = (p->p_flag & P_THREAD0x04000000) ? SYS___tfork8 : | |||
146 | (p->p_p->ps_flags & PS_PPWAIT0x00000040) ? SYS_vfork66 : SYS_fork2; | |||
147 | const register_t child_retval[2] = { 0, 1 }; | |||
148 | #endif | |||
149 | ||||
150 | TRACEPOINT(sched, on__cpu, NULL)do { extern struct dt_probe (dt_static_sched_on__cpu); struct dt_probe *dtp = &(dt_static_sched_on__cpu); if (__builtin_expect (((dt_tracing) != 0), 0) && __builtin_expect(((dtp-> dtp_recording) != 0), 0)) { struct dt_provider *dtpv = dtp-> dtp_prov; dtpv->dtpv_enter(dtpv, dtp, ((void *)0)); } } while (0); | |||
151 | ||||
152 | #ifdef SYSCALL_DEBUG | |||
153 | KERNEL_LOCK()_kernel_lock(); | |||
154 | scdebug_ret(p, code, 0, child_retval); | |||
155 | KERNEL_UNLOCK()_kernel_unlock(); | |||
156 | #endif | |||
157 | #if NDT1 > 0 | |||
158 | DT_LEAVE(syscall, code, 0, child_retval[0], child_retval[1])do { extern struct dt_provider dt_prov_syscall ; struct dt_provider *dtpv = &dt_prov_syscall ; if (__builtin_expect(((dt_tracing ) != 0), 0) && __builtin_expect(((dtpv->dtpv_recording ) != 0), 0)) { dtpv->dtpv_leave(dtpv, code, 0, child_retval [0], child_retval[1]); } } while (0); | |||
159 | #endif | |||
160 | TRACEPOINT(raw_syscalls, sys_exit, code, NULL)do { extern struct dt_probe (dt_static_raw_syscalls_sys_exit) ; struct dt_probe *dtp = &(dt_static_raw_syscalls_sys_exit ); if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect (((dtp->dtp_recording) != 0), 0)) { struct dt_provider *dtpv = dtp->dtp_prov; dtpv->dtpv_enter(dtpv, dtp, code, ((void *)0)); } } while (0); | |||
161 | ||||
162 | userret(p); | |||
163 | ||||
164 | #ifdef KTRACE1 
/* The ktrace record is written after userret(), under the kernel lock. */
165 | if (KTRPOINT(p, KTR_SYSRET)((p)->p_p->ps_traceflag & (1<<(2)) && ((p)->p_flag & 0x00000001) == 0)) { | |||
166 | KERNEL_LOCK()_kernel_lock(); | |||
167 | ktrsysret(p, code, 0, child_retval); | |||
168 | KERNEL_UNLOCK()_kernel_unlock(); | |||
169 | } | |||
170 | #endif | |||
171 | } | |||
172 | ||||
173 | /* | |||
174 | * Do the specific processing necessary for an AST | |||
 *
 * p is the thread taking the AST; resched is non-zero when the caller
 * wants the thread to give up the CPU.
 *
 * Two independent steps:
 *  - if P_OWEUPC is set in p->p_flag, ADDUPROF(p) runs with the kernel
 *    lock held.  Per the expansion shown in this listing, that clears
 *    P_OWEUPC atomically, calls addupc_task() with p_prof_addr and
 *    p_prof_ticks, and then zeroes p_prof_ticks;
 *  - if resched is non-zero, preempt() is called.
 *
 * userret() is deliberately not called from here.
175 | */ | |||
176 | static inline void | |||
177 | mi_ast(struct proc *p, int resched) | |||
178 | { | |||
179 | if (p->p_flag & P_OWEUPC0x00008000) { | |||
180 | KERNEL_LOCK()_kernel_lock(); | |||
181 | ADDUPROF(p)do { x86_atomic_clearbits_u32(&(p)->p_flag, 0x00008000 ); addupc_task((p), (p)->p_prof_addr, (p)->p_prof_ticks ); (p)->p_prof_ticks = 0; } while (0); | |||
182 | KERNEL_UNLOCK()_kernel_unlock(); | |||
183 | } | |||
/* preempt() is called without taking the kernel lock here. */
184 | if (resched) | |||
185 | preempt(); | |||
186 | ||||
187 | /* | |||
188 | * XXX could move call to userret() here, but | |||
189 | * hppa calls ast() in syscall return and sh calls | |||
190 | * it after userret() | |||
191 | */ | |||
192 | } | |||