/* FILE: arch/i386/kernel/entry.S */
1 /*
2  * linux/arch/i386/entry.S
3  *
4  * Copyright (C) 1991, 1992 Linus Torvalds
5  */
6
7 /*
8  * entry.S contains the system-call and fault low-level
9  * handling routines. This also contains the
10  * timer-interrupt handler, as well as all interrupts and
11  * faults that can result in a task-switch.
12  *
13  * NOTE: This code handles signal-recognition, which
14  * happens every time after a timer-interrupt and after
15  * each system call.
16  *
17  * I changed all the .align's to 4 (16 byte alignment),
18  * as that's faster on a 486.
19  *
20  * Stack layout in 'ret_from_system_call':
21  * ptrace needs to have all regs on the stack.
22  * if the order here is changed, it needs to be
23  * updated in fork.c:copy_process,
24  * signal.c:do_signal, ptrace.c and ptrace.h
25  *
26  * 0(%esp) - %ebx
27  * 4(%esp) - %ecx
28  * 8(%esp) - %edx
29  * C(%esp) - %esi
30  * 10(%esp) - %edi
31  * 14(%esp) - %ebp
32  * 18(%esp) - %eax
33  * 1C(%esp) - %ds
34  * 20(%esp) - %es
35  * 24(%esp) - orig_eax
36  * 28(%esp) - %eip
37  * 2C(%esp) - %cs
38  * 30(%esp) - %eflags
39  * 34(%esp) - %oldesp
40  * 38(%esp) - %oldss
41  *
42  * "current" is in register %ebx during any slow entries.
43  */
44
/* NOTE(review): the header names of the #include lines below are missing
   from this copy (they look like they were stripped during extraction);
   restore them from the upstream file before assembling. */
45 #include
46 #include
47 #include
48 #define ASSEMBLY
49 #include
50
/* Byte offsets of the saved-register slots from %esp. The values repeat
   the hexadecimal offsets of the stack-layout table above. */
51 EBX = 0x00
52 ECX = 0x04
53 EDX = 0x08
54 ESI = 0x0C
55 EDI = 0x10
56 EBP = 0x14
57 EAX = 0x18
58 DS = 0x1C
59 ES = 0x20
60 ORIG_EAX = 0x24
61 EIP = 0x28
62 CS = 0x2C
63 EFLAGS = 0x30
64 OLDESP = 0x34
65 OLDSS = 0x38
66
/* Bit masks applied to the saved EFLAGS word. Of these, only VM_MASK is
   referenced in the code of this listing (signal_return, ret_from_intr). */
67 CF_MASK = 0x00000001
68 IF_MASK = 0x00000200
69 NT_MASK = 0x00004000
70 VM_MASK = 0x00020000
71
72 /*
73  * these are offsets into the task-struct. 
74  */
75 state = 0
76 flags = 4
77 sigpending = 8
78 addr_limit = 12
79 exec_domain = 16
80 need_resched = 20
81
/* Errno value; stored negated in the saved EAX slot by tracesys and badsys. */
82 ENOSYS = 38
83
84
/* SAVE_ALL: clear the direction flag, push %es, %ds, %eax, %ebp, %edi,
   %esi, %edx, %ecx, %ebx (so %ebx ends up at 0(%esp)), then load
   __KERNEL_DS into %ds and %es.
   The macro text is left on one physical line on purpose: each backslash
   is followed by a listing number, so breaking the line after a backslash
   would turn it into a real line continuation. */
85 #define SAVE_ALL \ 86 cld; \ 87 pushl %es; \ 88 pushl %ds; \ 89 pushl %eax; \ 90 pushl %ebp; \ 91 pushl %edi; \ 92 pushl %esi; \ 93 pushl %edx; \ 94 pushl %ecx; \ 95 pushl %ebx; \ 96 movl $(__KERNEL_DS),%edx; \ 97 movl %dx,%ds; \ 98 movl %dx,%es;
99
/* RESTORE_ALL: pop the registers in the reverse order of SAVE_ALL, drop
   the orig_eax slot (addl $4) and iret.
   Labels 1, 2 and 3 (the two segment pops and the iret) are paired with
   fixup labels 4, 5 and 6 through the __ex_table entries:
     4 and 5 overwrite the selector on top of the stack with 0 and jump
       back to retry the pop;
     6 reloads %ds and %es from %ss and calls do_exit with 11 pushed as
       its argument.
   Presumably the table is consulted when one of the labelled instructions
   faults; the lookup itself is not in this file. Kept on one physical
   line for the same reason as SAVE_ALL. */
100 #define RESTORE_ALL \ 101 popl %ebx; \ 102 popl %ecx; \ 103 popl %edx; \ 104 popl %esi; \ 105 popl %edi; \ 106 popl %ebp; \ 107 popl %eax; \ 108 1: popl %ds; \ 109 2: popl %es; \ 110 addl $4,%esp; \ 111 3: iret; \ 112 .section .fixup,"ax"; \ 113 4: movl $0,(%esp); \ 114 jmp 1b; \ 115 5: movl $0,(%esp); \ 116 jmp 2b; \ 117 6: pushl %ss; \ 118 popl %ds; \ 119 pushl %ss; \ 120 popl %es; \ 121 pushl $11; \ 122 call do_exit; \ 123 .previous; \ 124 .section __ex_table,"a"; \ 125 .align 4; \ 126 .long 1b,4b; \ 127 .long 2b,5b; \ 128 .long 3b,6b; \ 129 .previous
130
/* GET_CURRENT(reg): reg = %esp with the low 13 bits cleared, i.e. %esp
   rounded down to an 8192-byte boundary. The code below uses the result
   as the base for the task-struct offsets defined above. */
131 #define GET_CURRENT(reg) \ 132 movl %esp, reg; \ 133 andl $-8192, reg;
134
/* lcall7: entry reached through a call gate. After pushfl / pushl %eax /
   SAVE_ALL the EIP, CS and EFLAGS slots hold eflags, eip and cs
   respectively (see the inline notes), so the three values are loaded
   here and stored back into their normal slots right after. */
135 ENTRY(lcall7)
136     pushfl # We get a different stack layout with call
137     pushl %eax # gates, which has to be cleaned up later..
138     SAVE_ALL
139     movl EIP(%esp),%eax # this is eflags, not eip..
140     movl CS(%esp),%edx # this is eip..
141     movl EFLAGS(%esp),%ecx # and this is cs.. 
142     movl %eax,EFLAGS(%esp) #
143     movl %edx,EIP(%esp) # move to their "normal" places
144     movl %ecx,CS(%esp) #
/* %ebx = address of the saved registers; it is pushed (presumably as the
   handler's argument) and then masked down to the 8192-byte boundary,
   which is GET_CURRENT written out by hand. The handler address is read
   from offset 4 of the structure that exec_domain points to. */
145     movl %esp,%ebx
146     pushl %ebx
147     andl $-8192,%ebx # GET_CURRENT
148     movl exec_domain(%ebx),%edx # Get the execution domain
149     movl 4(%edx),%edx # Get lcall7 handler for domain
150     call *%edx
151     popl %eax
152     jmp ret_from_sys_call
153
154
/* ret_from_fork: on SMP builds call schedule_tail first; then load
   current into %ebx and join the normal system-call return path. */
155     ALIGN
156     .globl ret_from_fork
157 ret_from_fork:
158 #ifdef __SMP__
159     call SYMBOL_NAME(schedule_tail)
160 #endif /* __SMP__ */
161     GET_CURRENT(%ebx)
162     jmp ret_from_sys_call
163
164 /*
165  * Return to user mode is not as complex as all this
166  * looks, but we want the default path for a system call
167  * return to go as quickly as possible which is why some
168  * of this is less clear than it otherwise should be.
169  */
170
/* system_call: %eax holds the system-call number on entry.
     - it is pushed as orig_eax, then SAVE_ALL builds the register frame;
     - numbers >= NR_syscalls (unsigned compare, jae) go to badsys;
     - if bit 0x20 of the task flags is set, go to tracesys;
     - otherwise call through sys_call_table (4-byte entries indexed by
       %eax) and store the result in the saved EAX slot.
   Execution then falls through into ret_from_sys_call. */
171 ENTRY(system_call)
172     pushl %eax # save orig_eax
173     SAVE_ALL
174     GET_CURRENT(%ebx)
175     cmpl $(NR_syscalls),%eax
176     jae badsys
177     testb $0x20,flags(%ebx) # PF_TRACESYS
178     jne tracesys
179     call *SYMBOL_NAME(sys_call_table)(,%eax,4)
180     movl %eax,EAX(%esp) # save the return value
/* Common return path; %ebx must hold current here.
     1. bh_mask & bh_active non-zero   -> handle_bottom_half
     2. need_resched(current) non-zero -> reschedule
     3. sigpending(current) non-zero   -> signal_return
     4. otherwise RESTORE_ALL (pop the frame and iret). */
181     ALIGN
182     .globl ret_from_sys_call
183     .globl ret_from_intr
184 ret_from_sys_call:
185     movl SYMBOL_NAME(bh_mask),%eax
186     andl SYMBOL_NAME(bh_active),%eax
187     jne handle_bottom_half
188 ret_with_reschedule:
189     cmpl $0,need_resched(%ebx)
190     jne reschedule
191     cmpl $0,sigpending(%ebx)
192     jne signal_return
193 restore_all:
194     RESTORE_ALL
195
/* signal_return: enable interrupts, then call do_signal with
   %eax = address of the saved registers and %edx = 0.
   The movl between testl and jne does not modify EFLAGS, so the jne still
   acts on the VM_MASK test of the saved EFLAGS. */
196     ALIGN
197 signal_return:
198     sti # we can get here from an interrupt handler
199     testl $(VM_MASK),EFLAGS(%esp)
200     movl %esp,%eax
201     jne v86_signal_return
202     xorl %edx,%edx
203     call SYMBOL_NAME(do_signal)
204     jmp restore_all
205
/* v86_signal_return: the saved EFLAGS had VM_MASK set. save_v86_state is
   called with %eax still pointing at the saved registers; its return
   value becomes the new %esp before do_signal is called with %edx = 0. */
206     ALIGN
207 v86_signal_return:
208     call SYMBOL_NAME(save_v86_state)
209     movl %eax,%esp
210     xorl %edx,%edx
211     call SYMBOL_NAME(do_signal)
212     jmp restore_all
213
/* tracesys: traced variant of the dispatch. Preload -ENOSYS as the
   result, call syscall_trace, reload the call number from the ORIG_EAX
   slot, dispatch through sys_call_table, store the result and call
   syscall_trace a second time. */
214     ALIGN
215 tracesys:
216     movl $-ENOSYS,EAX(%esp)
217     call SYMBOL_NAME(syscall_trace)
218     movl ORIG_EAX(%esp),%eax
219     call *SYMBOL_NAME(sys_call_table)(,%eax,4)
220     movl 
%eax,EAX(%esp) # save the return value
221     call SYMBOL_NAME(syscall_trace)
222     jmp ret_from_sys_call
/* badsys: system-call number out of range; the result is -ENOSYS. */
223 badsys:
224     movl $-ENOSYS,EAX(%esp)
225     jmp ret_from_sys_call
226
/* ret_from_exception: run pending bottom halves (bh_mask & bh_active
   non-zero), otherwise fall through into ret_from_intr. */
227     ALIGN
228 ret_from_exception:
229     movl SYMBOL_NAME(bh_mask),%eax
230     andl SYMBOL_NAME(bh_active),%eax
231     jne handle_bottom_half
/* ret_from_intr: load current into %ebx, then build a word from the saved
   EFLAGS with its low byte replaced by the low byte of the saved CS.
   If the VM bit or either of the two low CS bits is set (return to VM86
   mode or to a non-supervisor level, per the inline note) take the
   reschedule/signal checks; otherwise restore registers directly. */
232     ALIGN
233 ret_from_intr:
234     GET_CURRENT(%ebx)
235     movl EFLAGS(%esp),%eax # mix EFLAGS and CS
236     movb CS(%esp),%al
237     testl $(VM_MASK | 3),%eax # rtn to VM86 mode|non-super?
238     jne ret_with_reschedule
239     jmp restore_all
240
241     ALIGN
242 handle_bottom_half:
243     call SYMBOL_NAME(do_bottom_half)
244     jmp ret_from_intr
245
/* reschedule: call schedule, then redo the return-path checks from the
   top (ret_from_sys_call). */
246     ALIGN
247 reschedule:
248     call SYMBOL_NAME(schedule) # test
249     jmp ret_from_sys_call
250
/* divide_error pushes a zero error code and its handler address and falls
   through into error_code; the other exception stubs jump here. */
251 ENTRY(divide_error)
252     pushl $0 # no error code
253     pushl $ SYMBOL_NAME(do_divide_error)
/* error_code: on entry the stack top holds the handler address with the
   error code just above it. The pushes below fill the DS..EBX slots by
   hand, so the handler address sits in the ES slot and the error code in
   the ORIG_EAX slot. The two xchgl instructions then
     - put -1 into ORIG_EAX while fetching the error code into %eax, and
     - put the live %es value into the ES slot while fetching the handler
       address into %ecx.
   %edx keeps the address of the completed register frame. */
254     ALIGN
255 error_code:
256     pushl %ds
257     pushl %eax
258     xorl %eax,%eax
259     pushl %ebp
260     pushl %edi
261     pushl %esi
262     pushl %edx
263     decl %eax # eax = -1
264     pushl %ecx
265     pushl %ebx
266     cld
267     movl %es,%cx
268     xchgl %eax, ORIG_EAX(%esp) # orig_eax (get error code.)
269     movl %esp,%edx
270     xchgl %ecx, ES(%esp) # get the addr and save es. 
/* Tail of error_code: push the error code and the frame address as the
   two stack arguments, switch %ds/%es to __KERNEL_DS, load current into
   %ebx and call the handler whose address is in %ecx. The 8 bytes of
   arguments are removed afterwards. */
271     pushl %eax # push the error code
272     pushl %edx
273     movl $(__KERNEL_DS),%edx
274     movl %dx,%ds
275     movl %dx,%es
276     GET_CURRENT(%ebx)
277     call *%ecx
278     addl $8,%esp
279     jmp ret_from_exception
280
281 ENTRY(coprocessor_error)
282     pushl $0
283     pushl $ SYMBOL_NAME(do_coprocessor_error)
284     jmp error_code
285
/* device_not_available: does not go through error_code.
   -1 is pushed as orig_eax, SAVE_ALL builds the frame, and the address of
   ret_from_exception is pushed as a return address.
     - CR0 bit 0x4 (EM) clear: jump to math_state_restore, whose return
       then lands in ret_from_exception;
     - EM set: call math_emulate with one dummy 0 argument, drop the
       argument, and ret to ret_from_exception. */
286 ENTRY(device_not_available)
287     pushl $-1 # mark this as an int
288     SAVE_ALL
289     GET_CURRENT(%ebx)
290     pushl $ret_from_exception
291     movl %cr0,%eax
292     testl $0x4,%eax # EM (math emulation bit)
293     je SYMBOL_NAME(math_state_restore)
294     pushl $0 # temp storage for ORIG_EIP
295     call SYMBOL_NAME(math_emulate)
296     addl $4,%esp
297     ret
298
/* The stubs from here to reserved push a 0 placeholder for the error code
   before the handler address. */
299 ENTRY(debug)
300     pushl $0
301     pushl $ SYMBOL_NAME(do_debug)
302     jmp error_code
303
304 ENTRY(nmi)
305     pushl $0
306     pushl $ SYMBOL_NAME(do_nmi)
307     jmp error_code
308
309 ENTRY(int3)
310     pushl $0
311     pushl $ SYMBOL_NAME(do_int3)
312     jmp error_code
313
314 ENTRY(overflow)
315     pushl $0
316     pushl $ SYMBOL_NAME(do_overflow)
317     jmp error_code
318
319 ENTRY(bounds)
320     pushl $0
321     pushl $ SYMBOL_NAME(do_bounds)
322     jmp error_code
323
324 ENTRY(invalid_op)
325     pushl $0
326     pushl $ SYMBOL_NAME(do_invalid_op)
327     jmp error_code
328
329 ENTRY(coprocessor_segment_overrun)
330     pushl $0
331     pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
332     jmp error_code
333
334 ENTRY(reserved)
335     pushl $0
336     pushl $ SYMBOL_NAME(do_reserved)
337     jmp error_code
338
/* The stubs from double_fault to page_fault push only the handler
   address; presumably the CPU has already pushed an error code for these
   vectors (confirm against the processor manual). */
339 ENTRY(double_fault)
340     pushl $ SYMBOL_NAME(do_double_fault)
341     jmp error_code
342
343 ENTRY(invalid_TSS)
344     pushl $ SYMBOL_NAME(do_invalid_TSS)
345     jmp error_code
346
347 ENTRY(segment_not_present)
348     pushl $ SYMBOL_NAME(do_segment_not_present)
349     jmp error_code
350
351 ENTRY(stack_segment)
352     pushl $ SYMBOL_NAME(do_stack_segment)
353     jmp error_code
354
355 ENTRY(general_protection)
356     pushl $ SYMBOL_NAME(do_general_protection)
357     jmp error_code
358
359 ENTRY(alignment_check)
360     pushl $ SYMBOL_NAME(do_alignment_check)
361     jmp error_code
362
363 ENTRY(page_fault)
364 
pushl $ SYMBOL_NAME(do_page_fault)
365     jmp error_code
366
367 ENTRY(spurious_interrupt_bug)
368     pushl $0
369     pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
370     jmp error_code
371
/* sys_call_table: 4-byte handler addresses indexed by system-call number
   (system_call and tracesys call through it with %eax scaled by 4).
   The bare numeric comments mark every fifth index; slots for calls
   labelled "old ... holder" point at sys_ni_syscall. */
372 .data
373 ENTRY(sys_call_table)
374     .long SYMBOL_NAME(sys_ni_syscall) /* 0 */
375     .long SYMBOL_NAME(sys_exit)
376     .long SYMBOL_NAME(sys_fork)
377     .long SYMBOL_NAME(sys_read)
378     .long SYMBOL_NAME(sys_write)
379     .long SYMBOL_NAME(sys_open) /* 5 */
380     .long SYMBOL_NAME(sys_close)
381     .long SYMBOL_NAME(sys_waitpid)
382     .long SYMBOL_NAME(sys_creat)
383     .long SYMBOL_NAME(sys_link)
384     .long SYMBOL_NAME(sys_unlink) /* 10 */
385     .long SYMBOL_NAME(sys_execve)
386     .long SYMBOL_NAME(sys_chdir)
387     .long SYMBOL_NAME(sys_time)
388     .long SYMBOL_NAME(sys_mknod)
389     .long SYMBOL_NAME(sys_chmod) /* 15 */
390     .long SYMBOL_NAME(sys_lchown)
391     .long SYMBOL_NAME(sys_ni_syscall) /*old break holder*/
392     .long SYMBOL_NAME(sys_stat)
393     .long SYMBOL_NAME(sys_lseek)
394     .long SYMBOL_NAME(sys_getpid) /* 20 */
395     .long SYMBOL_NAME(sys_mount)
396     .long SYMBOL_NAME(sys_oldumount)
397     .long SYMBOL_NAME(sys_setuid)
398     .long SYMBOL_NAME(sys_getuid)
399     .long SYMBOL_NAME(sys_stime) /* 25 */
400     .long SYMBOL_NAME(sys_ptrace)
401     .long SYMBOL_NAME(sys_alarm)
402     .long SYMBOL_NAME(sys_fstat)
403     .long SYMBOL_NAME(sys_pause)
404     .long SYMBOL_NAME(sys_utime) /* 30 */
405     .long SYMBOL_NAME(sys_ni_syscall) /* old stty holder */
406     .long SYMBOL_NAME(sys_ni_syscall) /* old gtty holder */
407     .long SYMBOL_NAME(sys_access)
408     .long SYMBOL_NAME(sys_nice) /*next: old ftime holder*/
409     .long SYMBOL_NAME(sys_ni_syscall) /* 35 */
410     .long SYMBOL_NAME(sys_sync)
411     .long SYMBOL_NAME(sys_kill)
412     .long SYMBOL_NAME(sys_rename)
413     .long SYMBOL_NAME(sys_mkdir)
414     .long SYMBOL_NAME(sys_rmdir) /* 40 */
415     .long SYMBOL_NAME(sys_dup)
416     .long SYMBOL_NAME(sys_pipe)
417     .long SYMBOL_NAME(sys_times)
418     .long SYMBOL_NAME(sys_ni_syscall) /* old prof holder */
419     .long SYMBOL_NAME(sys_brk) /* 45 */
420     .long SYMBOL_NAME(sys_setgid) 
/* sys_call_table continued: entries 47 through 97 (table index = listing
   number - 374). */
421     .long SYMBOL_NAME(sys_getgid)
422     .long SYMBOL_NAME(sys_signal)
423     .long SYMBOL_NAME(sys_geteuid)
424     .long SYMBOL_NAME(sys_getegid) /* 50 */
425     .long SYMBOL_NAME(sys_acct)
426     .long SYMBOL_NAME(sys_umount) /*recyc never used phys*/
427     .long SYMBOL_NAME(sys_ni_syscall) /* old lock holder */
428     .long SYMBOL_NAME(sys_ioctl)
429     .long SYMBOL_NAME(sys_fcntl) /* 55 */
430     .long SYMBOL_NAME(sys_ni_syscall) /* old mpx holder */
431     .long SYMBOL_NAME(sys_setpgid)
432     .long SYMBOL_NAME(sys_ni_syscall) /*old ulimit holder*/
433     .long SYMBOL_NAME(sys_olduname)
434     .long SYMBOL_NAME(sys_umask) /* 60 */
435     .long SYMBOL_NAME(sys_chroot)
436     .long SYMBOL_NAME(sys_ustat)
437     .long SYMBOL_NAME(sys_dup2)
438     .long SYMBOL_NAME(sys_getppid)
439     .long SYMBOL_NAME(sys_getpgrp) /* 65 */
440     .long SYMBOL_NAME(sys_setsid)
441     .long SYMBOL_NAME(sys_sigaction)
442     .long SYMBOL_NAME(sys_sgetmask)
443     .long SYMBOL_NAME(sys_ssetmask)
444     .long SYMBOL_NAME(sys_setreuid) /* 70 */
445     .long SYMBOL_NAME(sys_setregid)
446     .long SYMBOL_NAME(sys_sigsuspend)
447     .long SYMBOL_NAME(sys_sigpending)
448     .long SYMBOL_NAME(sys_sethostname)
449     .long SYMBOL_NAME(sys_setrlimit) /* 75 */
450     .long SYMBOL_NAME(sys_getrlimit)
451     .long SYMBOL_NAME(sys_getrusage)
452     .long SYMBOL_NAME(sys_gettimeofday)
453     .long SYMBOL_NAME(sys_settimeofday)
454     .long SYMBOL_NAME(sys_getgroups) /* 80 */
455     .long SYMBOL_NAME(sys_setgroups)
456     .long SYMBOL_NAME(old_select)
457     .long SYMBOL_NAME(sys_symlink)
458     .long SYMBOL_NAME(sys_lstat)
459     .long SYMBOL_NAME(sys_readlink) /* 85 */
460     .long SYMBOL_NAME(sys_uselib)
461     .long SYMBOL_NAME(sys_swapon)
462     .long SYMBOL_NAME(sys_reboot)
463     .long SYMBOL_NAME(old_readdir)
464     .long SYMBOL_NAME(old_mmap) /* 90 */
465     .long SYMBOL_NAME(sys_munmap)
466     .long SYMBOL_NAME(sys_truncate)
467     .long SYMBOL_NAME(sys_ftruncate)
468     .long SYMBOL_NAME(sys_fchmod)
469     .long SYMBOL_NAME(sys_fchown) /* 95 */
470     .long SYMBOL_NAME(sys_getpriority)
471     .long SYMBOL_NAME(sys_setpriority)
472 
.long SYMBOL_NAME(sys_ni_syscall) /*old profil holder*/
/* sys_call_table continued: the entry above is index 98; this run ends at
   index 149 (table index = listing number - 374). */
473     .long SYMBOL_NAME(sys_statfs)
474     .long SYMBOL_NAME(sys_fstatfs) /* 100 */
475     .long SYMBOL_NAME(sys_ioperm)
476     .long SYMBOL_NAME(sys_socketcall)
477     .long SYMBOL_NAME(sys_syslog)
478     .long SYMBOL_NAME(sys_setitimer)
479     .long SYMBOL_NAME(sys_getitimer) /* 105 */
480     .long SYMBOL_NAME(sys_newstat)
481     .long SYMBOL_NAME(sys_newlstat)
482     .long SYMBOL_NAME(sys_newfstat)
483     .long SYMBOL_NAME(sys_uname)
484     .long SYMBOL_NAME(sys_iopl) /* 110 */
485     .long SYMBOL_NAME(sys_vhangup)
486     .long SYMBOL_NAME(sys_idle)
487     .long SYMBOL_NAME(sys_vm86old)
488     .long SYMBOL_NAME(sys_wait4)
489     .long SYMBOL_NAME(sys_swapoff) /* 115 */
490     .long SYMBOL_NAME(sys_sysinfo)
491     .long SYMBOL_NAME(sys_ipc)
492     .long SYMBOL_NAME(sys_fsync)
493     .long SYMBOL_NAME(sys_sigreturn)
494     .long SYMBOL_NAME(sys_clone) /* 120 */
495     .long SYMBOL_NAME(sys_setdomainname)
496     .long SYMBOL_NAME(sys_newuname)
497     .long SYMBOL_NAME(sys_modify_ldt)
498     .long SYMBOL_NAME(sys_adjtimex)
499     .long SYMBOL_NAME(sys_mprotect) /* 125 */
500     .long SYMBOL_NAME(sys_sigprocmask)
501     .long SYMBOL_NAME(sys_create_module)
502     .long SYMBOL_NAME(sys_init_module)
503     .long SYMBOL_NAME(sys_delete_module)
504     .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */
505     .long SYMBOL_NAME(sys_quotactl)
506     .long SYMBOL_NAME(sys_getpgid)
507     .long SYMBOL_NAME(sys_fchdir)
508     .long SYMBOL_NAME(sys_bdflush)
509     .long SYMBOL_NAME(sys_sysfs) /* 135 */
510     .long SYMBOL_NAME(sys_personality)
511     .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */
512     .long SYMBOL_NAME(sys_setfsuid)
513     .long SYMBOL_NAME(sys_setfsgid)
514     .long SYMBOL_NAME(sys_llseek) /* 140 */
515     .long SYMBOL_NAME(sys_getdents)
516     .long SYMBOL_NAME(sys_select)
517     .long SYMBOL_NAME(sys_flock)
518     .long SYMBOL_NAME(sys_msync)
519     .long SYMBOL_NAME(sys_readv) /* 145 */
520     .long SYMBOL_NAME(sys_writev)
521     .long SYMBOL_NAME(sys_getsid)
522     .long SYMBOL_NAME(sys_fdatasync)
523     .long SYMBOL_NAME(sys_sysctl)
524 
.long SYMBOL_NAME(sys_mlock) /* 150 */
/* sys_call_table continued: last explicit entries, up to index 190
   (sys_vfork). The table therefore holds 191 explicit entries, 0..190,
   before the padding that follows. */
525     .long SYMBOL_NAME(sys_munlock)
526     .long SYMBOL_NAME(sys_mlockall)
527     .long SYMBOL_NAME(sys_munlockall)
528     .long SYMBOL_NAME(sys_sched_setparam)
529     .long SYMBOL_NAME(sys_sched_getparam) /* 155 */
530     .long SYMBOL_NAME(sys_sched_setscheduler)
531     .long SYMBOL_NAME(sys_sched_getscheduler)
532     .long SYMBOL_NAME(sys_sched_yield)
533     .long SYMBOL_NAME(sys_sched_get_priority_max)
534     .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */
535     .long SYMBOL_NAME(sys_sched_rr_get_interval)
536     .long SYMBOL_NAME(sys_nanosleep)
537     .long SYMBOL_NAME(sys_mremap)
538     .long SYMBOL_NAME(sys_setresuid)
539     .long SYMBOL_NAME(sys_getresuid) /* 165 */
540     .long SYMBOL_NAME(sys_vm86)
541     .long SYMBOL_NAME(sys_query_module)
542     .long SYMBOL_NAME(sys_poll)
543     .long SYMBOL_NAME(sys_nfsservctl)
544     .long SYMBOL_NAME(sys_setresgid) /* 170 */
545     .long SYMBOL_NAME(sys_getresgid)
546     .long SYMBOL_NAME(sys_prctl)
547     .long SYMBOL_NAME(sys_rt_sigreturn)
548     .long SYMBOL_NAME(sys_rt_sigaction)
549     .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */
550     .long SYMBOL_NAME(sys_rt_sigpending)
551     .long SYMBOL_NAME(sys_rt_sigtimedwait)
552     .long SYMBOL_NAME(sys_rt_sigqueueinfo)
553     .long SYMBOL_NAME(sys_rt_sigsuspend)
554     .long SYMBOL_NAME(sys_pread) /* 180 */
555     .long SYMBOL_NAME(sys_pwrite)
556     .long SYMBOL_NAME(sys_chown)
557     .long SYMBOL_NAME(sys_getcwd)
558     .long SYMBOL_NAME(sys_capget)
559     .long SYMBOL_NAME(sys_capset) /* 185 */
560     .long SYMBOL_NAME(sys_sigaltstack)
561     .long SYMBOL_NAME(sys_sendfile)
562     .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
563     .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
564     .long SYMBOL_NAME(sys_vfork) /* 190 */
565
566 /*
567  * NOTE!! This doesn't have to be exact - we just have
568  * to make sure we have _enough_ of the sys_ni_syscall
569  * entries. Don't panic if you notice that this hasn't
570  * been shrunk every time we add a new system call. 
571  */
/* Pad the table with sys_ni_syscall. NOTE(review): the explicit entries
   run from index 0 to 190 (191 entries), so NR_syscalls-190 repetitions
   give NR_syscalls+1 entries in total - one more than the
   "cmpl $(NR_syscalls),%eax / jae badsys" check in system_call can reach.
   Harmless, and consistent with the "doesn't have to be exact" note. */
572 .rept NR_syscalls-190
573     .long SYMBOL_NAME(sys_ni_syscall)
574 .endr
/* FILE: arch/i386/kernel/init_task.c */
/* NOTE(review): the header names of the #include lines below are missing
   from this copy (apparently stripped during extraction). */
575 #include
576 #include
577
578 #include
579 #include
580 #include
581
/* Statically initialised objects for the initial task. Each one is built
   from an INIT_* initialiser macro that is defined outside this file;
   init_fd_array is an all-NULL array of NR_OPEN file pointers. Only
   init_mm has external linkage. */
582 static struct vm_area_struct init_mmap = INIT_MMAP;
583 static struct fs_struct init_fs = INIT_FS;
584 static struct file * init_fd_array[NR_OPEN] = { NULL, };
585 static struct files_struct init_files = INIT_FILES;
586 static struct signal_struct init_signals = INIT_SIGNALS;
587 struct mm_struct init_mm = INIT_MM;
588
589 /* Initial task structure.
590  * We need to make sure that this is 8192-byte aligned
591  * due to the way process stacks are handled. This is
592  * done by having a special "init_task" linker map
593  * entry.. */
/* The object is placed in its own section, ".data.init_task"; the
   alignment itself has to come from the linker script, which is not
   part of this listing. */
594 union task_union init_task_union
595   __attribute__((__section__(".data.init_task"))) =
596   { INIT_TASK };
597
/* FILE: arch/i386/kernel/irq.c */
598 /*
599  * linux/arch/i386/kernel/irq.c
600  *
601  * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
602  *
603  * This file contains the code used by various IRQ
604  * handling routines: asking for different IRQ's should
605  * be done through these routines instead of just
606  * grabbing them. Thus setups with different IRQ numbers
607  * shouldn't result in any weird surprises, and
608  * installing new handlers should be easier. */
609
610 /* IRQs are in fact implemented a bit like signal
611  * handlers for the kernel. Naturally it's not a 1:1
612  * relation, but there are similarities. 
*/
613
/* NOTE(review): except for "irq.h", the header names of the #include
   lines below are missing from this copy (apparently stripped during
   extraction); restore them from the upstream file. */
614 #include
615 #include
616 #include
617 #include
618 #include
619 #include
620 #include
621 #include
622 #include
623 #include
624 #include
625 #include
626 #include
627 #include
628 #include
629
630 #include
631 #include
632 #include
633 #include
634 #include
635 #include
636 #include
637 #include
638
639 #include "irq.h"
640
/* Counters with one slot per CPU (NR_CPUS entries). Elsewhere in this
   file they are indexed by the number returned by smp_processor_id(); a
   non-zero local_irq_count[cpu] is treated as "in interrupt context" by
   __global_cli() and friends. */
641 unsigned int local_bh_count[NR_CPUS];
642 unsigned int local_irq_count[NR_CPUS];
643
/* Reported as the "NMI:" line by get_irq_list(). */
644 atomic_t nmi_counter;
645
646 /* Linux has a controller-independent x86 interrupt
647  * architecture. every controller has a
648  * 'controller-template', that is used by the main code
649  * to do the right thing. Each driver-visible interrupt
650  * source is transparently wired to the appropriate
651  * controller. Thus drivers need not be aware of the
652  * interrupt-controller.
653  *
654  * Various interrupt controllers we handle: 8259 PIC, SMP
655  * IO-APIC, PIIX4's internal 8259 PIC and SGI's Visual
656  * Workstation Cobalt (IO-)APIC. (IO-APICs assumed to be
657  * messaging to Pentium local-APICs)
658  *
659  * the code is designed to be easily extended with
660  * new/different interrupt controllers, without having to
661  * do assembly magic. */
662
663 /* Micro-access to controllers is serialized over the
664  * whole system. We never hold this lock when we call the
665  * actual IRQ handler. */
666 spinlock_t irq_controller_lock;
667
668 /* Dummy controller type for unused interrupts */
/* do_none: handler installed for interrupt sources that have no
   controller. It logs the vector and the CPU number; on SMP builds it
   also acknowledges the interrupt with ack_APIC_irq(). The regs argument
   is unused. */
669 static void do_none(unsigned int irq,
670                     struct pt_regs * regs)
671 {
672   /* we are careful. While for ISA irqs it's common to
673    * happen outside of any driver (think autodetection),
674    * this is not at all nice for PCI interrupts. So we
675    * are stricter and print a warning when such spurious
676    * interrupts happen. Spurious interrupts can confuse
677    * other drivers if the PCI IRQ line is shared.
678    *
679    * Such spurious interrupts are either driver bugs, or
680    * sometimes hw (chipset) bugs. 
*/
681   printk("unexpected IRQ vector %d on CPU#%d!\n",
682          irq, smp_processor_id());
683
684 #ifdef __SMP__
685   /* [currently unexpected vectors happen only on SMP and
686    * APIC. if we want to have non-APIC and non-8259A
687    * controllers in the future with unexpected vectors,
688    * this ack should probably be made
689    * controller-specific.] */
690   ack_APIC_irq();
691 #endif
692 }
/* Enable and disable are deliberate no-ops for the dummy controller. */
693 static void enable_none(unsigned int irq) { }
694 static void disable_none(unsigned int irq) { }
695
696 /* startup is the same as "enable", shutdown is same as
697  * "disable" */
698 #define startup_none enable_none
699 #define shutdown_none disable_none
700
/* Controller template for unused interrupt sources. The initialiser is
   positional: a name string followed by five function pointers. Judging
   by the values, the order is startup, shutdown, handler, enable,
   disable; struct hw_interrupt_type itself is declared outside this
   file, so confirm the member order there. */
701 struct hw_interrupt_type no_irq_type = {
702   "none",
703   startup_none,
704   shutdown_none,
705   do_none,
706   enable_none,
707   disable_none
708 };
709
710 /* This is the 'legacy' 8259A Programmable Interrupt
711  * Controller, present in the majority of PC/AT boxes. */
712
/* Forward declarations needed by the template below. Note that
   disable_8259A_irq has external linkage while the other two are
   static. */
713 static void do_8259A_IRQ(unsigned int irq,
714                          struct pt_regs * regs);
715 static void enable_8259A_irq(unsigned int irq);
716 void disable_8259A_irq(unsigned int irq);
717
718 /* startup is the same as "enable", shutdown is same as
719  * "disable" */
720 #define startup_8259A_irq enable_8259A_irq
721 #define shutdown_8259A_irq disable_8259A_irq
722
/* Controller template for the 8259A; same positional layout as
   no_irq_type above. make_8259A_irq() installs its address as an IRQ's
   handler. */
723 static struct hw_interrupt_type i8259A_irq_type = {
724   "XT-PIC",
725   startup_8259A_irq,
726   shutdown_8259A_irq,
727   do_8259A_IRQ,
728   enable_8259A_irq,
729   disable_8259A_irq
730 };
731
732 /* Controller mappings for all interrupt sources: */
733 irq_desc_t irq_desc[NR_IRQS] = { [0 ... 
NR_IRQS-1] = 734 { 0, &no_irq_type, } };
/* Listing line 734 above completes the irq_desc initialiser (it cannot be
   re-broken without moving code to the preceding line): every descriptor
   starts with a first member of 0 and no_irq_type as its controller. */
735
736
737 /* 8259A PIC functions to handle ISA devices: */
738
739 /* This contains the irq mask for both 8259A irq
740  * controllers, */
/* Initial value 0xffff: all sixteen mask bits set. */
741 static unsigned int cached_irq_mask = 0xffff;
742
/* Access element x of object y viewed as an array of bytes, 16-bit words
   or 32-bit words. y must be an lvalue because its address is taken. */
743 #define __byte(x,y) (((unsigned char *)&(y))[x])
744 #define __word(x,y) (((unsigned short *)&(y))[x])
745 #define __long(x,y) (((unsigned int *)&(y))[x])
746
/* Bytes 0 and 1 of cached_irq_mask in memory order. On little-endian
   i386 these are mask bits 0-7 and 8-15; enable/disable_8259A_irq write
   them to ports 0x21 and 0xA1 respectively. */
747 #define cached_21 (__byte(0,cached_irq_mask))
748 #define cached_A1 (__byte(1,cached_irq_mask))
749
750 /* Not all IRQs can be routed through the IO-APIC, eg. on
751  * certain (older) boards the timer interrupt is not
752  * connected to any IO-APIC pin, it's fed to the CPU IRQ
753  * line directly.
754  *
755  * Any '1' bit in this mask means the IRQ is routed
756  * through the IO-APIC. this 'mixed mode' IRQ handling
757  * costs nothing because it's only used at IRQ setup
758  * time. */
759 unsigned long io_apic_irqs = 0;
760
761 /* These have to be protected by the irq controller
762  * spinlock before being called. 
*/
/* disable_8259A_irq: set the IRQ's bit in cached_irq_mask and write the
   affected mask byte to the controller. IRQ numbers with bit 3 set
   (8-15) use byte 1 and port 0xA1; the others use byte 0 and port 0x21.
   No range check is made on irq. */
763 void disable_8259A_irq(unsigned int irq)
764 {
765   unsigned int mask = 1 << irq;
766   cached_irq_mask |= mask;
767   if (irq & 8) {
768     outb(cached_A1,0xA1);
769   } else {
770     outb(cached_21,0x21);
771   }
772 }
773
/* enable_8259A_irq: mirror image of disable_8259A_irq - clear the IRQ's
   bit in cached_irq_mask and write the affected byte to the same port. */
774 static void enable_8259A_irq(unsigned int irq)
775 {
776   unsigned int mask = ~(1 << irq);
777   cached_irq_mask &= mask;
778   if (irq & 8) {
779     outb(cached_A1,0xA1);
780   } else {
781     outb(cached_21,0x21);
782   }
783 }
784
785 int i8259A_irq_pending(unsigned int irq)
786 {
/* NOTE(review): the text of listing lines 787-791 is damaged in this
   copy - everything between a '<' and the next '>' appears to have been
   lost. The remnant is kept verbatim; restore the body from the upstream
   file rather than guessing. */
787   unsigned int mask = 1<> 8));
792 }
793
794 void make_8259A_irq(unsigned int irq)
795 {
796   disable_irq(irq);
/* NOTE(review): same damage here - the remainder of listing line 797,
   the rest of make_8259A_irq and the first part of do_8259A_IRQ (up to
   the middle of listing line 831) are missing. What follows from
   "status & ~IRQ_REPLAY" onward is the surviving tail of do_8259A_IRQ;
   the declarations of desc, status and action, and the spin_lock that
   matches the spin_unlock on line 837, are in the lost text. */
797   __long(0,io_apic_irqs) &= ~(1<status & ~IRQ_REPLAY;
832     action = NULL;
/* Only pick up the action list when the IRQ is neither disabled nor
   already being handled; IRQ_INPROGRESS is set in either case. */
833     if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
834       action = desc->action;
835     desc->status = status | IRQ_INPROGRESS;
836   }
837   spin_unlock(&irq_controller_lock);
838
839   /* Exit early if we had no action or it was disabled */
840   if (!action)
841     return;
842
/* The handlers run without irq_controller_lock held. */
843   handle_IRQ_event(irq, regs, action);
844
/* Handling finished: clear IRQ_INPROGRESS and unmask the line again
   unless the IRQ has been marked disabled. */
845   spin_lock(&irq_controller_lock);
846   {
847     unsigned int status = desc->status & ~IRQ_INPROGRESS;
848     desc->status = status;
849     if (!(status & IRQ_DISABLED))
850       enable_8259A_irq(irq);
851   }
852   spin_unlock(&irq_controller_lock);
853 }
854
855 /* This builds up the IRQ handler stubs using some ugly
856  * macros in irq.h
857  *
858  * These macros create the low-level assembly IRQ
859  * routines that save register context and call do_IRQ().
860  * do_IRQ() then does all the operations that are needed
861  * to keep the AT (or SMP IOAPIC) interrupt-controller
862  * happy. 
*/
863
/* BUILD_COMMON_IRQ, BUILD_IRQ, BUILD_SMP_INTERRUPT and
   BUILD_SMP_TIMER_INTERRUPT are not defined in this file; per the comment
   above they come from irq.h. */
864 BUILD_COMMON_IRQ()
865
/* BI(x,y) pastes x and y into one token and hands it to BUILD_IRQ;
   BUILD_16_IRQS(x) does that for y = 0..f, so BUILD_16_IRQS(0x0) expands
   to BUILD_IRQ(0x00) ... BUILD_IRQ(0x0f).
   Both macros are left on one physical line each: every backslash is
   followed by a listing number, so breaking the line after a backslash
   would turn it into a real line continuation. */
866 #define BI(x,y) \ 867 BUILD_IRQ(##x##y)
868
869 #define BUILD_16_IRQS(x) \ 870 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ 871 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ 872 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 873 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
874
875 /* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC)
876  * interrupts: (these are usually mapped to vectors
877  * 0x20-0x30) */
878 BUILD_16_IRQS(0x0)
879
880 #ifdef CONFIG_X86_IO_APIC
881 /* The IO-APIC gives us many more interrupt sources. Most
882  * of these are unused but an SMP system is supposed to
883  * have enough memory ... sometimes (mostly wrt. hw
884  * bugs) we get corrupted vectors all across the
885  * spectrum, so we really want to be prepared to get all
886  * of these. Plus, more powerful systems might have more
887  * than 64 IO-APIC registers.
888  *
889  * (these are usually mapped into the 0x30-0xff vector
890  * range) */
/* Thirteen more groups of sixteen (0x1 .. 0xd): together with the group
   above this is 14 * 16 = 224 stubs when CONFIG_X86_IO_APIC is set. */
891 BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
892 BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6)
893 BUILD_16_IRQS(0x7) BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9)
894 BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) BUILD_16_IRQS(0xc)
895 BUILD_16_IRQS(0xd)
896 #endif
897
898 #undef BUILD_16_IRQS
899 #undef BI
900
901
902 #ifdef __SMP__
903 /* The following vectors are part of the Linux
904  * architecture, there is no hardware IRQ pin equivalent
905  * for them, they are triggered through the ICC by us
906  * (IPIs) */
907 BUILD_SMP_INTERRUPT(reschedule_interrupt)
908 BUILD_SMP_INTERRUPT(invalidate_interrupt)
909 BUILD_SMP_INTERRUPT(stop_cpu_interrupt)
910 BUILD_SMP_INTERRUPT(mtrr_interrupt)
911 BUILD_SMP_INTERRUPT(spurious_interrupt)
912
913 /* every pentium local APIC has two 'local interrupts',
914  * with a soft-definable vector attached to both
915  * interrupts, one of which is a timer interrupt, the
916  * other one is error counter overflow. 
Linux uses the
917  * local APIC timer interrupt to get a much simpler SMP
918  * time architecture: */
919 BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
920
921 #endif
922
/* IRQ(x,y) pastes its arguments into a stub name, e.g. IRQ(0x0,a) gives
   IRQ0x0a_interrupt; IRQLIST_16(x) lists the sixteen names for
   y = 0..f, separated by commas. Presumably these are the symbols
   produced by the BUILD_IRQ stubs generated earlier in this file
   (BUILD_IRQ is defined in irq.h - confirm there).
   Both macros are left on one physical line each: every backslash is
   followed by a listing number, so breaking the line after a backslash
   would turn it into a real line continuation. */
923 #define IRQ(x,y) \ 924 IRQ##x##y##_interrupt
925
926 #define IRQLIST_16(x) \ 927 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ 928 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ 929 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 930 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
931
/* Table of stub entry points. It holds 16 entries without
   CONFIG_X86_IO_APIC and 224 with it; if NR_IRQS is larger than the
   number of initialisers, the remaining slots are null pointers. */
932 static void (*interrupt[NR_IRQS])(void) = {
933   IRQLIST_16(0x0),
934
935 #ifdef CONFIG_X86_IO_APIC
936   IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
937   IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6),
938   IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9),
939   IRQLIST_16(0xa), IRQLIST_16(0xb), IRQLIST_16(0xc),
940   IRQLIST_16(0xd)
941 #endif
942 };
943
944 #undef IRQ
945 #undef IRQLIST_16
946
947
948 /* Special irq handlers. */
949
/* no_action: handler that does nothing; used by the irq2 cascade entry
   defined further down. */
950 void no_action(int cpl, void *dev_id,
951                struct pt_regs *regs)
952 {}
953
954 #ifndef CONFIG_VISWS
955 /* Note that on a 486, we don't want to do a SIGFPE on an
956  * irq13 as the irq is unreliable, and exception 16 works
957  * correctly (ie as explained in the intel
958  * literature). On a 386, you can't use exception 16 due
959  * to bad IBM design, so we have to rely on the less
960  * exact irq13.
961  *
962  * Careful.. Not only is IRQ13 unreliable, but it is also
963  * leads to races. IBM designers who came up with it
964  * should be shot. 
*/
/* math_error_irq: IRQ13 handler. It always writes 0 to port 0xF0 first;
   it then returns without further action when ignore_irq13 is set or
   boot_cpu_data.hard_math is clear, and otherwise calls math_error().
   None of the three arguments is used. */
965 static void math_error_irq(int cpl, void *dev_id,
966                            struct pt_regs *regs)
967 {
968   outb(0,0xF0);
969   if (ignore_irq13 || !boot_cpu_data.hard_math)
970     return;
971   math_error();
972 }
973
/* Positional struct irqaction initialisers: the handler comes first and
   the name string fourth. The meaning of the remaining members is defined
   by struct irqaction, which is declared outside this file. */
974 static struct irqaction irq13 =
975   { math_error_irq, 0, 0, "fpu", NULL, NULL };
976
977 /* IRQ2 is cascade interrupt to second interrupt
978  * controller */
979 static struct irqaction irq2 =
980   { no_action, 0, 0, "cascade", NULL, NULL};
981 #endif
982
983 /* Generic, controller-independent functions: */
984
/* get_irq_list: format a text report into buf and return the number of
   characters written (p - buf). The visible part prints, per reported
   IRQ, the names of all actions chained through ->next, followed by an
   "NMI:" line and, on SMP builds, an "ERR:" line.
   The function has no length parameter; the caller must supply a buffer
   that is large enough. */
985 int get_irq_list(char *buf)
986 {
987   int i, j;
988   struct irqaction * action;
989   char *p = buf;
990
991   p += sprintf(p, " ");
/* NOTE(review): listing lines 992-1009 are damaged in this copy -
   everything between a '<' and the next '>' appears to have been lost,
   including the loop headers. The remnant is kept verbatim. Runs of
   spaces inside the string literals of this function may also have been
   collapsed; check them against the upstream file. */
992   for (j=0; jtypename);
1010     p += sprintf(p, " %s", action->name);
1011
1012     for (action=action->next; action;
1013          action = action->next) {
1014       p += sprintf(p, ", %s", action->name);
1015     }
1016     *p++ = '\n';
1017   }
1018   p += sprintf(p, "NMI: %10u\n",
1019                atomic_read(&nmi_counter));
1020 #ifdef __SMP__
1021   p += sprintf(p, "ERR: %10lu\n", ipi_count);
1022 #endif
1023   return p - buf;
1024 }
1025
1026 /* Global interrupt locks for SMP. Allow interrupts to
1027  * come in on any CPU, yet make cli/sti act globally to
1028  * protect critical regions.. */
1029 #ifdef __SMP__
/* global_irq_holder: number of the CPU that owns the global IRQ lock
   (set by get_irqlock), NO_PROC_ID initially.
   global_irq_lock: bit 0 is the lock bit, handled with
   test_and_set_bit/clear_bit.
   global_irq_count and global_bh_count are polled by wait_on_irq() and
   wait_on_bh(); their updates happen outside this listing. */
1030 unsigned char global_irq_holder = NO_PROC_ID;
1031 unsigned volatile int global_irq_lock;
1032 atomic_t global_irq_count;
1033
1034 atomic_t global_bh_count;
1035 atomic_t global_bh_lock;
1036
1037 /* "global_cli()" is a special case, in that it can hold
1038  * the interrupts disabled for a longish time, and also
1039  * because we may be doing TLB invalidates when holding
1040  * the global IRQ lock for historical reasons. 
Thus we
1041  * may need to check SMP invalidate events specially by
1042  * hand here (but not in any normal spinlocks) */
/* check_smp_invalidate: if this CPU's bit is set in
   smp_invalidate_needed, clear it and flush the local TLB. */
1043 static inline void check_smp_invalidate(int cpu)
1044 {
1045   if (test_bit(cpu, &smp_invalidate_needed)) {
1046     clear_bit(cpu, &smp_invalidate_needed);
1047     local_flush_tlb();
1048   }
1049 }
1050
/* show: debugging dump, called by the wait loops when their spin counter
   runs out. It prints the caller's tag and CPU number, the global irq/bh
   counts with the local counts of CPUs 0 and 1 only (the indexes are
   hard-coded), and then scans the 40 words above the local variable
   "stack", printing every value that lies strictly between the addresses
   of get_options and vsprintf. Presumably those two symbols bracket the
   code of interest so that the hits look like return addresses; that
   depends on link order and is not guaranteed by anything in this
   file. */
1051 static void show(char * str)
1052 {
1053   int i;
1054   unsigned long *stack;
1055   int cpu = smp_processor_id();
1056   extern char *get_options(char *str, int *ints);
1057
1058   printk("\n%s, CPU %d:\n", str, cpu);
1059   printk("irq: %d [%d %d]\n",
1060     atomic_read(&global_irq_count), local_irq_count[0],
1061     local_irq_count[1]);
1062   printk("bh: %d [%d %d]\n",
1063     atomic_read(&global_bh_count), local_bh_count[0],
1064     local_bh_count[1]);
/* Start at the address of the local itself; *++stack reads the words
   above it. */
1065   stack = (unsigned long *) &stack;
1066   for (i = 40; i ; i--) {
1067     unsigned long x = *++stack;
1068     if (x > (unsigned long) &get_options &&
1069         x < (unsigned long) &vsprintf) {
1070       printk("<[%08lx]> ", x);
1071     }
1072   }
1073 }
1074
/* Number of spin iterations before the wait loops call show(). */
1075 #define MAXCOUNT 100000000
1076
/* wait_on_bh: busy-wait until global_bh_count reads 0. After MAXCOUNT
   iterations show() is called once and the counter is set to ~0 (-1), so
   the next report only comes after the counter has wrapped all the way
   round. */
1077 static inline void wait_on_bh(void)
1078 {
1079   int count = MAXCOUNT;
1080   do {
1081     if (!--count) {
1082       show("wait_on_bh");
1083       count = ~0;
1084     }
1085     /* nothing .. wait for the other bh's to go away */
1086   } while (atomic_read(&global_bh_count) != 0);
1087 }
1088
1089 /* I had a lockup scenario where a tight loop doing
1090  * spin_unlock()/spin_lock() on CPU#1 was racing with
1091  * spin_lock() on CPU#0. CPU#0 should have noticed
1092  * spin_unlock(), but apparently the spin_unlock()
1093  * information did not make it through to CPU#0
1094  * ... nasty, is this by design, do we have to limit
1095  * 'memory update oscillation frequency' artificially
1096  * like here?
1097  *
1098  * Such 'high frequency update' races can be avoided by
1099  * careful design, but some of our major constructs like
1100  * spinlocks use similar techniques, it would be nice to
1101  * clarify this issue. 
Set this define to 0 if you want
1102  * to check whether your system freezes. I suspect the
1103  * delay done by SYNC_OTHER_CORES() is in correlation
1104  * with 'snooping latency', but i thought that such
1105  * things are guaranteed by design, since we use the
1106  * 'LOCK' prefix. */
1107 #define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 1
1108
/* SYNC_OTHER_CORES(x): with the workaround enabled this is udelay(x+1);
   otherwise a single nop. NOTE(review): x is not parenthesised in the
   expansion; the only use in this file passes the plain variable cpu, so
   nothing goes wrong here. */
1109 #if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
1110 # define SYNC_OTHER_CORES(x) udelay(x+1)
1111 #else
1112 /* We have to allow irqs to arrive between __sti and
1113  * __cli */
1114 # define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
1115 #endif
1116
/* wait_on_irq: called by get_irqlock() once bit 0 of global_irq_lock has
   been taken. It returns, with the lock bit held, only when
   global_irq_count is 0 and either this CPU's local_bh_count is non-zero
   or global_bh_count is 0.
   While that condition is false the lock bit is released and the inner
   loop spins: it opens an interrupt window (__sti, delay, __cli),
   services a pending TLB invalidate, and tries to retake the lock bit
   only when the counts look idle and the lock is free. The outer loop
   then checks the exit condition again with the lock held. */
1117 static inline void wait_on_irq(int cpu)
1118 {
1119   int count = MAXCOUNT;
1120
1121   for (;;) {
1122
1123     /* Wait until all interrupts are gone. Wait for
1124      * bottom half handlers unless we're already
1125      * executing in one.. */
1126     if (!atomic_read(&global_irq_count)) {
1127       if (local_bh_count[cpu] ||
1128           !atomic_read(&global_bh_count))
1129         break;
1130     }
1131
1132     /* Duh, we have to loop. Release the lock to avoid
1133      * deadlocks */
1134     clear_bit(0, &global_irq_lock);
1135
1136     for (;;) {
/* Same reporting scheme as wait_on_bh(): one dump after MAXCOUNT spins,
   then the counter is set to ~0. */
1137       if (!--count) {
1138         show("wait_on_irq");
1139         count = ~0;
1140       }
1141       __sti();
1142       SYNC_OTHER_CORES(cpu);
1143       __cli();
1144       check_smp_invalidate(cpu);
1145       if (atomic_read(&global_irq_count))
1146         continue;
1147       if (global_irq_lock)
1148         continue;
1149       if (!local_bh_count[cpu] &&
1150           atomic_read(&global_bh_count))
1151         continue;
1152       if (!test_and_set_bit(0,&global_irq_lock))
1153         break;
1154     }
1155   }
1156 }
1157
1158 /* This is called when we want to synchronize with bottom
1159  * half handlers. We need to wait until no other CPU is
1160  * executing any bottom half handler.
1161  *
1162  * Don't wait if we're already running in an interrupt
1163  * context or are inside a bh handler. 
*/ 1164 void synchronize_bh(void) 1165 { 1166 if (atomic_read(&global_bh_count) && !in_interrupt()) 1167 wait_on_bh(); 1168 } 1169 1170 /* This is called when we want to synchronize with 1171 * interrupts. We may for example tell a device to stop 1172 * sending interrupts: but to make sure there are no 1173 * interrupts that are executing on another CPU we need 1174 * to call this function. */ 1175 void synchronize_irq(void) 1176 { 1177 if (atomic_read(&global_irq_count)) { 1178 /* Stupid approach */ 1179 cli(); 1180 sti(); 1181 } 1182 } 1183 1184 static inline void get_irqlock(int cpu) 1185 { 1186 if (test_and_set_bit(0,&global_irq_lock)) { 1187 /* do we already hold the lock? */ 1188 if ((unsigned char) cpu == global_irq_holder) 1189 return; 1190 /* Uhhuh.. Somebody else got it. Wait.. */ 1191 do { 1192 do { 1193 check_smp_invalidate(cpu); 1194 } while (test_bit(0,&global_irq_lock)); 1195 } while (test_and_set_bit(0,&global_irq_lock)); 1196 } 1197 /* We also to make sure that nobody else is running in 1198 * an interrupt context. */ 1199 wait_on_irq(cpu); 1200 1201 /* Ok, finally.. */ 1202 global_irq_holder = cpu; 1203 } 1204 1205 #define EFLAGS_IF_SHIFT 9 1206 1207 /* A global "cli()" while in an interrupt context turns 1208 * into just a local cli(). Interrupts should use 1209 * spinlocks for the (very unlikely) case that they ever 1210 * want to protect against each other. 1211 * 1212 * If we already have local interrupts disabled, this 1213 * will not turn a local disable into a global one 1214 * (problems with spinlocks: this makes 1215 * save_flags+cli+sti usable inside a spinlock). 
*/ 1216 void __global_cli(void) 1217 { 1218 unsigned int flags; 1219 1220 __save_flags(flags); 1221 if (flags & (1 << EFLAGS_IF_SHIFT)) { 1222 int cpu = smp_processor_id(); 1223 __cli(); 1224 if (!local_irq_count[cpu]) 1225 get_irqlock(cpu); 1226 } 1227 } 1228 1229 void __global_sti(void) 1230 { 1231 int cpu = smp_processor_id(); 1232 1233 if (!local_irq_count[cpu]) 1234 release_irqlock(cpu); 1235 __sti(); 1236 } 1237 1238 /* SMP flags value to restore to: 1239 * 0 - global cli 1240 * 1 - global sti 1241 * 2 - local cli 1242 * 3 - local sti */ 1243 unsigned long __global_save_flags(void) 1244 { 1245 int retval; 1246 int local_enabled; 1247 unsigned long flags; 1248 1249 __save_flags(flags); 1250 local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; 1251 /* default to local */ 1252 retval = 2 + local_enabled; 1253 1254 /*check for global flags if we're not in an interrupt*/ 1255 if (!local_irq_count[smp_processor_id()]) { 1256 if (local_enabled) 1257 retval = 1; 1258 if (global_irq_holder == 1259 (unsigned char) smp_processor_id()) 1260 retval = 0; 1261 } 1262 return retval; 1263 } 1264 1265 void __global_restore_flags(unsigned long flags) 1266 { 1267 switch (flags) { 1268 case 0: 1269 __global_cli(); 1270 break; 1271 case 1: 1272 __global_sti(); 1273 break; 1274 case 2: 1275 __cli(); 1276 break; 1277 case 3: 1278 __sti(); 1279 break; 1280 default: 1281 printk("global_restore_flags: %08lx (%08lx)\n", 1282 flags, (&flags)[-1]); 1283 } 1284 } 1285 1286 #endif 1287 1288 /* This should really return information about whether we 1289 * should do bottom half handling etc. Right now we end 1290 * up _always_ checking the bottom half, which is a waste 1291 * of time and is not what some drivers would prefer. 
*/ 1292 int handle_IRQ_event(unsigned int irq, 1293 struct pt_regs * regs, struct irqaction * action) 1294 { 1295 int status; 1296 int cpu = smp_processor_id(); 1297 1298 irq_enter(cpu, irq); 1299 1300 status = 1; /* Force the "do bottom halves" bit */ 1301 1302 if (!(action->flags & SA_INTERRUPT)) 1303 __sti(); 1304 1305 do { 1306 status |= action->flags; 1307 action->handler(irq, action->dev_id, regs); 1308 action = action->next; 1309 } while (action); 1310 if (status & SA_SAMPLE_RANDOM) 1311 add_interrupt_randomness(irq); 1312 __cli(); 1313 1314 irq_exit(cpu, irq); 1315 1316 return status; 1317 } 1318 1319 /* Generic enable/disable code: this just calls down into 1320 * the PIC-specific version for the actual hardware 1321 * disable after having gotten the irq controller lock. 1322 */ 1323 void disable_irq(unsigned int irq) 1324 { 1325 unsigned long flags; 1326 1327 spin_lock_irqsave(&irq_controller_lock, flags); 1328 if (!irq_desc[irq].depth++) { 1329 irq_desc[irq].status |= IRQ_DISABLED; 1330 irq_desc[irq].handler->disable(irq); 1331 } 1332 spin_unlock_irqrestore(&irq_controller_lock, flags); 1333 1334 if (irq_desc[irq].status & IRQ_INPROGRESS) 1335 synchronize_irq(); 1336 } 1337 1338 void enable_irq(unsigned int irq) 1339 { 1340 unsigned long flags; 1341 1342 spin_lock_irqsave(&irq_controller_lock, flags); 1343 switch (irq_desc[irq].depth) { 1344 case 1: 1345 irq_desc[irq].status &= ~(IRQ_DISABLED | 1346 IRQ_INPROGRESS); 1347 irq_desc[irq].handler->enable(irq); 1348 /* fall throught */ 1349 default: 1350 irq_desc[irq].depth--; 1351 break; 1352 case 0: 1353 printk("enable_irq() unbalanced from %p\n", 1354 __builtin_return_address(0)); 1355 } 1356 spin_unlock_irqrestore(&irq_controller_lock, flags); 1357 } 1358 1359 /* do_IRQ handles all normal device IRQ's (the special 1360 * SMP cross-CPU interrupts have their own specific 1361 * handlers). 
*/ 1362 asmlinkage void do_IRQ(struct pt_regs regs) 1363 { 1364 /* We ack quickly, we don't want the irq controller 1365 * thinking we're snobs just because some other CPU has 1366 * disabled global interrupts (we have already done the 1367 * INT_ACK cycles, it's too late to try to pretend to 1368 * the controller that we aren't taking the interrupt). 1369 * 1370 * 0 return value means that this irq is already being 1371 * handled by some other CPU. (or is disabled) */ 1372 int irq = regs.orig_eax & 0xff; /* subtle, see irq.h */ 1373 int cpu = smp_processor_id(); 1374 1375 kstat.irqs[cpu][irq]++; 1376 irq_desc[irq].handler->handle(irq, ®s); 1377 1378 /* This should be conditional: we should really get a 1379 * return code from the irq handler to tell us whether 1380 * the handler wants us to do software bottom half 1381 * handling or not.. */ 1382 if (1) { 1383 if (bh_active & bh_mask) 1384 do_bottom_half(); 1385 } 1386 } 1387 1388 int setup_x86_irq(unsigned int irq, 1389 struct irqaction * new) 1390 { 1391 int shared = 0; 1392 struct irqaction *old, **p; 1393 unsigned long flags; 1394 1395 /* Some drivers like serial.c use request_irq() 1396 * heavily, so we have to be careful not to interfere 1397 * with a running system. */ 1398 if (new->flags & SA_SAMPLE_RANDOM) { 1399 /* This function might sleep, we want to call it 1400 * first, outside of the atomic block. Yes, this 1401 * might clear the entropy pool if the wrong driver 1402 * is attempted to be loaded, without actually 1403 * installing a new handler, but is this really a 1404 * problem, only the sysadmin is able to do this. 
*/ 1405 rand_initialize_irq(irq); 1406 } 1407 1408 /* The following block of code has to be executed 1409 * atomically */ 1410 spin_lock_irqsave(&irq_controller_lock,flags); 1411 p = &irq_desc[irq].action; 1412 if ((old = *p) != NULL) { 1413 /* Can't share interrupts unless both agree to */ 1414 if (!(old->flags & new->flags & SA_SHIRQ)) { 1415 spin_unlock_irqrestore(&irq_controller_lock,flags); 1416 return -EBUSY; 1417 } 1418 1419 /* add new interrupt at end of irq queue */ 1420 do { 1421 p = &old->next; 1422 old = *p; 1423 } while (old); 1424 shared = 1; 1425 } 1426 1427 *p = new; 1428 1429 if (!shared) { 1430 irq_desc[irq].depth = 0; 1431 irq_desc[irq].status &= ~(IRQ_DISABLED | 1432 IRQ_INPROGRESS); 1433 irq_desc[irq].handler->startup(irq); 1434 } 1435 spin_unlock_irqrestore(&irq_controller_lock,flags); 1436 return 0; 1437 } 1438 1439 int request_irq(unsigned int irq, 1440 void (*handler)(int, void *, struct pt_regs *), 1441 unsigned long irqflags, 1442 const char * devname, 1443 void *dev_id) 1444 { 1445 int retval; 1446 struct irqaction * action; 1447 1448 if (irq >= NR_IRQS) 1449 return -EINVAL; 1450 if (!handler) 1451 return -EINVAL; 1452 1453 action = (struct irqaction *) 1454 kmalloc(sizeof(struct irqaction), GFP_KERNEL); 1455 if (!action) 1456 return -ENOMEM; 1457 1458 action->handler = handler; 1459 action->flags = irqflags; 1460 action->mask = 0; 1461 action->name = devname; 1462 action->next = NULL; 1463 action->dev_id = dev_id; 1464 1465 retval = setup_x86_irq(irq, action); 1466 1467 if (retval) 1468 kfree(action); 1469 return retval; 1470 } 1471 1472 void free_irq(unsigned int irq, void *dev_id) 1473 { 1474 struct irqaction * action, **p; 1475 unsigned long flags; 1476 1477 if (irq >= NR_IRQS) 1478 return; 1479 1480 spin_lock_irqsave(&irq_controller_lock,flags); 1481 for (p = &irq_desc[irq].action; 1482 (action = *p) != NULL; p = &action->next) { 1483 if (action->dev_id != dev_id) 1484 continue; 1485 1486 /* Found it - now free it */ 1487 *p = 
action->next; 1488 kfree(action); 1489 if (!irq_desc[irq].action) { 1490 irq_desc[irq].status |= IRQ_DISABLED; 1491 irq_desc[irq].handler->shutdown(irq); 1492 } 1493 goto out; 1494 } 1495 printk("Trying to free free IRQ%d\n",irq); 1496 out: 1497 spin_unlock_irqrestore(&irq_controller_lock,flags); 1498 } 1499 1500 /* IRQ autodetection code.. 1501 * 1502 * This depends on the fact that any interrupt that comes 1503 * in on to an unassigned handler will get stuck with 1504 * "IRQ_INPROGRESS" asserted and the interrupt disabled. 1505 */ 1506 unsigned long probe_irq_on(void) 1507 { 1508 unsigned int i; 1509 unsigned long delay; 1510 1511 /* first, enable any unassigned irqs */ 1512 spin_lock_irq(&irq_controller_lock); 1513 for (i = NR_IRQS-1; i > 0; i--) { 1514 if (!irq_desc[i].action) { 1515 unsigned int status = 1516 irq_desc[i].status | IRQ_AUTODETECT; 1517 irq_desc[i].status = status & ~IRQ_INPROGRESS; 1518 irq_desc[i].handler->startup(i); 1519 } 1520 } 1521 spin_unlock_irq(&irq_controller_lock); 1522 1523 /* Wait for spurious interrupts to trigger */ 1524 for (delay = jiffies + HZ/10; 1525 time_after(delay, jiffies); ) 1526 /* about 100ms delay */ synchronize_irq(); 1527 1528 /* Now filter out any obviously spurious interrupts */ 1529 spin_lock_irq(&irq_controller_lock); 1530 for (i=0; ishutdown(i); 1540 } 1541 } 1542 spin_unlock_irq(&irq_controller_lock); 1543 1544 return 0x12345678; 1545 } 1546 1547 int probe_irq_off(unsigned long unused) 1548 { 1549 int i, irq_found, nr_irqs; 1550 1551 if (unused != 0x12345678) 1552 printk("Bad IRQ probe from %lx\n", (&unused)[-1]); 1553 1554 nr_irqs = 0; 1555 irq_found = 0; 1556 spin_lock_irq(&irq_controller_lock); 1557 for (i=0; ishutdown(i); 1570 } 1571 spin_unlock_irq(&irq_controller_lock); 1572 1573 if (nr_irqs > 1) 1574 irq_found = -irq_found; 1575 return irq_found; 1576 } 1577 1578 void init_ISA_irqs (void) 1579 { 1580 int i; 1581 1582 for (i = 0; i < NR_IRQS; i++) { 1583 irq_desc[i].status = IRQ_DISABLED; 1584 
irq_desc[i].action = 0; 1585 irq_desc[i].depth = 0; 1586 1587 if (i < 16) { 1588 /* 16 old-style INTA-cycle interrupts: */ 1589 irq_desc[i].handler = &i8259A_irq_type; 1590 } else { 1591 /* 'high' PCI IRQs filled in on demand */ 1592 irq_desc[i].handler = &no_irq_type; 1593 } 1594 } 1595 } 1596 1597 __initfunc(void init_IRQ(void)) 1598 { 1599 int i; 1600 1601 #ifndef CONFIG_X86_VISWS_APIC 1602 init_ISA_irqs(); 1603 #else 1604 init_VISWS_APIC_irqs(); 1605 #endif 1606 /* Cover the whole vector space, no vector can escape 1607 * us. (some of these will be overridden and become 1608 * 'special' SMP interrupts) */ 1609 for (i = 0; i < NR_IRQS; i++) { 1610 int vector = FIRST_EXTERNAL_VECTOR + i; 1611 if (vector != SYSCALL_VECTOR) 1612 set_intr_gate(vector, interrupt[i]); 1613 } 1614 1615 #ifdef __SMP__ 1616 1617 /* IRQ0 must be given a fixed assignment and 1618 * initialized before init_IRQ_SMP. */ 1619 set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]); 1620 1621 /* The reschedule interrupt is a CPU-to-CPU 1622 * reschedule-helper IPI, driven by wakeup. 
*/ 1623 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 1624 1625 /* IPI for invalidation */ 1626 set_intr_gate(INVALIDATE_TLB_VECTOR, 1627 invalidate_interrupt); 1628 1629 /* IPI for CPU halt */ 1630 set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt); 1631 1632 /* self generated IPI for local APIC timer */ 1633 set_intr_gate(LOCAL_TIMER_VECTOR,apic_timer_interrupt); 1634 1635 /* IPI for MTRR control */ 1636 set_intr_gate(MTRR_CHANGE_VECTOR, mtrr_interrupt); 1637 1638 /* IPI vector for APIC spurious interrupts */ 1639 set_intr_gate(SPURIOUS_APIC_VECTOR,spurious_interrupt); 1640 #endif 1641 request_region(0x20,0x20,"pic1"); 1642 request_region(0xa0,0x20,"pic2"); 1643 1644 /* Set the clock to 100 Hz, we already have a valid 1645 * vector now: */ 1646 outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ 1647 outb_p(LATCH & 0xff , 0x40); /* LSB */ 1648 outb(LATCH >> 8 , 0x40); /* MSB */ 1649 1650 #ifndef CONFIG_VISWS 1651 setup_x86_irq(2, &irq2); 1652 setup_x86_irq(13, &irq13); 1653 #endif 1654 } 1655 1656 #ifdef CONFIG_X86_IO_APIC 1657 __initfunc(void init_IRQ_SMP(void)) 1658 { 1659 int i; 1660 for (i = 0; i < NR_IRQS ; i++) 1661 if (IO_APIC_VECTOR(i) > 0) 1662 set_intr_gate(IO_APIC_VECTOR(i), interrupt[i]); 1663 } 1664 #endif 1665 /* FILE: arch/i386/kernel/irq.h */ 1666 #ifndef __irq_h 1667 #define __irq_h 1668 1669 #include 1670 1671 /* Interrupt controller descriptor. This is all we need 1672 * to describe about the low-level hardware. */ 1673 struct hw_interrupt_type { 1674 const char * typename; 1675 void (*startup)(unsigned int irq); 1676 void (*shutdown)(unsigned int irq); 1677 void (*handle)(unsigned int irq,struct pt_regs * regs); 1678 void (*enable)(unsigned int irq); 1679 void (*disable)(unsigned int irq); 1680 }; 1681 1682 extern struct hw_interrupt_type no_irq_type; 1683 1684 /* IRQ line status. */ 1685 #define IRQ_INPROGRESS 1 /* active - do not enter! */ 1686 #define IRQ_DISABLED 2 /* disabled - do not enter! 
*/ 1687 #define IRQ_PENDING 4 /* pending, replay on enable*/ 1688 #define IRQ_REPLAY 8 /* replayed but not acked */ 1689 #define IRQ_AUTODETECT 16 /* IRQ being autodetected */ 1690 1691 /* This is the "IRQ descriptor", which contains various 1692 * information about the irq, including what kind of 1693 * hardware handling it has, whether it is disabled etc 1694 * etc. 1695 * 1696 * Pad this out to 32 bytes for cache and indexing 1697 * reasons. */ 1698 typedef struct { 1699 /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */ 1700 unsigned int status; 1701 /* handle/enable/disable functions */ 1702 struct hw_interrupt_type *handler; 1703 /* IRQ action list */ 1704 struct irqaction *action; 1705 /* Disable depth for nested irq disables */ 1706 unsigned int depth; 1707 } irq_desc_t; 1708 1709 /* IDT vectors usable for external interrupt sources 1710 * start at 0x20: */ 1711 #define FIRST_EXTERNAL_VECTOR 0x20 1712 1713 #define SYSCALL_VECTOR 0x80 1714 1715 /* Vectors 0x20-0x2f are used for ISA interrupts. */ 1716 1717 /* Special IRQ vectors used by the SMP architecture: 1718 * 1719 * (some of the following vectors are 'rare', they might 1720 * be merged into a single vector to save vector 1721 * space. TLB, reschedule and local APIC vectors are 1722 * performance-critical.) */ 1723 #define RESCHEDULE_VECTOR 0x30 1724 #define INVALIDATE_TLB_VECTOR 0x31 1725 #define STOP_CPU_VECTOR 0x40 1726 #define LOCAL_TIMER_VECTOR 0x41 1727 #define MTRR_CHANGE_VECTOR 0x50 1728 1729 /* First APIC vector available to drivers: (vectors 1730 * 0x51-0xfe) */ 1731 #define IRQ0_TRAP_VECTOR 0x51 1732 1733 /* This IRQ should never happen, but we print a message 1734 nevertheless. 
*/ 1735 #define SPURIOUS_APIC_VECTOR 0xff 1736 1737 extern irq_desc_t irq_desc[NR_IRQS]; 1738 extern int irq_vector[NR_IRQS]; 1739 #define IO_APIC_VECTOR(irq) irq_vector[irq] 1740 1741 extern void init_IRQ_SMP(void); 1742 extern int handle_IRQ_event(unsigned int, 1743 struct pt_regs *, struct irqaction *); 1744 extern int setup_x86_irq(unsigned int, 1745 struct irqaction *); 1746 1747 /* Various low-level irq details needed by irq.c, 1748 * process.c, time.c, io_apic.c and smp.c 1749 * 1750 * Interrupt entry/exit code at both C and assembly level 1751 */ 1752 1753 extern void no_action(int cpl, void *dev_id, 1754 struct pt_regs *regs); 1755 extern void mask_irq(unsigned int irq); 1756 extern void unmask_irq(unsigned int irq); 1757 extern void disable_8259A_irq(unsigned int irq); 1758 extern int i8259A_irq_pending(unsigned int irq); 1759 extern void ack_APIC_irq(void); 1760 extern void FASTCALL(send_IPI_self(int vector)); 1761 extern void smp_send_mtrr(void); 1762 extern void init_VISWS_APIC_irqs(void); 1763 extern void setup_IO_APIC(void); 1764 extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, 1765 int fn); 1766 extern void make_8259A_irq(unsigned int irq); 1767 extern void send_IPI(int dest, int vector); 1768 extern void init_pic_mode(void); 1769 extern void print_IO_APIC(void); 1770 1771 extern unsigned long io_apic_irqs; 1772 1773 extern char _stext, _etext; 1774 1775 #define MAX_IRQ_SOURCES 128 1776 #define MAX_MP_BUSSES 32 1777 enum mp_bustype { 1778 MP_BUS_ISA, 1779 MP_BUS_PCI 1780 }; 1781 extern int mp_bus_id_to_type [MAX_MP_BUSSES]; 1782 extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; 1783 extern char ioapic_OEM_ID [16]; 1784 extern char ioapic_Product_ID [16]; 1785 1786 extern spinlock_t irq_controller_lock; 1787 1788 #ifdef __SMP__ 1789 1790 #include 1791 1792 static inline void irq_enter(int cpu, unsigned int irq) 1793 { 1794 hardirq_enter(cpu); 1795 while (test_bit(0,&global_irq_lock)) { 1796 /* nothing */; 1797 } 1798 } 1799 1800 static inline 
void irq_exit(int cpu, unsigned int irq) 1801 { 1802 hardirq_exit(cpu); 1803 } 1804 1805 #define IO_APIC_IRQ(x) (((x) >= 16) || \ 1806 ((1<<(x)) & io_apic_irqs)) 1807 1808 #else 1809 1810 #define irq_enter(cpu, irq) (++local_irq_count[cpu]) 1811 #define irq_exit(cpu, irq) (--local_irq_count[cpu]) 1812 1813 #define IO_APIC_IRQ(x) (0) 1814 1815 #endif 1816 1817 #define __STR(x) #x 1818 #define STR(x) __STR(x) 1819 1820 #define SAVE_ALL \ 1821 "cld\n\t" \ 1822 "pushl %es\n\t" \ 1823 "pushl %ds\n\t" \ 1824 "pushl %eax\n\t" \ 1825 "pushl %ebp\n\t" \ 1826 "pushl %edi\n\t" \ 1827 "pushl %esi\n\t" \ 1828 "pushl %edx\n\t" \ 1829 "pushl %ecx\n\t" \ 1830 "pushl %ebx\n\t" \ 1831 "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ 1832 "movl %dx,%ds\n\t" \ 1833 "movl %dx,%es\n\t" 1834 1835 #define IRQ_NAME2(nr) nr##_interrupt(void) 1836 #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) 1837 1838 #define GET_CURRENT \ 1839 "movl %esp, %ebx\n\t" \ 1840 "andl $-8192, %ebx\n\t" 1841 1842 #ifdef __SMP__ 1843 1844 /* SMP has a few special interrupts for IPI messages */ 1845 1846 #define BUILD_SMP_INTERRUPT(x) \ 1847 asmlinkage void x(void); \ 1848 __asm__( \ 1849 "\n"__ALIGN_STR"\n" \ 1850 SYMBOL_NAME_STR(x) ":\n\t" \ 1851 "pushl $-1\n\t" \ 1852 SAVE_ALL \ 1853 "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ 1854 "jmp ret_from_intr\n"); 1855 1856 #define BUILD_SMP_TIMER_INTERRUPT(x) \ 1857 asmlinkage void x(struct pt_regs * regs); \ 1858 __asm__( \ 1859 "\n"__ALIGN_STR"\n" \ 1860 SYMBOL_NAME_STR(x) ":\n\t" \ 1861 "pushl $-1\n\t" \ 1862 SAVE_ALL \ 1863 "movl %esp,%eax\n\t" \ 1864 "pushl %eax\n\t" \ 1865 "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ 1866 "addl $4,%esp\n\t" \ 1867 "jmp ret_from_intr\n"); 1868 1869 #endif /* __SMP__ */ 1870 1871 #define BUILD_COMMON_IRQ() \ 1872 __asm__( \ 1873 "\n" __ALIGN_STR"\n" \ 1874 "common_interrupt:\n\t" \ 1875 SAVE_ALL \ 1876 "pushl $ret_from_intr\n\t" \ 1877 "jmp "SYMBOL_NAME_STR(do_IRQ)); 1878 1879 /* subtle. 
orig_eax is used by the signal code to 1880 * distinct between system calls and interrupted 'random 1881 * user-space'. Thus we have to put a negative value into 1882 * orig_eax here. (the problem is that both system calls 1883 * and IRQs want to have small integer numbers in 1884 * orig_eax, and the syscall code has won the 1885 * optimization conflict ;) */ 1886 #define BUILD_IRQ(nr) \ 1887 asmlinkage void IRQ_NAME(nr); \ 1888 __asm__( \ 1889 "\n"__ALIGN_STR"\n" \ 1890 SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ 1891 "pushl $"#nr"-256\n\t" \ 1892 "jmp common_interrupt"); 1893 1894 /* x86 profiling function, SMP safe. We might want to do 1895 * this in assembly totally? */ 1896 static inline void x86_do_profile (unsigned long eip) 1897 { 1898 if (prof_buffer && current->pid) { 1899 eip -= (unsigned long) &_stext; 1900 eip >>= prof_shift; 1901 /* Don't ignore out-of-bounds EIP values silently, 1902 * put them into the last histogram slot, so if 1903 * present, they will show up as a sharp peak. */ 1904 if (eip > prof_len-1) 1905 eip = prof_len-1; 1906 atomic_inc((atomic_t *)&prof_buffer[eip]); 1907 } 1908 } 1909 1910 #endif /* FILE: arch/i386/kernel/process.c */ 1911 /* 1912 * linux/arch/i386/kernel/process.c 1913 * 1914 * Copyright (C) 1995 Linus Torvalds 1915 */ 1916 1917 /* This file handles the architecture-dependent parts of 1918 * process handling.. 
*/ 1919 1920 #define __KERNEL_SYSCALLS__ 1921 #include 1922 1923 #include 1924 #include 1925 #include 1926 #include 1927 #include 1928 #include 1929 #include 1930 #include 1931 #include 1932 #include 1933 #include 1934 #include 1935 #include 1936 #include 1937 #include 1938 #include 1939 #include 1940 #include 1941 #include 1942 #include 1943 #if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF) 1944 #include 1945 #endif 1946 1947 #include 1948 #include 1949 #include 1950 #include 1951 #include 1952 #include 1953 #include 1954 #ifdef CONFIG_MATH_EMULATION 1955 #include 1956 #endif 1957 1958 #include "irq.h" 1959 1960 spinlock_t semaphore_wake_lock = SPIN_LOCK_UNLOCKED; 1961 1962 asmlinkage void ret_from_fork(void) 1963 __asm__("ret_from_fork"); 1964 1965 #ifdef CONFIG_APM 1966 extern int apm_do_idle(void); 1967 extern void apm_do_busy(void); 1968 #endif 1969 1970 static int hlt_counter=0; 1971 1972 #define HARD_IDLE_TIMEOUT (HZ / 3) 1973 1974 void disable_hlt(void) 1975 { 1976 hlt_counter++; 1977 } 1978 1979 void enable_hlt(void) 1980 { 1981 hlt_counter--; 1982 } 1983 1984 #ifndef __SMP__ 1985 1986 static void hard_idle(void) 1987 { 1988 while (!current->need_resched) { 1989 if (boot_cpu_data.hlt_works_ok && !hlt_counter) { 1990 #ifdef CONFIG_APM 1991 /* If the APM BIOS is not enabled, or there 1992 is an error calling the idle routine, we 1993 should hlt if possible. We need to check 1994 need_resched again because an interrupt 1995 may have occurred in apm_do_idle(). */ 1996 start_bh_atomic(); 1997 if (!apm_do_idle() && !current->need_resched) 1998 __asm__("hlt"); 1999 end_bh_atomic(); 2000 #else 2001 __asm__("hlt"); 2002 #endif 2003 } 2004 if (current->need_resched) 2005 break; 2006 schedule(); 2007 } 2008 #ifdef CONFIG_APM 2009 apm_do_busy(); 2010 #endif 2011 } 2012 2013 /* The idle loop on a uniprocessor i386.. 
*/ 2014 static int cpu_idle(void *unused) 2015 { 2016 int work = 1; 2017 unsigned long start_idle = 0; 2018 2019 /* endless idle loop with no priority at all */ 2020 current->priority = 0; 2021 current->counter = -100; 2022 for (;;) { 2023 if (work) 2024 start_idle = jiffies; 2025 2026 if (jiffies - start_idle > HARD_IDLE_TIMEOUT) 2027 hard_idle(); 2028 else { 2029 if (boot_cpu_data.hlt_works_ok && 2030 !hlt_counter && !current->need_resched) 2031 __asm__("hlt"); 2032 } 2033 2034 work = current->need_resched; 2035 schedule(); 2036 check_pgt_cache(); 2037 } 2038 } 2039 2040 #else 2041 2042 /* This is being executed in task 0 'user space'. */ 2043 2044 int cpu_idle(void *unused) 2045 { 2046 /* endless idle loop with no priority at all */ 2047 current->priority = 0; 2048 current->counter = -100; 2049 while(1) { 2050 if (current_cpu_data.hlt_works_ok && !hlt_counter && 2051 !current->need_resched) 2052 __asm__("hlt"); 2053 /* although we are an idle CPU, we do not want to get 2054 * into the scheduler unnecessarily. */ 2055 if (current->need_resched) { 2056 schedule(); 2057 check_pgt_cache(); 2058 } 2059 } 2060 } 2061 2062 #endif 2063 2064 asmlinkage int sys_idle(void) 2065 { 2066 if (current->pid != 0) 2067 return -EPERM; 2068 cpu_idle(NULL); 2069 return 0; 2070 } 2071 2072 /* This routine reboots the machine by asking the 2073 * keyboard controller to pulse the reset-line low. We 2074 * try that for a while, and if it doesn't work, we do 2075 * some other stupid things. 
*/ 2076 2077 static long no_idt[2] = {0, 0}; 2078 static int reboot_mode = 0; 2079 static int reboot_thru_bios = 0; 2080 2081 __initfunc(void reboot_setup(char *str, int *ints)) 2082 { 2083 while(1) { 2084 switch (*str) { 2085 case 'w': /* "warm" reboot (no memory testing etc) */ 2086 reboot_mode = 0x1234; 2087 break; 2088 case 'c': /* "cold" reboot (w/ memory testing etc) */ 2089 reboot_mode = 0x0; 2090 break; 2091 case 'b': /* "bios" reboot by jumping thru the BIOS*/ 2092 reboot_thru_bios = 1; 2093 break; 2094 case 'h': 2095 /* "hard" reboot by toggling RESET and/or crashing 2096 * the CPU */ 2097 reboot_thru_bios = 0; 2098 break; 2099 } 2100 if((str = strchr(str,',')) != NULL) 2101 str++; 2102 else 2103 break; 2104 } 2105 } 2106 2107 /* The following code and data reboots the machine by 2108 * switching to real mode and jumping to the BIOS reset 2109 * entry point, as if the CPU has really been reset. The 2110 * previous version asked the keyboard controller to 2111 * pulse the CPU reset line, which is more thorough, but 2112 * doesn't work with at least one type of 486 2113 * motherboard. It is easy to stop this code working; 2114 * hence the copious comments. */ 2115 static unsigned long long 2116 real_mode_gdt_entries [3] = 2117 { 2118 0x0000000000000000ULL, /* Null descriptor */ 2119 /* 16-bit real-mode 64k code at 0x00000000 */ 2120 0x00009a000000ffffULL, 2121 /* 16-bit real-mode 64k data at 0x00000100 */ 2122 0x000092000100ffffULL 2123 }; 2124 2125 static struct 2126 { 2127 unsigned short size __attribute__ ((packed)); 2128 unsigned long long * base __attribute__ ((packed)); 2129 } 2130 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, 2131 real_mode_gdt_entries }, 2132 real_mode_idt = { 0x3ff, 0 }; 2133 2134 /* This is 16-bit protected mode code to disable paging 2135 and the cache, switch to real mode and jump to the 2136 BIOS reset code. 
2137 2138 The instruction that switches to real mode by writing 2139 to CR0 must be followed immediately by a far jump 2140 instruction, which set CS to a valid value for real 2141 mode, and flushes the prefetch queue to avoid running 2142 instructions that have already been decoded in 2143 protected mode. 2144 2145 Clears all the flags except ET, especially PG 2146 (paging), PE (protected-mode enable) and TS (task 2147 switch for coprocessor state save). Flushes the TLB 2148 after paging has been disabled. Sets CD and NW, to 2149 disable the cache on a 486, and invalidates the cache. 2150 This is more like the state of a 486 after reset. I 2151 don't know if something else should be done for other 2152 chips. 2153 2154 More could be done here to set up the registers as if 2155 a CPU reset had occurred; hopefully real BIOSs don't 2156 assume much. */ 2157 2158 static unsigned char real_mode_switch [] = 2159 { 2160 0x66, 0x0f, 0x20, 0xc0, /*movl %cr0,%eax */ 2161 0x66, 0x83, 0xe0, 0x11, /*andl $0x00000011,%eax*/ 2162 /*orl $0x60000000,%eax*/ 2163 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, 2164 0x66, 0x0f, 0x22, 0xc0, /*movl %eax,%cr0 */ 2165 0x66, 0x0f, 0x22, 0xd8, /*movl %eax,%cr3 */ 2166 0x66, 0x0f, 0x20, 0xc3, /*movl %cr0,%ebx */ 2167 /*andl $0x60000000,%ebx*/ 2168 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, 2169 0x74, 0x02, /*jz f */ 2170 0x0f, 0x08, /*invd */ 2171 0x24, 0x10, /*f: andb $0x10,al*/ 2172 0x66, 0x0f, 0x22, 0xc0, /*movl %eax,%cr0*/ 2173 0xea, 0x00, 0x00, 0xff, 0xff /*ljmp $0xffff,$0x0000*/ 2174 }; 2175 2176 static inline void kb_wait(void) 2177 { 2178 int i; 2179 2180 for (i=0; i<0x10000; i++) 2181 if ((inb_p(0x64) & 0x02) == 0) 2182 break; 2183 } 2184 2185 void machine_restart(char * __unused) 2186 { 2187 #if __SMP__ 2188 /* turn off the IO-APIC, so we can do a clean reboot */ 2189 init_pic_mode(); 2190 #endif 2191 2192 if(!reboot_thru_bios) { 2193 /* rebooting needs to touch the page at abs addr 0 */ 2194 *((unsigned short *)__va(0x472)) = reboot_mode; 
2195 for (;;) { 2196 int i; 2197 for (i=0; i<100; i++) { 2198 kb_wait(); 2199 udelay(50); 2200 outb(0xfe,0x64); /* pulse reset low */ 2201 udelay(50); 2202 } 2203 /* That didn't work - force a triple fault.. */ 2204 __asm__ __volatile__("lidt %0": :"m" (no_idt)); 2205 __asm__ __volatile__("int3"); 2206 } 2207 } 2208 2209 cli(); 2210 2211 /* Write zero to CMOS register number 0x0f, which the 2212 BIOS POST routine will recognize as telling it to do 2213 a proper reboot. (Well that's what this book in 2214 front of me says -- it may only apply to the Phoenix 2215 BIOS though, it's not clear). At the same time, 2216 disable NMIs by setting the top bit in the CMOS 2217 address register, as we're about to do peculiar 2218 things to the CPU. I'm not sure if `outb_p' is 2219 needed instead of just `outb'. Use it to be on the 2220 safe side. */ 2221 2222 outb_p (0x8f, 0x70); 2223 outb_p (0x00, 0x71); 2224 2225 /* Remap the kernel at virtual address zero, as well as 2226 offset zero from the kernel segment. This assumes 2227 the kernel segment starts at virtual address 2228 PAGE_OFFSET. */ 2229 2230 memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 2231 sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); 2232 2233 /* Make sure the first page is mapped to the start of 2234 physical memory. It is normally not mapped, to trap 2235 kernel NULL pointer dereferences. */ 2236 2237 pg0[0] = _PAGE_RW | _PAGE_PRESENT; 2238 2239 /* Use `swapper_pg_dir' as our page directory. We 2240 * bother with `SET_PAGE_DIR' because although might be 2241 * rebooting, but if we change the way we set root page 2242 * dir in the future, then we wont break a seldom used 2243 * feature ;) */ 2244 2245 SET_PAGE_DIR(current,swapper_pg_dir); 2246 2247 /* Write 0x1234 to absolute memory location 0x472. The 2248 BIOS reads this on booting to tell it to "Bypass 2249 memory test (also warm boot)". 
This seems like a 2250 fairly standard thing that gets set by REBOOT.COM 2251 programs, and the previous reset routine did this 2252 too. */ 2253 2254 *((unsigned short *)0x472) = reboot_mode; 2255 2256 /* For the switch to real mode, copy some code to low 2257 memory. It has to be in the first 64k because it is 2258 running in 16-bit mode, and it has to have the same 2259 physical and virtual address, because it turns off 2260 paging. Copy it near the end of the first page, out 2261 of the way of BIOS variables. */ 2262 2263 memcpy ((void *) (0x1000 - sizeof (real_mode_switch)), 2264 real_mode_switch, sizeof (real_mode_switch)); 2265 2266 /* Set up the IDT for real mode. */ 2267 2268 __asm__ __volatile__ 2269 ("lidt %0" : : "m" (real_mode_idt)); 2270 2271 /* Set up a GDT from which we can load segment 2272 descriptors for real mode. The GDT is not used in 2273 real mode; it is just needed here to prepare the 2274 descriptors. */ 2275 2276 __asm__ __volatile__ 2277 ("lgdt %0" : : "m" (real_mode_gdt)); 2278 2279 /* Load the data segment registers, and thus the 2280 descriptors ready for real mode. The base address 2281 of each segment is 0x100, 16 times the selector 2282 value being loaded here. This is so that the 2283 segment registers don't have to be reloaded after 2284 switching to real mode: the values are consistent 2285 for real mode operation already. */ 2286 2287 __asm__ __volatile__ ("movl $0x0010,%%eax\n" 2288 "\tmovl %%ax,%%ds\n" 2289 "\tmovl %%ax,%%es\n" 2290 "\tmovl %%ax,%%fs\n" 2291 "\tmovl %%ax,%%gs\n" 2292 "\tmovl %%ax,%%ss" : : : "eax"); 2293 2294 /* Jump to the 16-bit code that we copied earlier. It 2295 disables paging and the cache, switches to real 2296 mode, and jumps to the BIOS reset entry point. 
*/ 2297 2298 __asm__ __volatile__ ("ljmp $0x0008,%0" 2299 : 2300 : "i" ((void *) (0x1000 - 2301 sizeof (real_mode_switch)))); 2302 } 2303 2304 void machine_halt(void) 2305 {} 2306 2307 void machine_power_off(void) 2308 { 2309 #if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF) 2310 apm_power_off(); 2311 #endif 2312 } 2313 2314 2315 void show_regs(struct pt_regs * regs) 2316 { 2317 long cr0 = 0L, cr2 = 0L, cr3 = 0L; 2318 2319 printk("\n"); 2320 printk("EIP: %04x:[<%08lx>]", 2321 0xffff & regs->xcs,regs->eip); 2322 if (regs->xcs & 3) 2323 printk(" ESP: %04x:%08lx", 2324 0xffff & regs->xss,regs->esp); 2325 printk(" EFLAGS: %08lx\n",regs->eflags); 2326 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", 2327 regs->eax,regs->ebx,regs->ecx,regs->edx); 2328 printk("ESI: %08lx EDI: %08lx EBP: %08lx", 2329 regs->esi, regs->edi, regs->ebp); 2330 printk(" DS: %04x ES: %04x\n", 2331 0xffff & regs->xds,0xffff & regs->xes); 2332 __asm__("movl %%cr0, %0": "=r" (cr0)); 2333 __asm__("movl %%cr2, %0": "=r" (cr2)); 2334 __asm__("movl %%cr3, %0": "=r" (cr3)); 2335 printk("CR0: %08lx CR2: %08lx CR3: %08lx\n", 2336 cr0, cr2, cr3); 2337 } 2338 2339 /* Allocation and freeing of basic task resources. 2340 * 2341 * NOTE! The task struct and the stack go together 2342 * 2343 * The task structure is a two-page thing, and as such 2344 * not reliable to allocate using the basic page alloc 2345 * functions. We have a small cache of structures for 2346 * when the allocations fail.. 2347 * 2348 * This extra buffer essentially acts to make for less 2349 * "jitter" in the allocations.. 2350 * 2351 * On SMP we don't do this right now because: 2352 * - we aren't holding any locks when called, and we 2353 * might as well just depend on the generic memory 2354 * management to do proper locking for us instead of 2355 * complicating it here. 2356 * - if you use SMP you have a beefy enough machine that 2357 * this shouldn't matter.. 
*/ 2358 #ifndef __SMP__ 2359 #define EXTRA_TASK_STRUCT 16 2360 static struct task_struct * 2361 task_struct_stack[EXTRA_TASK_STRUCT]; 2362 static int task_struct_stack_ptr = -1; 2363 #endif 2364 2365 struct task_struct * alloc_task_struct(void) 2366 { 2367 #ifndef EXTRA_TASK_STRUCT 2368 return (struct task_struct *) 2369 __get_free_pages(GFP_KERNEL,1); 2370 #else 2371 int index; 2372 struct task_struct *ret; 2373 2374 index = task_struct_stack_ptr; 2375 if (index >= EXTRA_TASK_STRUCT/2) 2376 goto use_cache; 2377 ret = (struct task_struct *) 2378 __get_free_pages(GFP_KERNEL,1); 2379 if (!ret) { 2380 index = task_struct_stack_ptr; 2381 if (index >= 0) { 2382 use_cache: 2383 ret = task_struct_stack[index]; 2384 task_struct_stack_ptr = index-1; 2385 } 2386 } 2387 return ret; 2388 #endif 2389 } 2390 2391 void free_task_struct(struct task_struct *p) 2392 { 2393 #ifdef EXTRA_TASK_STRUCT 2394 int index = task_struct_stack_ptr+1; 2395 2396 if (index < EXTRA_TASK_STRUCT) { 2397 task_struct_stack[index] = p; 2398 task_struct_stack_ptr = index; 2399 } else 2400 #endif 2401 free_pages((unsigned long) p, 1); 2402 } 2403 2404 void release_segments(struct mm_struct *mm) 2405 { 2406 if (mm->segments) { 2407 void * ldt = mm->segments; 2408 mm->segments = NULL; 2409 vfree(ldt); 2410 } 2411 } 2412 2413 void forget_segments(void) 2414 { 2415 /* forget local segments */ 2416 __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs" 2417 : /* no outputs */ 2418 : "r" (0)); 2419 2420 /* Get the LDT entry from init_task. */ 2421 current->tss.ldt = _LDT(0); 2422 load_ldt(0); 2423 } 2424 2425 /* Create a kernel thread */ 2426 int kernel_thread(int (*fn)(void *), void * arg, 2427 unsigned long flags) 2428 { 2429 long retval, d0; 2430 2431 __asm__ __volatile__( 2432 "movl %%esp,%%esi\n\t" 2433 "int $0x80\n\t" /* Linux/i386 system call */ 2434 "cmpl %%esp,%%esi\n\t" /* child or parent? */ 2435 "je 1f\n\t" /* parent - jump */ 2436 /* Load the argument into eax, and push it. 
That 2437 * way, it does not matter whether the called 2438 * function is compiled with -mregparm or not. */ 2439 "movl %4,%%eax\n\t" 2440 "pushl %%eax\n\t" 2441 "call *%5\n\t" /* call fn */ 2442 "movl %3,%0\n\t" /* exit */ 2443 "int $0x80\n" 2444 "1:\t" 2445 :"=&a" (retval), "=&S" (d0) 2446 :"0" (__NR_clone), "i" (__NR_exit), 2447 "r" (arg), "r" (fn), 2448 "b" (flags | CLONE_VM) 2449 : "memory"); 2450 return retval; 2451 } 2452 2453 /* Free current thread data structures etc.. */ 2454 void exit_thread(void) 2455 { 2456 /* nothing to do ... */ 2457 } 2458 2459 void flush_thread(void) 2460 { 2461 int i; 2462 struct task_struct *tsk = current; 2463 2464 for (i=0 ; i<8 ; i++) 2465 tsk->tss.debugreg[i] = 0; 2466 2467 /* Forget coprocessor state.. */ 2468 clear_fpu(tsk); 2469 tsk->used_math = 0; 2470 } 2471 2472 void release_thread(struct task_struct *dead_task) 2473 { 2474 } 2475 2476 /* If new_mm is NULL, we're being called to set up the 2477 * LDT descriptor for a clone task. Each clone must have 2478 * a separate entry in the GDT. */ 2479 void copy_segments(int nr, struct task_struct *p, 2480 struct mm_struct *new_mm) 2481 { 2482 struct mm_struct * old_mm = current->mm; 2483 void * old_ldt = old_mm->segments, * ldt = old_ldt; 2484 2485 /* default LDT - use the one from init_task */ 2486 p->tss.ldt = _LDT(0); 2487 if (old_ldt) { 2488 if (new_mm) { 2489 ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); 2490 new_mm->segments = ldt; 2491 if (!ldt) { 2492 printk(KERN_WARNING "ldt allocation failed\n"); 2493 return; 2494 } 2495 memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); 2496 } 2497 p->tss.ldt = _LDT(nr); 2498 set_ldt_desc(nr, ldt, LDT_ENTRIES); 2499 return; 2500 } 2501 } 2502 2503 /* Save a segment. 
*/ 2504 #define savesegment(seg,value) \ 2505 asm volatile("movl %%" #seg ",%0":"=m" \ 2506 (*(int *)&(value))) 2507 2508 int copy_thread(int nr, unsigned long clone_flags, 2509 unsigned long esp,struct task_struct * p, 2510 struct pt_regs * regs) 2511 { 2512 struct pt_regs * childregs; 2513 2514 childregs = ((struct pt_regs *) 2515 (2*PAGE_SIZE + (unsigned long) p)) - 1; 2516 *childregs = *regs; 2517 childregs->eax = 0; 2518 childregs->esp = esp; 2519 2520 p->tss.esp = (unsigned long) childregs; 2521 p->tss.esp0 = (unsigned long) (childregs+1); 2522 p->tss.ss0 = __KERNEL_DS; 2523 2524 p->tss.tr = _TSS(nr); 2525 set_tss_desc(nr,&(p->tss)); 2526 p->tss.eip = (unsigned long) ret_from_fork; 2527 2528 savesegment(fs,p->tss.fs); 2529 savesegment(gs,p->tss.gs); 2530 2531 /* a bitmap offset pointing outside of the TSS limit 2532 * causes a nicely controllable SIGSEGV. The first 2533 * sys_ioperm() call sets up the bitmap properly. */ 2534 p->tss.bitmap = sizeof(struct thread_struct); 2535 2536 unlazy_fpu(current); 2537 p->tss.i387 = current->tss.i387; 2538 2539 return 0; 2540 } 2541 2542 /* fill in the FPU structure for a core dump. */ 2543 int dump_fpu(struct pt_regs * regs, 2544 struct user_i387_struct * fpu) 2545 { 2546 int fpvalid; 2547 struct task_struct *tsk = current; 2548 2549 fpvalid = tsk->used_math; 2550 if (fpvalid) { 2551 unlazy_fpu(tsk); 2552 memcpy(fpu,&tsk->tss.i387.hard,sizeof(*fpu)); 2553 } 2554 2555 return fpvalid; 2556 } 2557 2558 /* fill in the user structure for a core dump.. */ 2559 void dump_thread(struct pt_regs * regs, 2560 struct user * dump) 2561 { 2562 int i; 2563 2564 /* changed the size calculations - should hopefully work 2565 better. 
lbt */ 2566 dump->magic = CMAGIC; 2567 dump->start_code = 0; 2568 dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); 2569 dump->u_tsize = 2570 ((unsigned long) current->mm->end_code) 2571 >> PAGE_SHIFT; 2572 dump->u_dsize = 2573 ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) 2574 >> PAGE_SHIFT; 2575 dump->u_dsize -= dump->u_tsize; 2576 dump->u_ssize = 0; 2577 for (i = 0; i < 8; i++) 2578 dump->u_debugreg[i] = current->tss.debugreg[i]; 2579 2580 if (dump->start_stack < TASK_SIZE) 2581 dump->u_ssize = 2582 ((unsigned long) (TASK_SIZE - dump->start_stack)) 2583 >> PAGE_SHIFT; 2584 2585 dump->regs.ebx = regs->ebx; 2586 dump->regs.ecx = regs->ecx; 2587 dump->regs.edx = regs->edx; 2588 dump->regs.esi = regs->esi; 2589 dump->regs.edi = regs->edi; 2590 dump->regs.ebp = regs->ebp; 2591 dump->regs.eax = regs->eax; 2592 dump->regs.ds = regs->xds; 2593 dump->regs.es = regs->xes; 2594 savesegment(fs,dump->regs.fs); 2595 savesegment(gs,dump->regs.gs); 2596 dump->regs.orig_eax = regs->orig_eax; 2597 dump->regs.eip = regs->eip; 2598 dump->regs.cs = regs->xcs; 2599 dump->regs.eflags = regs->eflags; 2600 dump->regs.esp = regs->esp; 2601 dump->regs.ss = regs->xss; 2602 2603 dump->u_fpvalid = dump_fpu (regs, &dump->i387); 2604 } 2605 2606 /* This special macro can be used to load a debugging 2607 * register */ 2608 #define loaddebug(tsk,register) \ 2609 __asm__("movl %0,%%db" #register \ 2610 : /* no output */ \ 2611 :"r" (tsk->tss.debugreg[register])) 2612 2613 2614 /* switch_to(x,yn) should switch tasks from x to y. 2615 * 2616 * We fsave/fwait so that an exception goes off at the 2617 * right time (as a call from the fsave or fwait in 2618 * effect) rather than to the wrong process. Lazy FP 2619 * saving no longer makes any sense with modern CPU's, 2620 * and this simplifies a lot of things (SMP and UP become 2621 * the same). 2622 * 2623 * NOTE! We used to use the x86 hardware context 2624 * switching. 
The reason for not using it any more 2625 * becomes apparent when you try to recover gracefully 2626 * from saved state that is no longer valid (stale 2627 * segment register values in particular). With the 2628 * hardware task-switch, there is no way to fix up bad 2629 * state in a reasonable manner. 2630 * 2631 * The fact that Intel documents the hardware 2632 * task-switching to be slow is a fairly red herring - 2633 * this code is not noticeably faster. However, there 2634 * _is_ some room for improvement here, so the 2635 * performance issues may eventually be a valid point. 2636 * More important, however, is the fact that this allows 2637 * us much more flexibility. */ 2638 void __switch_to(struct task_struct *prev, 2639 struct task_struct *next) 2640 { 2641 /* Save FPU and set TS if it wasn't set before.. */ 2642 unlazy_fpu(prev); 2643 2644 /* Reload TR, LDT and the page table pointers.. 2645 * 2646 * We need TR for the IO permission bitmask (and the 2647 * vm86 bitmasks in case we ever use enhanced v86 mode 2648 * properly). 2649 * 2650 * We may want to get rid of the TR register some day, 2651 * and copy the bitmaps around by hand. Oh, well. In 2652 * the meantime we have to clear the busy bit in the 2653 * TSS entry, ugh. */ 2654 gdt_table[next->tss.tr >> 3].b &= 0xfffffdff; 2655 asm volatile("ltr %0": :"g" 2656 (*(unsigned short *)&next->tss.tr)); 2657 2658 /* Save away %fs and %gs. No need to save %es and %ds, 2659 * as those are always kernel segments while inside the 2660 * kernel. 
*/ 2661 asm volatile("movl %%fs,%0":"=m" 2662 (*(int *)&prev->tss.fs)); 2663 asm volatile("movl %%gs,%0":"=m" 2664 (*(int *)&prev->tss.gs)); 2665 2666 /* Re-load LDT if necessary */ 2667 if (next->mm->segments != prev->mm->segments) 2668 asm volatile("lldt %0": :"g" 2669 (*(unsigned short *)&next->tss.ldt)); 2670 2671 /* Re-load page tables */ 2672 { 2673 unsigned long new_cr3 = next->tss.cr3; 2674 if (new_cr3 != prev->tss.cr3) 2675 asm volatile("movl %0,%%cr3": :"r" (new_cr3)); 2676 } 2677 2678 /* Restore %fs and %gs. */ 2679 loadsegment(fs,next->tss.fs); 2680 loadsegment(gs,next->tss.gs); 2681 2682 /* Now maybe reload the debug registers */ 2683 if (next->tss.debugreg[7]){ 2684 loaddebug(next,0); 2685 loaddebug(next,1); 2686 loaddebug(next,2); 2687 loaddebug(next,3); 2688 loaddebug(next,6); 2689 loaddebug(next,7); 2690 } 2691 } 2692 2693 asmlinkage int sys_fork(struct pt_regs regs) 2694 { 2695 return do_fork(SIGCHLD, regs.esp, &regs); 2696 } 2697 2698 asmlinkage int sys_clone(struct pt_regs regs) 2699 { 2700 unsigned long clone_flags; 2701 unsigned long newsp; 2702 2703 clone_flags = regs.ebx; 2704 newsp = regs.ecx; 2705 if (!newsp) 2706 newsp = regs.esp; 2707 return do_fork(clone_flags, newsp, &regs); 2708 } 2709 2710 /* This is trivial, and on the face of it looks like it 2711 * could equally well be done in user mode. 2712 * 2713 * Not so, for quite unobvious reasons - register 2714 * pressure. In user mode vfork() cannot have a stack 2715 * frame, and if done by calling the "clone()" system 2716 * call directly, you do not have enough call-clobbered 2717 * registers to hold all the information you need. */ 2718 asmlinkage int sys_vfork(struct pt_regs regs) 2719 { 2720 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 2721 regs.esp, &regs); 2722 } 2723 2724 /* sys_execve() executes a new program.
*/ 2725 asmlinkage int sys_execve(struct pt_regs regs) 2726 { 2727 int error; 2728 char * filename; 2729 2730 lock_kernel(); 2731 filename = getname((char *) regs.ebx); 2732 error = PTR_ERR(filename); 2733 if (IS_ERR(filename)) 2734 goto out; 2735 error = do_execve(filename, (char **) regs.ecx, 2736 (char **) regs.edx, &regs); 2737 if (error == 0) 2738 current->flags &= ~PF_DTRACE; 2739 putname(filename); 2740 out: 2741 unlock_kernel(); 2742 return error; 2743 } /* FILE: arch/i386/kernel/signal.c */ 2744 /* 2745 * linux/arch/i386/kernel/signal.c 2746 * 2747 * Copyright (C) 1991, 1992 Linus Torvalds 2748 * 1997-11-28 Modified for POSIX.1b signals by Richard 2749 * Henderson */ 2750 2751 #include 2752 2753 #include 2754 #include 2755 #include 2756 #include 2757 #include 2758 #include 2759 #include 2760 #include 2761 #include 2762 #include 2763 #include 2764 #include 2765 #include 2766 2767 #define DEBUG_SIG 0 2768 2769 #define _BLOCKABLE (~(sigmask(SIGKILL)|sigmask(SIGSTOP))) 2770 2771 asmlinkage int sys_wait4(pid_t pid, 2772 unsigned long *stat_addr, 2773 int options, unsigned long *ru); 2774 asmlinkage int FASTCALL(do_signal(struct pt_regs *regs, 2775 sigset_t *oldset)); 2776 2777 /* Atomically swap in the new signal mask, and wait for a 2778 * signal.
*/ 2779 asmlinkage int 2780 sys_sigsuspend(int history0, int history1, 2781 old_sigset_t mask) 2782 { 2783 struct pt_regs * regs = (struct pt_regs *) &history0; 2784 sigset_t saveset; 2785 2786 mask &= _BLOCKABLE; 2787 spin_lock_irq(&current->sigmask_lock); 2788 saveset = current->blocked; 2789 siginitset(&current->blocked, mask); 2790 recalc_sigpending(current); 2791 spin_unlock_irq(&current->sigmask_lock); 2792 2793 regs->eax = -EINTR; 2794 while (1) { 2795 current->state = TASK_INTERRUPTIBLE; 2796 schedule(); 2797 if (do_signal(regs, &saveset)) 2798 return -EINTR; 2799 } 2800 } 2801 2802 asmlinkage int 2803 sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) 2804 { 2805 struct pt_regs * regs = (struct pt_regs *) &unewset; 2806 sigset_t saveset, newset; 2807 2808 /* XXX: Don't preclude handling different sized 2809 * sigset_t's. */ 2810 if (sigsetsize != sizeof(sigset_t)) 2811 return -EINVAL; 2812 2813 if (copy_from_user(&newset, unewset, sizeof(newset))) 2814 return -EFAULT; 2815 sigdelsetmask(&newset, ~_BLOCKABLE); 2816 2817 spin_lock_irq(&current->sigmask_lock); 2818 saveset = current->blocked; 2819 current->blocked = newset; 2820 recalc_sigpending(current); 2821 spin_unlock_irq(&current->sigmask_lock); 2822 2823 regs->eax = -EINTR; 2824 while (1) { 2825 current->state = TASK_INTERRUPTIBLE; 2826 schedule(); 2827 if (do_signal(regs, &saveset)) 2828 return -EINTR; 2829 } 2830 } 2831 2832 asmlinkage int 2833 sys_sigaction(int sig, const struct old_sigaction *act, 2834 struct old_sigaction *oact) 2835 { 2836 struct k_sigaction new_ka, old_ka; 2837 int ret; 2838 2839 if (act) { 2840 old_sigset_t mask; 2841 if (verify_area(VERIFY_READ, act, sizeof(*act)) || 2842 __get_user(new_ka.sa.sa_handler, 2843 &act->sa_handler) || 2844 __get_user(new_ka.sa.sa_restorer, 2845 &act->sa_restorer)) 2846 return -EFAULT; 2847 __get_user(new_ka.sa.sa_flags, &act->sa_flags); 2848 __get_user(mask, &act->sa_mask); 2849 siginitset(&new_ka.sa.sa_mask, mask); 2850 } 2851 2852 ret = do_sigaction(sig, act ?
&new_ka : NULL, 2853 oact ? &old_ka : NULL); 2854 2855 if (!ret && oact) { 2856 if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || 2857 __put_user(old_ka.sa.sa_handler, 2858 &oact->sa_handler) || 2859 __put_user(old_ka.sa.sa_restorer, 2860 &oact->sa_restorer)) 2861 return -EFAULT; 2862 __put_user(old_ka.sa.sa_flags, &oact->sa_flags); 2863 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); 2864 } 2865 2866 return ret; 2867 } 2868 2869 asmlinkage int 2870 sys_sigaltstack(const stack_t *uss, stack_t *uoss) 2871 { 2872 struct pt_regs *regs = (struct pt_regs *) &uss; 2873 return do_sigaltstack(uss, uoss, regs->esp); 2874 } 2875 2876 2877 /* Do a signal return; undo the signal stack. */ 2878 2879 struct sigframe 2880 { 2881 char *pretcode; 2882 int sig; 2883 struct sigcontext sc; 2884 struct _fpstate fpstate; 2885 unsigned long extramask[_NSIG_WORDS-1]; 2886 char retcode[8]; 2887 }; 2888 2889 struct rt_sigframe 2890 { 2891 char *pretcode; 2892 int sig; 2893 struct siginfo *pinfo; 2894 void *puc; 2895 struct siginfo info; 2896 struct ucontext uc; 2897 struct _fpstate fpstate; 2898 char retcode[8]; 2899 }; 2900 2901 2902 static inline int restore_i387_hard(struct _fpstate *buf) 2903 { 2904 struct task_struct *tsk = current; 2905 clear_fpu(tsk); 2906 return __copy_from_user(&tsk->tss.i387.hard, buf, 2907 sizeof(*buf)); 2908 } 2909 2910 static inline int restore_i387(struct _fpstate *buf) 2911 { 2912 int err; 2913 #ifndef CONFIG_MATH_EMULATION 2914 err = restore_i387_hard(buf); 2915 #else 2916 if (boot_cpu_data.hard_math) 2917 err = restore_i387_hard(buf); 2918 else 2919 err = restore_i387_soft(&current->tss.i387.soft,buf); 2920 #endif 2921 current->used_math = 1; 2922 return err; 2923 } 2924 2925 static int 2926 restore_sigcontext(struct pt_regs *regs, 2927 struct sigcontext *sc, int *peax) 2928 { 2929 unsigned int err = 0; 2930 2931 #define COPY(x) err |= __get_user(regs->x, &sc->x) 2932 2933 #define COPY_SEG(seg) \ 2934 { unsigned short tmp; \ 2935 err |= __get_user(tmp,
&sc->seg); \ 2936 regs->x##seg = tmp; } 2937 2938 #define COPY_SEG_STRICT(seg) \ 2939 { unsigned short tmp; \ 2940 err |= __get_user(tmp, &sc->seg); \ 2941 regs->x##seg = tmp|3; } 2942 2943 #define GET_SEG(seg) \ 2944 { unsigned short tmp; \ 2945 err |= __get_user(tmp, &sc->seg); \ 2946 loadsegment(seg,tmp); } 2947 2948 GET_SEG(gs); 2949 GET_SEG(fs); 2950 COPY_SEG(es); 2951 COPY_SEG(ds); 2952 COPY(edi); 2953 COPY(esi); 2954 COPY(ebp); 2955 COPY(esp); 2956 COPY(ebx); 2957 COPY(edx); 2958 COPY(ecx); 2959 COPY(eip); 2960 COPY_SEG_STRICT(cs); 2961 COPY_SEG_STRICT(ss); 2962 2963 { 2964 unsigned int tmpflags; 2965 err |= __get_user(tmpflags, &sc->eflags); 2966 regs->eflags = (regs->eflags & ~0x40DD5) | 2967 (tmpflags & 0x40DD5); 2968 regs->orig_eax = -1; /* disable syscall checks */ 2969 } 2970 2971 { 2972 struct _fpstate * buf; 2973 err |= __get_user(buf, &sc->fpstate); 2974 if (buf) { 2975 if (verify_area(VERIFY_READ, buf, sizeof(*buf))) 2976 goto badframe; 2977 err |= restore_i387(buf); 2978 } 2979 } 2980 2981 err |= __get_user(*peax, &sc->eax); 2982 return err; 2983 2984 badframe: 2985 return 1; 2986 } 2987 2988 asmlinkage int sys_sigreturn(unsigned long __unused) 2989 { 2990 struct pt_regs *regs = (struct pt_regs *) &__unused; 2991 struct sigframe *frame = 2992 (struct sigframe *)(regs->esp - 8); 2993 sigset_t set; 2994 int eax; 2995 2996 if (verify_area(VERIFY_READ, frame, sizeof(*frame))) 2997 goto badframe; 2998 if (__get_user(set.sig[0], &frame->sc.oldmask) 2999 || (_NSIG_WORDS > 1 3000 && __copy_from_user(&set.sig[1], &frame->extramask, 3001 sizeof(frame->extramask)))) 3002 goto badframe; 3003 3004 sigdelsetmask(&set, ~_BLOCKABLE); 3005 spin_lock_irq(&current->sigmask_lock); 3006 current->blocked = set; 3007 recalc_sigpending(current); 3008 spin_unlock_irq(&current->sigmask_lock); 3009 3010 if (restore_sigcontext(regs, &frame->sc, &eax)) 3011 goto badframe; 3012 return eax; 3013 3014 badframe: 3015 force_sig(SIGSEGV, current); 3016 return 0; 3017 } 3018 3019 asmlinkage int
sys_rt_sigreturn(unsigned long __unused) 3020 { 3021 struct pt_regs *regs = (struct pt_regs *) &__unused; 3022 struct rt_sigframe *frame = 3023 (struct rt_sigframe *)(regs->esp - 4); 3024 sigset_t set; 3025 stack_t st; 3026 int eax; 3027 3028 if (verify_area(VERIFY_READ, frame, sizeof(*frame))) 3029 goto badframe; 3030 if (__copy_from_user(&set, &frame->uc.uc_sigmask, 3031 sizeof(set))) 3032 goto badframe; 3033 3034 sigdelsetmask(&set, ~_BLOCKABLE); 3035 spin_lock_irq(&current->sigmask_lock); 3036 current->blocked = set; 3037 recalc_sigpending(current); 3038 spin_unlock_irq(&current->sigmask_lock); 3039 3040 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, 3041 &eax)) 3042 goto badframe; 3043 3044 if (__copy_from_user(&st, &frame->uc.uc_stack, 3045 sizeof(st))) 3046 goto badframe; 3047 /* It is more difficult to avoid calling this function 3048 * than to call it and ignore errors. */ 3049 do_sigaltstack(&st, NULL, regs->esp); 3050 3051 return eax; 3052 3053 badframe: 3054 force_sig(SIGSEGV, current); 3055 return 0; 3056 } 3057 3058 /* Set up a signal frame. */ 3059 3060 static inline int save_i387_hard(struct _fpstate * buf) 3061 { 3062 struct task_struct *tsk = current; 3063 3064 unlazy_fpu(tsk); 3065 tsk->tss.i387.hard.status = tsk->tss.i387.hard.swd; 3066 if (__copy_to_user(buf, &tsk->tss.i387.hard, 3067 sizeof(*buf))) 3068 return -1; 3069 return 1; 3070 } 3071 3072 static int save_i387(struct _fpstate *buf) 3073 { 3074 if (!current->used_math) 3075 return 0; 3076 3077 /* This will cause a "finit" to be triggered by the 3078 * next attempted FPU operation by the 'current' 3079 * process. */ 3080 current->used_math = 0; 3081 3082 #ifndef CONFIG_MATH_EMULATION 3083 return save_i387_hard(buf); 3084 #else 3085 return boot_cpu_data.hard_math ?
save_i387_hard(buf) 3086 : save_i387_soft(&current->tss.i387.soft, buf); 3087 #endif 3088 } 3089 3090 static int 3091 setup_sigcontext(struct sigcontext *sc, 3092 struct _fpstate *fpstate, 3093 struct pt_regs *regs,unsigned long mask) 3094 { 3095 int tmp, err = 0; 3096 3097 tmp = 0; 3098 __asm__("movl %%gs,%w0" : "=r"(tmp): "0"(tmp)); 3099 err |= __put_user(tmp, (unsigned int *)&sc->gs); 3100 __asm__("movl %%fs,%w0" : "=r"(tmp): "0"(tmp)); 3101 err |= __put_user(tmp, (unsigned int *)&sc->fs); 3102 3103 err |= __put_user(regs->xes, (unsigned int *)&sc->es); 3104 err |= __put_user(regs->xds, (unsigned int *)&sc->ds); 3105 err |= __put_user(regs->edi, &sc->edi); 3106 err |= __put_user(regs->esi, &sc->esi); 3107 err |= __put_user(regs->ebp, &sc->ebp); 3108 err |= __put_user(regs->esp, &sc->esp); 3109 err |= __put_user(regs->ebx, &sc->ebx); 3110 err |= __put_user(regs->edx, &sc->edx); 3111 err |= __put_user(regs->ecx, &sc->ecx); 3112 err |= __put_user(regs->eax, &sc->eax); 3113 err |= __put_user(current->tss.trap_no, &sc->trapno); 3114 err |= __put_user(current->tss.error_code, &sc->err); 3115 err |= __put_user(regs->eip, &sc->eip); 3116 err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); 3117 err |= __put_user(regs->eflags, &sc->eflags); 3118 err |= __put_user(regs->esp, &sc->esp_at_signal); 3119 err |= __put_user(regs->xss, (unsigned int *)&sc->ss); 3120 3121 tmp = save_i387(fpstate); 3122 if (tmp < 0) 3123 err = 1; 3124 else 3125 err |= __put_user(tmp ? fpstate : NULL,&sc->fpstate); 3126 3127 /* non-iBCS2 extensions.. */ 3128 err |= __put_user(mask, &sc->oldmask); 3129 err |= __put_user(current->tss.cr2, &sc->cr2); 3130 3131 return err; 3132 } 3133 3134 /* Determine which stack to use..
*/ 3135 static inline void * 3136 get_sigframe(struct k_sigaction *ka, 3137 struct pt_regs * regs, size_t frame_size) 3138 { 3139 unsigned long esp; 3140 3141 /* Default to using normal stack */ 3142 esp = regs->esp; 3143 3144 /* This is the X/Open sanctioned signal stack 3145 * switching. */ 3146 if (ka->sa.sa_flags & SA_ONSTACK) { 3147 if (! on_sig_stack(esp)) 3148 esp = current->sas_ss_sp + current->sas_ss_size; 3149 } 3150 3151 /* This is the legacy signal stack switching. */ 3152 else if ((regs->xss & 0xffff) != __USER_DS && 3153 !(ka->sa.sa_flags & SA_RESTORER) && 3154 ka->sa.sa_restorer) { 3155 esp = (unsigned long) ka->sa.sa_restorer; 3156 } 3157 3158 return (void *)((esp - frame_size) & -8ul); 3159 } 3160 3161 static void setup_frame(int sig, struct k_sigaction *ka, 3162 sigset_t *set, struct pt_regs * regs) 3163 { 3164 struct sigframe *frame; 3165 int err = 0; 3166 3167 frame = get_sigframe(ka, regs, sizeof(*frame)); 3168 3169 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 3170 goto give_sigsegv; 3171 3172 err |= __put_user((current->exec_domain 3173 && current->exec_domain->signal_invmap 3174 && sig < 32 3175 ? current->exec_domain->signal_invmap[sig] 3176 : sig), 3177 &frame->sig); 3178 3179 err |= setup_sigcontext(&frame->sc, &frame->fpstate, 3180 regs, set->sig[0]); 3181 3182 if (_NSIG_WORDS > 1) { 3183 err |= __copy_to_user(frame->extramask, &set->sig[1], 3184 sizeof(frame->extramask)); 3185 } 3186 3187 /* Set up to return from userspace. If provided, use a 3188 * stub already in userspace. 
*/ 3189 if (ka->sa.sa_flags & SA_RESTORER) { 3190 err |= __put_user(ka->sa.sa_restorer, 3191 &frame->pretcode); 3192 } else { 3193 err |= __put_user(frame->retcode, &frame->pretcode); 3194 /* This is popl %eax ; movl $,%eax ; int $0x80 */ 3195 err |= __put_user(0xb858, 3196 (short *)(frame->retcode+0)); 3197 err |= __put_user(__NR_sigreturn, 3198 (int *)(frame->retcode+2)); 3199 err |= __put_user(0x80cd, 3200 (short *)(frame->retcode+6)); 3201 } 3202 3203 if (err) 3204 goto give_sigsegv; 3205 3206 /* Set up registers for signal handler */ 3207 regs->esp = (unsigned long) frame; 3208 regs->eip = (unsigned long) ka->sa.sa_handler; 3209 3210 set_fs(USER_DS); 3211 regs->xds = __USER_DS; 3212 regs->xes = __USER_DS; 3213 regs->xss = __USER_DS; 3214 regs->xcs = __USER_CS; 3215 regs->eflags &= ~TF_MASK; 3216 3217 #if DEBUG_SIG 3218 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 3219 current->comm, current->pid, frame, regs->eip, 3220 frame->pretcode); 3221 #endif 3222 3223 return; 3224 3225 give_sigsegv: 3226 if (sig == SIGSEGV) 3227 ka->sa.sa_handler = SIG_DFL; 3228 force_sig(SIGSEGV, current); 3229 } 3230 3231 static void setup_rt_frame(int sig, 3232 struct k_sigaction *ka, siginfo_t *info, 3233 sigset_t *set, struct pt_regs * regs) 3234 { 3235 struct rt_sigframe *frame; 3236 int err = 0; 3237 3238 frame = get_sigframe(ka, regs, sizeof(*frame)); 3239 3240 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 3241 goto give_sigsegv; 3242 3243 err |= __put_user((current->exec_domain 3244 && current->exec_domain->signal_invmap 3245 && sig < 32 3246 ? current->exec_domain->signal_invmap[sig] 3247 : sig), 3248 &frame->sig); 3249 err |= __put_user(&frame->info, &frame->pinfo); 3250 err |= __put_user(&frame->uc, &frame->puc); 3251 err |= __copy_to_user(&frame->info,info,sizeof(*info)); 3252 3253 /* Create the ucontext. 
*/ 3254 err |= __put_user(0, &frame->uc.uc_flags); 3255 err |= __put_user(0, &frame->uc.uc_link); 3256 err |= __put_user(current->sas_ss_sp, 3257 &frame->uc.uc_stack.ss_sp); 3258 err |= __put_user(sas_ss_flags(regs->esp), 3259 &frame->uc.uc_stack.ss_flags); 3260 err |= __put_user(current->sas_ss_size, 3261 &frame->uc.uc_stack.ss_size); 3262 err |= setup_sigcontext(&frame->uc.uc_mcontext, 3263 &frame->fpstate, 3264 regs, set->sig[0]); 3265 err |= __copy_to_user(&frame->uc.uc_sigmask, set, 3266 sizeof(*set)); 3267 3268 /* Set up to return from userspace. If provided, use a 3269 * stub already in userspace. */ 3270 if (ka->sa.sa_flags & SA_RESTORER) { 3271 err |= __put_user(ka->sa.sa_restorer, 3272 &frame->pretcode); 3273 } else { 3274 err |= __put_user(frame->retcode, &frame->pretcode); 3275 /* This is movl $,%eax ; int $0x80 */ 3276 err |= __put_user(0xb8, (char *)(frame->retcode+0)); 3277 err |= __put_user(__NR_rt_sigreturn, 3278 (int *)(frame->retcode+1)); 3279 err |= __put_user(0x80cd, 3280 (short *)(frame->retcode+5)); 3281 } 3282 3283 if (err) 3284 goto give_sigsegv; 3285 3286 /* Set up registers for signal handler */ 3287 regs->esp = (unsigned long) frame; 3288 regs->eip = (unsigned long) ka->sa.sa_handler; 3289 3290 set_fs(USER_DS); 3291 regs->xds = __USER_DS; 3292 regs->xes = __USER_DS; 3293 regs->xss = __USER_DS; 3294 regs->xcs = __USER_CS; 3295 regs->eflags &= ~TF_MASK; 3296 3297 #if DEBUG_SIG 3298 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 3299 current->comm, current->pid, frame, regs->eip, 3300 frame->pretcode); 3301 #endif 3302 3303 return; 3304 3305 give_sigsegv: 3306 if (sig == SIGSEGV) 3307 ka->sa.sa_handler = SIG_DFL; 3308 force_sig(SIGSEGV, current); 3309 } 3310 3311 /* OK, we're invoking a handler */ 3312 3313 static void 3314 handle_signal(unsigned long sig, struct k_sigaction *ka, 3315 siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) 3316 { 3317 /* Are we from a system call? 
*/ 3318 if (regs->orig_eax >= 0) { 3319 /* If so, check system call restarting.. */ 3320 switch (regs->eax) { 3321 case -ERESTARTNOHAND: 3322 regs->eax = -EINTR; 3323 break; 3324 3325 case -ERESTARTSYS: 3326 if (!(ka->sa.sa_flags & SA_RESTART)) { 3327 regs->eax = -EINTR; 3328 break; 3329 } 3330 /* fallthrough */ 3331 case -ERESTARTNOINTR: 3332 regs->eax = regs->orig_eax; 3333 regs->eip -= 2; 3334 } 3335 } 3336 3337 /* Set up the stack frame */ 3338 if (ka->sa.sa_flags & SA_SIGINFO) 3339 setup_rt_frame(sig, ka, info, oldset, regs); 3340 else 3341 setup_frame(sig, ka, oldset, regs); 3342 3343 if (ka->sa.sa_flags & SA_ONESHOT) 3344 ka->sa.sa_handler = SIG_DFL; 3345 3346 if (!(ka->sa.sa_flags & SA_NODEFER)) { 3347 spin_lock_irq(&current->sigmask_lock); 3348 sigorsets(&current->blocked,&current->blocked, 3349 &ka->sa.sa_mask); 3350 sigaddset(&current->blocked,sig); 3351 recalc_sigpending(current); 3352 spin_unlock_irq(&current->sigmask_lock); 3353 } 3354 } 3355 3356 /* Note that 'init' is a special process: it doesn't get 3357 * signals it doesn't want to handle. Thus you cannot 3358 * kill init even with a SIGKILL even by mistake. 3359 * 3360 * Note that we go through the signals twice: once to 3361 * check the signals that the kernel can handle, and then 3362 * we build all the user-level signal handling 3363 * stack-frames in one go after that. */ 3364 int do_signal(struct pt_regs *regs, sigset_t *oldset) 3365 { 3366 siginfo_t info; 3367 struct k_sigaction *ka; 3368 3369 /* We want the common case to go fast, which is why we 3370 * may in certain cases get here from kernel mode. Just 3371 * return without doing anything if so.
*/ 3372 if ((regs->xcs & 3) != 3) 3373 return 1; 3374 3375 if (!oldset) 3376 oldset = &current->blocked; 3377 3378 for (;;) { 3379 unsigned long signr; 3380 3381 spin_lock_irq(&current->sigmask_lock); 3382 signr = dequeue_signal(&current->blocked, &info); 3383 spin_unlock_irq(&current->sigmask_lock); 3384 3385 if (!signr) 3386 break; 3387 3388 if ((current->flags & PF_PTRACED) && 3389 signr != SIGKILL) { 3390 /* Let the debugger run. */ 3391 current->exit_code = signr; 3392 current->state = TASK_STOPPED; 3393 notify_parent(current, SIGCHLD); 3394 schedule(); 3395 3396 /* We're back. Did the debugger cancel the sig? */ 3397 if (!(signr = current->exit_code)) 3398 continue; 3399 current->exit_code = 0; 3400 3401 /* The debugger continued. Ignore SIGSTOP. */ 3402 if (signr == SIGSTOP) 3403 continue; 3404 3405 /* Update the siginfo structure. Is this good? */ 3406 if (signr != info.si_signo) { 3407 info.si_signo = signr; 3408 info.si_errno = 0; 3409 info.si_code = SI_USER; 3410 info.si_pid = current->p_pptr->pid; 3411 info.si_uid = current->p_pptr->uid; 3412 } 3413 3414 /* If (new) signal is now blocked, requeue it. */ 3415 if (sigismember(&current->blocked, signr)) { 3416 send_sig_info(signr, &info, current); 3417 continue; 3418 } 3419 } 3420 3421 ka = &current->sig->action[signr-1]; 3422 if (ka->sa.sa_handler == SIG_IGN) { 3423 if (signr != SIGCHLD) 3424 continue; 3425 /* Check for SIGCHLD: it's special. */ 3426 while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) 3427 /* nothing */; 3428 continue; 3429 } 3430 3431 if (ka->sa.sa_handler == SIG_DFL) { 3432 int exit_code = signr; 3433 3434 /* Init gets no signals it doesn't want.
*/ 3435 if (current->pid == 1) 3436 continue; 3437 3438 switch (signr) { 3439 case SIGCONT: case SIGCHLD: case SIGWINCH: 3440 continue; 3441 3442 case SIGTSTP: case SIGTTIN: case SIGTTOU: 3443 if (is_orphaned_pgrp(current->pgrp)) 3444 continue; 3445 /* FALLTHRU */ 3446 3447 case SIGSTOP: 3448 current->state = TASK_STOPPED; 3449 current->exit_code = signr; 3450 if (!(current->p_pptr->sig->action[SIGCHLD-1]. 3451 sa.sa_flags & SA_NOCLDSTOP)) 3452 notify_parent(current, SIGCHLD); 3453 schedule(); 3454 continue; 3455 3456 case SIGQUIT: case SIGILL: case SIGTRAP: 3457 case SIGABRT: case SIGFPE: case SIGSEGV: 3458 lock_kernel(); 3459 if (current->binfmt 3460 && current->binfmt->core_dump 3461 && current->binfmt->core_dump(signr, regs)) 3462 exit_code |= 0x80; 3463 unlock_kernel(); 3464 /* FALLTHRU */ 3465 3466 default: 3467 lock_kernel(); 3468 sigaddset(&current->signal, signr); 3469 current->flags |= PF_SIGNALED; 3470 do_exit(exit_code); 3471 /* NOTREACHED */ 3472 } 3473 } 3474 3475 /* Whee! Actually deliver the signal. */ 3476 handle_signal(signr, ka, &info, oldset, regs); 3477 return 1; 3478 } 3479 3480 /* Did we come from a system call? */ 3481 if (regs->orig_eax >= 0) { 3482 /* Restart the system call - no handlers present */ 3483 if (regs->eax == -ERESTARTNOHAND || 3484 regs->eax == -ERESTARTSYS || 3485 regs->eax == -ERESTARTNOINTR) { 3486 regs->eax = regs->orig_eax; 3487 regs->eip -= 2; 3488 } 3489 } 3490 return 0; 3491 } /* FILE: arch/i386/kernel/smp.c */ 3492 /* 3493 * Intel MP v1.1/v1.4 specification support routines 3494 * for multi-pentium hosts. 3495 * 3496 * (c) 1995 Alan Cox, CymruNET Ltd 3497 * (c) 1998 Ingo Molnar 3498 * 3499 * Supported by Caldera http://www.caldera.com. 3500 * Much of the core SMP work is based on previous 3501 * work by Thomas Radke, to whom a great many thanks 3502 * are extended. 3503 * 3504 * Thanks to Intel for making available several 3505 * different Pentium, Pentium Pro and 3506 * Pentium-II/Xeon MP machines.
3507 * 3508 * This code is released under the GNU public 3509 * license version 2 or later. 3510 * 3511 * Fixes 3512 * Felix Koop : NR_CPUS used properly 3513 * Jose Renau : Handle single CPU case. 3514 * Alan Cox : By repeated request 8) - 3515 * Total BogoMIP report. 3516 * Greg Wright : Fix for kernel stacks panic. 3517 * Erich Boleyn : MP v1.4 and additional changes. 3518 * Matthias Sattler : Changes for 2.1 kernel map. 3519 * Michel Lespinasse: Changes for 2.1 kernel map. 3520 * Michael Chastain : Change trampoline.S to gnu as. 3521 * Alan Cox : Dumb bug: 'B' step PPro's are fine 3522 * Ingo Molnar : Added APIC timers, based on code 3523 * from Jose Renau 3524 * Alan Cox : Added EBDA scanning 3525 * Ingo Molnar : various cleanups and rewrites */ 3526 3527 #include 3528 #include 3529 #include 3530 #include 3531 #include 3532 #include 3533 #include 3534 #include 3535 3536 #include "irq.h" 3537 3538 extern unsigned long start_kernel; 3539 extern void update_one_process( struct task_struct *p, 3540 unsigned long ticks, unsigned long user, 3541 unsigned long system, int cpu); 3542 /* Some notes on processor bugs: 3543 * 3544 * Pentium and Pentium Pro (and all CPUs) have 3545 * bugs. The Linux issues for SMP are handled as 3546 * follows. 3547 * 3548 * Pentium Pro: 3549 * Occasional delivery of 'spurious interrupt' as trap 3550 * #16. This is very rare. The kernel logs the event and 3551 * recovers. 3552 * 3553 * Pentium: 3554 * There is a marginal case where REP MOVS on 100MHz SMP 3555 * machines with B stepping processors can fail. XXX 3556 * should provide an L1cache=Writethrough or L1cache=off 3557 * option. 3558 * 3559 * B stepping CPUs may hang. There are hardware work 3560 * arounds for this. We warn about it in case your board 3561 * doesn't have the work arounds. Basically that's so I can 3562 * tell anyone with a B stepping CPU and SMP problems 3563 * "tough". 3564 * 3565 * Specific items [From Pentium Processor 3566 * Specification Update] 3567 * 3568 * 1AP. 
Linux doesn't use remote read 3569 * 2AP. Linux doesn't trust APIC errors 3570 * 3AP. We work around this 3571 * 4AP. Linux never generates 3 interrupts of the 3572 * same pri to cause a lost local interrupt. 3573 * 5AP. Remote read is never used 3574 * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX 3575 * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX 3576 * 11AP. Linux reads the APIC between writes to 3577 * avoid this, as per the documentation. Make 3578 * sure you preserve this as it affects the C 3579 * stepping chips too. 3580 * 3581 * If this sounds worrying believe me these bugs are 3582 * ___RARE___ and there's about nothing of note with 3583 * C stepping upwards. */ 3584 3585 3586 /* Kernel spinlock */ 3587 spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; 3588 3589 /* function prototypes: */ 3590 3591 static void cache_APIC_registers (void); 3592 static void stop_this_cpu (void); 3593 3594 /* Set if we find a B stepping CPU */ 3595 static int smp_b_stepping = 0; 3596 3597 /* Configured maximum number of CPUs to activate; starts at -1 and is overwritten by smp_setup() from the boot command line */ 3598 static int max_cpus = -1; 3599 /* Have we found an SMP box */ 3600 int smp_found_config=0; 3601 3602 /* Bitmask of physically existing CPUs (smp_read_mpc sets one bit per enabled processor's APIC id) */ 3603 unsigned long cpu_present_map = 0; 3604 /* Bitmask of currently online CPUs */ 3605 unsigned long cpu_online_map = 0; 3606 /* Total count of live CPUs */ 3607 int smp_num_cpus = 1; 3608 /* Set when the idlers are all forked */ 3609 int smp_threads_ready=0; 3610 /* which CPU maps to which logical number */ 3611 volatile int cpu_number_map[NR_CPUS]; 3612 /* which logical number maps to which CPU */ 3613 volatile int __cpu_logical_map[NR_CPUS]; 3614 /* We always use 0 the rest is ready for parallel 3615 * delivery */ 3616 static volatile 3617 unsigned long cpu_callin_map[NR_CPUS] = {0,}; 3618 /* We always use 0 the rest is ready for parallel 3619 * delivery */ 3620 static volatile 3621 unsigned long cpu_callout_map[NR_CPUS] = {0,}; 3622 /* Used for the invalidate map that's also checked in the 
3623 * spinlock */ 3624 volatile unsigned long smp_invalidate_needed; 3625 /* Stack vector for booting CPUs */ 3626 volatile unsigned long kstack_ptr; 3627 /* Per CPU bogomips and other parameters */ 3628 struct cpuinfo_x86 cpu_data[NR_CPUS]; 3629 /* Internal processor count; starts at 1 for the boot CPU, smp_read_mpc adds one per further enabled processor */ 3630 static unsigned int num_processors = 1; 3631 /* Address of the I/O apic; default value, replaced by smp_read_mpc with the first usable I/O APIC entry */ 3632 unsigned long mp_ioapic_addr = 0xFEC00000; 3633 /* Processor that is doing the boot up */ 3634 unsigned char boot_cpu_id = 0; 3635 /* Tripped once we need to start cross invalidating */ 3636 static int smp_activated = 0; 3637 /* APIC version number, indexed by APIC id (filled in by smp_read_mpc) */ 3638 int apic_version[NR_CPUS]; 3639 /* Just debugging the assembler.. */ 3640 unsigned long apic_retval; 3641 3642 /* Number of times the processor holds the lock */ 3643 volatile unsigned long kernel_counter=0; 3644 /* Number of times the processor holds the syscall lock */ 3645 volatile unsigned long syscall_count=0; 3646 3647 /* Number of IPIs delivered */ 3648 volatile unsigned long ipi_count; 3649 3650 const char lk_lockmsg[] = 3651 "lock from interrupt context at %p\n"; 3652 3653 /* NOTE: "{ -1, }" sets only element 0 to -1; every other element is zero-initialised */ int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; 3654 extern int mp_irq_entries; 3655 extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 3656 extern int mpc_default_type; 3657 /* NOTE: as above, only element 0 starts out as -1 */ int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; 3658 int mp_current_pci_id = 0; 3659 unsigned long mp_lapic_addr = 0; 3660 /* 1 if "noapic" boot option passed */ 3661 int skip_ioapic_setup = 0; 3662 3663 /* #define SMP_DEBUG */ 3664 3665 #ifdef SMP_DEBUG 3666 #define SMP_PRINTK(x) printk x 3667 #else 3668 #define SMP_PRINTK(x) 3669 #endif 3670 3671 /* IA s/w dev Vol 3, Section 7.4 */ 3672 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 3673 3674 /* Reads the Pentium Timestamp-Counter into x: %eax goes to word 0, %edx to word 1. It does not clear the counter; that is left to CLEAR_TSC. */ 3675 #define READ_TSC(x) __asm__ __volatile__ ( "rdtsc" \ 3676 :"=a" (((unsigned long*)&(x))[0]), \ 3677 "=d" (((unsigned long*)&(x))[1])) 3678 3679 #define CLEAR_TSC \ 3680 __asm__ 
__volatile__ ("\t.byte 0x0f, 0x30;\n":: \ 3681 "a"(0x00001000), "d"(0x00001000), "c"(0x10):"memory") 3682 3683 /* Setup routine for controlling SMP activation 3684 * 3685 * Command-line option of "nosmp" or "maxcpus=0" 3686 * will disable SMP activation entirely (the MPS 3687 * table probe still happens, though). 3688 * 3689 * Command-line option of "maxcpus=", where 3690 * is an integer greater than 0, limits the 3691 * maximum number of CPUs activated in SMP mode to 3692 * . */ 3693 3694 void __init smp_setup(char *str, int *ints) 3695 { 3696 if (ints && ints[0] > 0) 3697 max_cpus = ints[1]; 3698 else 3699 max_cpus = 0; 3700 } 3701 3702 void ack_APIC_irq(void) 3703 { 3704 /* Clear the IPI */ 3705 3706 /* Dummy read */ 3707 apic_read(APIC_SPIV); 3708 3709 /* Docs say use 0 for future compatibility */ 3710 apic_write(APIC_EOI, 0); 3711 } 3712 3713 /* Intel MP BIOS table parsing routines: */ 3714 3715 #ifndef CONFIG_X86_VISWS_APIC 3716 /* Checksum an MP configuration block. */ 3717 3718 static int mpf_checksum(unsigned char *mp, int len) 3719 { 3720 int sum=0; 3721 while(len--) 3722 sum+=*mp++; 3723 return sum&0xFF; 3724 } 3725 3726 /* Processor encoding in an MP configuration block */ 3727 3728 static char *mpc_family(int family,int model) 3729 { 3730 static char n[32]; 3731 static char *model_defs[]= 3732 { 3733 "80486DX","80486DX", 3734 "80486SX","80486DX/2 or 80487", 3735 "80486SL","Intel5X2(tm)", 3736 "Unknown","Unknown", 3737 "80486DX/4" 3738 }; 3739 if (family==0x6) 3740 return("Pentium(tm) Pro"); 3741 if (family==0x5) 3742 return("Pentium(tm)"); 3743 if (family==0x0F && model==0x0F) 3744 return("Special controller"); 3745 if (family==0x04 && model<9) 3746 return model_defs[model]; 3747 sprintf(n,"Unknown CPU [%d:%d]",family, model); 3748 return n; 3749 } 3750 3751 /* Read the MPC */ 3752 3753 static int __init 3754 smp_read_mpc(struct mp_config_table *mpc) 3755 { 3756 char str[16]; 3757 int count=sizeof(*mpc); 3758 int ioapics = 0; 3759 unsigned char 
*mpt=((unsigned char *)mpc)+count; 3760 3761 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) 3762 { 3763 panic("SMP mptable: bad signature [%c%c%c%c]!\n", 3764 mpc->mpc_signature[0], 3765 mpc->mpc_signature[1], 3766 mpc->mpc_signature[2], 3767 mpc->mpc_signature[3]); 3768 return 1; 3769 } 3770 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) 3771 { 3772 panic("SMP mptable: checksum error!\n"); 3773 return 1; 3774 } 3775 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) 3776 { 3777 printk("Bad Config Table version (%d)!!\n", 3778 mpc->mpc_spec); 3779 return 1; 3780 } 3781 memcpy(str,mpc->mpc_oem,8); 3782 str[8]=0; 3783 memcpy(ioapic_OEM_ID,str,9); 3784 printk("OEM ID: %s ",str); 3785 3786 memcpy(str,mpc->mpc_productid,12); 3787 str[12]=0; 3788 memcpy(ioapic_Product_ID,str,13); 3789 printk("Product ID: %s ",str); 3790 3791 printk("APIC at: 0x%lX\n",mpc->mpc_lapic); 3792 3793 /* save the local APIC address, it might be 3794 * non-default */ 3795 mp_lapic_addr = mpc->mpc_lapic; 3796 3797 /* Now process the configuration blocks. 
*/ 3798 3799 while(countmpc_length) 3800 { 3801 switch(*mpt) 3802 { 3803 case MP_PROCESSOR: 3804 { 3805 struct mpc_config_processor *m= 3806 (struct mpc_config_processor *)mpt; 3807 if (m->mpc_cpuflag&CPU_ENABLED) 3808 { 3809 printk("Processor #%d %s APIC version %d\n", 3810 m->mpc_apicid, 3811 mpc_family((m->mpc_cpufeature& 3812 CPU_FAMILY_MASK)>>8, 3813 (m->mpc_cpufeature& 3814 CPU_MODEL_MASK)>>4), 3815 m->mpc_apicver); 3816 #ifdef SMP_DEBUG 3817 if (m->mpc_featureflag&(1<<0)) 3818 printk(" Floating point unit present.\n"); 3819 if (m->mpc_featureflag&(1<<7)) 3820 printk(" Machine Exception supported.\n"); 3821 if (m->mpc_featureflag&(1<<8)) 3822 printk(" 64 bit compare & exchange " 3823 "supported.\n"); 3824 if (m->mpc_featureflag&(1<<9)) 3825 printk(" Internal APIC present.\n"); 3826 #endif 3827 if (m->mpc_cpuflag&CPU_BOOTPROCESSOR) 3828 { 3829 SMP_PRINTK((" Bootup CPU\n")); 3830 boot_cpu_id=m->mpc_apicid; 3831 } 3832 else /* Boot CPU already counted */ 3833 num_processors++; 3834 3835 if (m->mpc_apicid>NR_CPUS) 3836 printk("Processor #%d unused. (Max %d " 3837 "processors).\n",m->mpc_apicid, NR_CPUS); 3838 else 3839 { 3840 int ver = m->mpc_apicver; 3841 3842 cpu_present_map|=(1<mpc_apicid); 3843 /* Validate version */ 3844 if (ver == 0x0) { 3845 printk("BIOS bug, APIC version is 0 for " 3846 "CPU#%d! fixing up to 0x10. 
(tell " 3847 "your hw vendor)\n", m->mpc_apicid); 3848 ver = 0x10; 3849 } 3850 apic_version[m->mpc_apicid] = ver; 3851 } 3852 } 3853 mpt+=sizeof(*m); 3854 count+=sizeof(*m); 3855 break; 3856 } 3857 case MP_BUS: 3858 { 3859 struct mpc_config_bus *m= 3860 (struct mpc_config_bus *)mpt; 3861 memcpy(str,m->mpc_bustype,6); 3862 str[6]=0; 3863 SMP_PRINTK(("Bus #%d is %s\n", 3864 m->mpc_busid, 3865 str)); 3866 if ((strncmp(m->mpc_bustype,"ISA",3) == 0) || 3867 (strncmp(m->mpc_bustype,"EISA",4) == 0)) 3868 mp_bus_id_to_type[m->mpc_busid] = 3869 MP_BUS_ISA; 3870 else 3871 if (strncmp(m->mpc_bustype,"PCI",3) == 0) { 3872 mp_bus_id_to_type[m->mpc_busid] = 3873 MP_BUS_PCI; 3874 mp_bus_id_to_pci_bus[m->mpc_busid] = 3875 mp_current_pci_id; 3876 mp_current_pci_id++; 3877 } 3878 mpt+=sizeof(*m); 3879 count+=sizeof(*m); 3880 break; 3881 } 3882 case MP_IOAPIC: 3883 { 3884 struct mpc_config_ioapic *m= 3885 (struct mpc_config_ioapic *)mpt; 3886 if (m->mpc_flags&MPC_APIC_USABLE) 3887 { 3888 ioapics++; 3889 printk("I/O APIC #%d Version %d at 0x%lX.\n", 3890 m->mpc_apicid,m->mpc_apicver, 3891 m->mpc_apicaddr); 3892 /* we use the first one only currently */ 3893 if (ioapics == 1) 3894 mp_ioapic_addr = m->mpc_apicaddr; 3895 } 3896 mpt+=sizeof(*m); 3897 count+=sizeof(*m); 3898 break; 3899 } 3900 case MP_INTSRC: 3901 { 3902 struct mpc_config_intsrc *m= 3903 (struct mpc_config_intsrc *)mpt; 3904 3905 mp_irqs [mp_irq_entries] = *m; 3906 if (++mp_irq_entries == MAX_IRQ_SOURCES) { 3907 printk("Max irq sources exceeded!!\n"); 3908 printk("Skipping remaining sources.\n"); 3909 --mp_irq_entries; 3910 } 3911 3912 mpt+=sizeof(*m); 3913 count+=sizeof(*m); 3914 break; 3915 } 3916 case MP_LINTSRC: 3917 { 3918 struct mpc_config_intlocal *m= 3919 (struct mpc_config_intlocal *)mpt; 3920 mpt+=sizeof(*m); 3921 count+=sizeof(*m); 3922 break; 3923 } 3924 } 3925 } 3926 if (ioapics > 1) 3927 { 3928 printk("Warning: " 3929 "Multiple IO-APICs not yet supported.\n"); 3930 printk("Warning: switching to non APIC 
mode.\n"); 3931 skip_ioapic_setup=1; 3932 } 3933 return num_processors; 3934 } 3935 3936 /* Scan the memory blocks for an SMP configuration block. 3937 */ 3938 3939 static int __init smp_scan_config(unsigned long base, 3940 unsigned long length) 3941 { 3942 unsigned long *bp=phys_to_virt(base); 3943 struct intel_mp_floating *mpf; 3944 3945 SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n", 3946 bp,length)); 3947 if (sizeof(*mpf)!=16) 3948 printk("Error: MPF size\n"); 3949 3950 while (length>0) 3951 { 3952 if (*bp==SMP_MAGIC_IDENT) 3953 { 3954 mpf=(struct intel_mp_floating *)bp; 3955 if (mpf->mpf_length==1 && 3956 !mpf_checksum((unsigned char *)bp,16) && 3957 (mpf->mpf_specification == 1 3958 || mpf->mpf_specification == 4) ) 3959 { 3960 printk("Intel MultiProcessor Specification " 3961 "v1.%d\n", mpf->mpf_specification); 3962 if (mpf->mpf_feature2&(1<<7)) 3963 printk(" IMCR and PIC " 3964 "compatibility mode.\n"); 3965