diff --git a/applications/peek.lisp b/applications/peek.lisp index 098451c36..e04a6d310 100644 --- a/applications/peek.lisp +++ b/applications/peek.lisp @@ -122,7 +122,7 @@ "DCA" "SSE4.1" "SSE4.2" - nil + "x2APIC" nil "POPCNT")) diff --git a/drivers/sound.lisp b/drivers/sound.lisp index 9ea8878f0..acc2215a9 100644 --- a/drivers/sound.lisp +++ b/drivers/sound.lisp @@ -132,7 +132,7 @@ (fill buffer 0.0 :start start :end end)) (prog1 ;; Try-lock the sink mutex. If the music player holds it (inside - ;; output-sound's transcode path), return T immediately — the + ;; output-sound's transcode path), return T immediately: the ;; buffer is already zeroed, so the HDA plays silence for this ;; period. Next period will retry. (if (mezzano.supervisor:acquire-mutex *sink-lock* nil) @@ -311,7 +311,7 @@ (loop (when (buffer-empty sink) ;; Buffer is currently empty, sink is not live. - ;; Only add to the sink list if not already present — the + ;; Only add to the sink list if not already present. the ;; callback may have left it if it was re-filled concurrently. (unless (member sink *sinks*) (setf (sup:event-state *sinks-present-event*) t) diff --git a/supervisor/acpi.lisp b/supervisor/acpi.lisp index 0d13971dc..d72213520 100644 --- a/supervisor/acpi.lisp +++ b/supervisor/acpi.lisp @@ -380,6 +380,12 @@ (:area :wired)) address) +(defstruct (acpi-madt-processor-x2apic + (:area :wired)) + x2apic-id + flags + acpi-processor-uid) + (defun read-acpi-madt-table (address) (let ((table (make-acpi-madt-table :address address)) (n-controller-entries 0)) @@ -393,9 +399,10 @@ (loop (when (>= offset total-length) (return)) - (when (<= (physical-memref-unsigned-byte-8 (+ address offset)) 5) - ;; Ignore entries with an unknown type. - (incf n-controller-entries)) + (let ((type (physical-memref-unsigned-byte-8 (+ address offset)))) + (when (or (<= type 5) (eql type 9)) + ;; Ignore entries with an unknown type. + (incf n-controller-entries))) (incf offset (physical-memref-unsigned-byte-8 (+ address offset 1))))) (setf (acpi-madt-table-controllers table) (sys.int::make-simple-vector n-controller-entries :wired)) (let ((offset 44) @@ -440,7 +447,13 @@ (5 ;; Local APIC address override. (setf (svref (acpi-madt-table-controllers table) current) (make-acpi-madt-lapic-address-override - :address (physical-memref-unsigned-byte-64 (+ address offset 4)))))) + :address (physical-memref-unsigned-byte-64 (+ address offset 4))))) + (9 ;; Processor local x2APIC. + (setf (svref (acpi-madt-table-controllers table) current) + (make-acpi-madt-processor-x2apic + :x2apic-id (physical-memref-unsigned-byte-32 (+ address offset 4)) + :flags (physical-memref-unsigned-byte-32 (+ address offset 8)) + :acpi-processor-uid (physical-memref-unsigned-byte-32 (+ address offset 12)))))) (incf current) (incf offset len)))) table)) diff --git a/supervisor/arm64/cpu.lisp b/supervisor/arm64/cpu.lisp index 8320487d2..9a6d56185 100644 --- a/supervisor/arm64/cpu.lisp +++ b/supervisor/arm64/cpu.lisp @@ -21,6 +21,7 @@ page-fault-hook) (defun initialize-boot-cpu () + (setf *tlb-shootdown-in-progress* nil) (setf (arm64-cpu-self *bsp-cpu*) *bsp-cpu*) (setf (arm64-cpu-state *bsp-cpu*) :online) (setf (arm64-cpu-idle-thread *bsp-cpu*) @@ -77,8 +78,18 @@ (defun local-cpu () (local-cpu-info)) +(defun cpu-memory-barrier () + "Full inner-shareable memory barrier for ordering lock data accesses." + (%dsb.ish)) + (defun initialize-cpu () (setf (arm64-cpu-cpu-id *bsp-cpu*) (fdt-boot-cpuid)) + (setf (cpu-cpu-index *bsp-cpu*) 0) + (setf (cpu-inhibit-scheduling *bsp-cpu*) 0) + (setf (cpu-tlb-generation *bsp-cpu*) 0) + ;; Allocate MCS node for BSP if not already allocated by cold-generator. + (when (null (cpu-mcs-node *bsp-cpu*)) + (setf (cpu-mcs-node *bsp-cpu*) (%make-mcs-node))) (push-wired *bsp-cpu* *cpus*)) (sys.int::define-lap-function %el0-common () @@ -224,52 +235,58 @@ (mezzano.lap.arm64:hlt 4)) (defun broadcast-panic-ipi () - (broadcast-ipi +panic-sgi-id+)) + (send-ipi-to-all +panic-sgi-id+)) (defun panic-ipi-handler (interrupt-frame) (declare (ignore interrupt-frame)) (loop (%arch-panic-stop))) (defun broadcast-wakeup-ipi () - (broadcast-ipi +wakeup-sgi-id+)) + (dolist (cpu *cpus*) + (when (eql (arm64-cpu-state cpu) :online) + (wake-cpu cpu)))) + +(defun wake-cpu (cpu) + (when (cpu-idle-p cpu) + (send-ipi-to-cpu cpu +wakeup-sgi-id+))) (sys.int::defglobal *non-quiescent-cpus-remaining*) -;; FIXME: quiesce-cpus-for-world-stop needs to prevent migration across CPUs. (defun quiesce-cpus-for-world-stop () "Bring all CPUs to a consistent state to stop the world. Protected by the world stop lock." + ;; Prevent migration during the broadcast and busy-wait. + (setf (cpu-inhibit-scheduling (local-cpu)) + (1+ (cpu-inhibit-scheduling (local-cpu)))) (setf *non-quiescent-cpus-remaining* (1- *n-up-cpus*)) - (broadcast-ipi +quiesce-sgi-id+) + (send-ipi-to-all +quiesce-sgi-id+) ;; FIXME: Use WFE/SEV instead of this spin-loop. (loop (when (eql *non-quiescent-cpus-remaining* 0) (return)) - (sys.int::cpu-relax))) + (sys.int::cpu-relax)) + (setf (cpu-inhibit-scheduling (local-cpu)) + (max 0 (1- (cpu-inhibit-scheduling (local-cpu)))))) ;; Save the current thread's state and switch to the CPU's idle thread. (defun quiesce-ipi-handler (interrupt-frame) - (let* ((current (current-thread)) - (idle (local-cpu-idle-thread)) - (was-active (not (eql current idle)))) - (when was-active - (acquire-global-thread-lock) - ;; Return this thread to the run queue. - (setf (thread-state current) :runnable) - (push-run-queue current) - (preemption-timer-reset nil) - ;; Save thread state. - (save-fpu-state current) - (save-interrupted-state current interrupt-frame) - ;; Partially switch to the idle thread. - (setf (thread-state idle) :active)) - ;; Have now reached a quiescent state. - (sys.int::%atomic-fixnum-add-symbol '*non-quiescent-cpus-remaining* - -1) - (when was-active - ;; Finally, return to the idle thread. - (%%switch-to-thread-common idle - idle)))) + (cond (*debug-magic-button-hold-variable* + (magic-button-ipi-handler interrupt-frame)) + (t + (let* ((current (current-thread)) + (idle (local-cpu-idle-thread)) + (was-active (not (eql current idle)))) + (when was-active + (acquire-global-thread-lock) + (setf (thread-state current) :runnable) + (push-run-queue current) + (preemption-timer-reset nil) + (save-fpu-state current) + (save-interrupted-state current interrupt-frame) + (setf (thread-state idle) :active)) + (sys.int::%atomic-fixnum-add-symbol '*non-quiescent-cpus-remaining* -1) + (when was-active + (%%switch-to-thread-common idle idle)))))) ;; TODO: This needs to be fixed up to prevent multiple CPUs hitting it at ;; once. It can't currently happen because it is only used from IRQ handlers @@ -280,7 +297,7 @@ Protected by the world stop lock." (defun stop-other-cpus-for-debug-magic-button () (setf *debug-magic-button-ready-variable* (1- *n-up-cpus*) *debug-magic-button-hold-variable* t) - (broadcast-ipi +magic-button-sgi-id+) + (send-ipi-to-all +quiesce-sgi-id+) ;; Wait for other CPUs to arrive, this ensures the thread state is actually ;; consistent. (loop until (eql *debug-magic-button-ready-variable* 0))) @@ -303,26 +320,29 @@ Protected by the world stop lock." (sys.int::%atomic-fixnum-add-symbol '*debug-magic-button-ready-variable* -1)) -;; TLB shootdown isn't required as ARM has cross-core TLB invalidation instructions - -(defun begin-tlb-shootdown () - nil) - -(defun tlb-shootdown-single (address) - (declare (ignore address)) - nil) +;; ARM64 has hardware-broadcast TLB invalidation (TLBI IS instructions). +;; No IPIs needed; the initiating CPU issues TLBI VAE1IS which broadcasts +;; to all CPUs in the inner shareable domain. We still bracket the operation +;; with inhibit-scheduling to prevent CPU migration. -(defun tlb-shootdown-range (base length) - (declare (ignore base length)) - nil) +(sys.int::defglobal *tlb-shootdown-in-progress* nil) -(defun tlb-shootdown-all () - nil) +(defun begin-tlb-shootdown () + "Prepare for TLB shootdown on ARM64. +TLB shootdown must be protected by the VM lock." + (ensure (rw-lock-write-held-p *vm-lock*) "VM lock not held when doing TLB shootdown!") + (ensure (not *tlb-shootdown-in-progress*) "TLB shootdown already in progress!") + (setf *tlb-shootdown-in-progress* t) + (setf (cpu-inhibit-scheduling (local-cpu)) + (1+ (cpu-inhibit-scheduling (local-cpu))))) (defun finish-tlb-shootdown () - nil) + (ensure *tlb-shootdown-in-progress*) + (setf *tlb-shootdown-in-progress* nil) + (setf (cpu-inhibit-scheduling (local-cpu)) + (max 0 (1- (cpu-inhibit-scheduling (local-cpu)))))) -(defun check-tlb-shootdown-not-in-progress () +(defun check-tlb-generation-consistency () nil) (defun local-cpu-idle-thread () @@ -356,6 +376,10 @@ Protected by the world stop lock." :sp-el1 (+ (stack-base wired-stack) (stack-size wired-stack) -16)))) + (setf (cpu-cpu-index cpu) (length *cpus*)) + (setf (cpu-inhibit-scheduling cpu) 0) + (setf (cpu-tlb-generation cpu) 0) + (setf (cpu-mcs-node cpu) (%make-mcs-node)) (setf (arm64-cpu-self cpu) cpu) (setf (sys.int::memref-unsigned-byte-64 (arm64-cpu-sp-el1 cpu)) (sys.int::lisp-object-address cpu)) diff --git a/supervisor/arm64/gic.lisp b/supervisor/arm64/gic.lisp index 3f34f199a..8c55d5e2c 100644 --- a/supervisor/arm64/gic.lisp +++ b/supervisor/arm64/gic.lisp @@ -46,7 +46,6 @@ (defconstant +panic-sgi-id+ 1) (defconstant +wakeup-sgi-id+ 2) (defconstant +quiesce-sgi-id+ 3) -(defconstant +magic-button-sgi-id+ 4) (defun gic-dist-reg (index) (physical-memref-unsigned-byte-32 (+ *gic-distributor-base* index))) @@ -102,8 +101,7 @@ ;; Unmask our various IPIs (gic-unmask-interrupt +panic-sgi-id+) (gic-unmask-interrupt +wakeup-sgi-id+) - (gic-unmask-interrupt +quiesce-sgi-id+) - (gic-unmask-interrupt +magic-button-sgi-id+)) + (gic-unmask-interrupt +quiesce-sgi-id+)) (defun configure-gic-cpu () ;; Enable the local CPU. @@ -141,8 +139,6 @@ (#.+quiesce-sgi-id+ ;; nothing, handled later nil) - (#.+magic-button-sgi-id+ - (magic-button-ipi-handler interrupt-frame)) (t (debug-print-line "Received unknown SGI " vector)))) (t ;; Normal external IRQ @@ -174,9 +170,15 @@ (defun platform-unmask-irq (vector) (gic-unmask-interrupt vector)) -(defun broadcast-ipi (vector) +(defun send-ipi-to-all (vector) (%dsb.ishst) (setf (gic-dist-reg +gicd-sgir+) (logior (ash 1 24) ; all-but-self vector)) nil) + +(defun send-ipi-to-cpu (cpu vector) + (%dsb.ishst) + (setf (gic-dist-reg +gicd-sgir+) + (logior (ash 1 (+ 16 (cpu-cpu-index cpu))) + vector))) diff --git a/supervisor/arm64/thread.lisp b/supervisor/arm64/thread.lisp index fbb990353..404009af8 100644 --- a/supervisor/arm64/thread.lisp +++ b/supervisor/arm64/thread.lisp @@ -86,13 +86,7 @@ (mezzano.lap.arm64:ret)) (sys.int::define-lap-function %%restore-full-save-thread ((thread)) - ;; Drop the global thread lock. - ;; This must be done here, not in %%switch-to-thread-common to prevent - ;; another CPU from switching on to the old thread's stack while it is - ;; still in use. - (mezzano.lap.arm64:ldr :x1 (:symbol-global-cell *global-thread-lock*)) - (mezzano.lap.arm64:ldr :x2 (:constant :unlocked)) - (mezzano.lap.arm64:str :x2 (:object :x1 #.sys.int::+symbol-value-cell-value+)) + ;; The global thread lock is released by Lisp code before calling this function. ;; Switch back to SP_EL0, restoring the original value of SP_EL1. (mezzano.lap.arm64:add :sp :x27 0) (mezzano.lap.arm64:msr :spsel 0) @@ -130,13 +124,7 @@ (mezzano.lap.arm64:eret)) (sys.int::define-lap-function %%restore-partial-save-thread ((thread)) - ;; Drop the global thread lock. - ;; This must be done here, not in %%switch-to-thread-common to prevent - ;; another CPU from switching on to the old thread's stack while it is - ;; still in use. - (mezzano.lap.arm64:ldr :x1 (:symbol-global-cell *global-thread-lock*)) - (mezzano.lap.arm64:ldr :x2 (:constant :unlocked)) - (mezzano.lap.arm64:str :x2 (:object :x1 #.sys.int::+symbol-value-cell-value+)) + ;; The global thread lock is released by Lisp code before calling this function. ;; Switch back to SP_EL0, restoring the original value of SP_EL1. (mezzano.lap.arm64:add :sp :x27 0) (mezzano.lap.arm64:msr :spsel 0) diff --git a/supervisor/debug.lisp b/supervisor/debug.lisp index 10875a252..ea64466bc 100644 --- a/supervisor/debug.lisp +++ b/supervisor/debug.lisp @@ -315,12 +315,14 @@ (defun dump-threads () (dump-thread (current-thread) (sys.int::read-frame-pointer)) (when (boundp '*all-threads*) - (do ((thread *all-threads* - (thread-global-next thread))) - ((null thread)) - (when (not (eql thread (current-thread))) - (debug-print-line "----------") - (dump-thread thread (thread-frame-pointer thread)))))) + (with-rcu-read-lock + (do ((thread *all-threads* + (thread-global-next thread))) + ((null thread)) + (when (and (not (eql thread (current-thread))) + (not (eql (thread-state thread) :dead))) + (debug-print-line "----------") + (dump-thread thread (thread-frame-pointer thread))))))) (defun debug-dump () (debug-print-line "Local CPU is " (local-cpu)) diff --git a/supervisor/disk.lisp b/supervisor/disk.lisp index 3ce24c4b0..71af4490f 100644 --- a/supervisor/disk.lisp +++ b/supervisor/disk.lisp @@ -47,7 +47,7 @@ lba n-sectors buffer - (lock (place-spinlock-initializer)) + (lock :unlocked) (latch (make-event :name "Disk request notifier")) next) @@ -84,7 +84,7 @@ (setf *log-disk-requests* nil) (setf *disk-request-current* nil *disk-request-queue-head* nil - *disk-request-queue-lock* (place-spinlock-initializer) + *disk-request-queue-lock* :unlocked *disk-request-queue-latch* (make-event :name "Disk request queue notifier") *disks* '())) ;; Abort any queued or in-progress requests. diff --git a/supervisor/entry.lisp b/supervisor/entry.lisp index 4e51caea1..8fbaac6b1 100644 --- a/supervisor/entry.lisp +++ b/supervisor/entry.lisp @@ -154,7 +154,7 @@ (setf (sys.int::symbol-global-value 'mezzano.runtime::*active-catch-handlers*) 'nil (sys.int::symbol-global-value '*pseudo-atomic*) nil sys.int::*known-finalizers* nil - *big-wait-for-objects-lock* (place-spinlock-initializer))) + *big-wait-for-objects-lock* :unlocked)) (initialize-early-platform) (when (boundp '*boot-id*) (setf (event-state *boot-id*) t)) diff --git a/supervisor/interrupts.lisp b/supervisor/interrupts.lisp index 964b655b2..a37baf735 100644 --- a/supervisor/interrupts.lisp +++ b/supervisor/interrupts.lisp @@ -40,6 +40,7 @@ RETURN-FROM/GO must not be used to leave this form." nil ,@captures)) +;;; TATAS (test-and-test-and-set) spinlocks. general purpose, supports nesting. (defun place-spinlock-initializer () :unlocked) @@ -87,8 +88,6 @@ RETURN-FROM/GO must not be used to leave this form." `(let* (,@(mapcar #'list vars vals) (,new-sym :unlocked) (,old-sym ,read-form)) - ;; FIXME: This should use the write form but that doesn't have the proper - ;; release semantics yet on arm64. ,cas-form (values)))) @@ -121,6 +120,76 @@ RETURN-FROM/GO must not be used to leave this form." (check-type lock symbol) `(ensure-place-spinlock-held ,lock)) +;;; MCS (Mellor-Crummy-Scott) queue-based spinlocks - fair, FIFO, each CPU +;;; spins on its own cache line. CANNOT be nested on the same CPU. +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun mcs-cas-target (place) + "Convert a spinlock place to a form suitable for CAS. +Bare symbols become (sys.int::symbol-global-value 'SYM); +struct-accessor forms are returned as-is." + (if (symbolp place) + `(sys.int::symbol-global-value ',place) + place))) + +(defmacro acquire-mcs-spinlock (place) + "Acquire a spinlock using MCS fair queuing. +NOTE: do NOT nest MCS spinlock acquisitions on the same CPU." + (let ((mcs-node (gensym "MCS-NODE")) + (prev (gensym "PREV")) + (cas-target (mcs-cas-target place))) + `(let ((,mcs-node (cpu-mcs-node (local-cpu)))) + (ensure-interrupts-disabled) + (setf (mcs-node-next ,mcs-node) nil + (mcs-node-locked ,mcs-node) nil) + (let ((,prev nil)) + (loop + (setf ,prev ,place) + (when (eql (sys.int::cas ,cas-target ,prev ,mcs-node) ,prev) + (return))) + (if (null ,prev) + (setf (mcs-node-locked ,mcs-node) t) + (progn + (setf (mcs-node-next ,prev) ,mcs-node) + (loop until (mcs-node-locked ,mcs-node) + do (sys.int::cpu-relax)))) + ;; Acquire barrier: make sure protected-data reads are not + ;; reordered before the lock is observed held. Required on + ;; weakly-ordered ARM64 (x86-64 TSO folds this into the CAS). + #+arm64 (cpu-memory-barrier) + (values))))) + +(defmacro release-mcs-spinlock (place) + "Release an MCS spinlock." + (let ((mcs-node (gensym "MCS-NODE")) + (cas-target (mcs-cas-target place))) + `(let ((,mcs-node (cpu-mcs-node (local-cpu)))) + ;; Release barrier: make sure all protected-data stores from the + ;; critical section are visible before the handoff (or before the + ;; lock word goes to nil for the uncontended release). Required + ;; on weakly-ordered ARM64. + #+arm64 (cpu-memory-barrier) + (block release-mcs-spinlock + (if (null (mcs-node-next ,mcs-node)) + (if (eql (sys.int::cas ,cas-target ,mcs-node nil) ,mcs-node) + (return-from release-mcs-spinlock) + (loop until (mcs-node-next ,mcs-node) + do (sys.int::cpu-relax)))) + (setf (mcs-node-locked (mcs-node-next ,mcs-node)) t)) + (setf (mcs-node-locked ,mcs-node) nil) + (values)))) + +(defmacro with-mcs-spinlock ((place) &body body) + `(progn + (acquire-mcs-spinlock ,place) + (unwind-protect + (progn ,@body) + (release-mcs-spinlock ,place)))) + +(defmacro ensure-mcs-spinlock-held (place) + (declare (ignore place)) + `(ensure (mcs-node-locked (cpu-mcs-node (local-cpu))) + "Expected lock to be held by current CPU")) + (defmacro with-page-fault-hook (((&optional frame info fault-address) &body hook-body) &body body) (let ((old (gensym)) (frame (or frame (gensym "FRAME"))) @@ -228,7 +297,7 @@ RETURN-FROM/GO must not be used to leave this form." platform-number attachments (count 0) - (lock (place-spinlock-initializer))) + (lock :unlocked)) (defstruct (irq-attachment (:area :wired)) @@ -313,7 +382,7 @@ RETURN-FROM/GO must not be used to leave this form." latch event (state :masked) - (lock (place-spinlock-initializer))) + (lock :unlocked)) (defun make-simple-irq (irq-number &optional latch) (declare (mezzano.compiler::closure-allocation :wired)) diff --git a/supervisor/pager.lisp b/supervisor/pager.lisp index eb47979b6..c2e0b39dc 100644 --- a/supervisor/pager.lisp +++ b/supervisor/pager.lisp @@ -28,6 +28,29 @@ (sys.int::defglobal *store-fudge-factor*) +(defconstant +tlb-shootdown-batch-size+ 64 + "Maximum number of pages to invalidate individually before falling +back to a full TLB flush.") + +(defun check-tlb-shootdown-not-in-progress () + (ensure (not *tlb-shootdown-in-progress*) "TLB shootdown in progress!")) + +(defun tlb-shootdown-single (address) + (ensure *tlb-shootdown-in-progress*) + (flush-tlb-single address)) + +(defun tlb-shootdown-range (base length) + (ensure *tlb-shootdown-in-progress*) + (let ((n-pages (truncate (+ length (1- +4k-page-size+)) +4k-page-size+))) + (if (<= n-pages +tlb-shootdown-batch-size+) + (loop for addr from base below (+ base length) by +4k-page-size+ + do (flush-tlb-single addr)) + (flush-tlb)))) + +(defun tlb-shootdown-all () + (ensure *tlb-shootdown-in-progress*) + (flush-tlb)) + (defun pager-log (&rest things) (declare (dynamic-extent things)) (when (eql *pager-noisy* t) @@ -499,7 +522,6 @@ Returns NIL if the entry is missing and ALLOCATE is false." :allow-wired stackp :stackp stackp) (setf (page-table-entry pte 0) 0)))) (begin-tlb-shootdown) - (flush-tlb) (when (not (or stackp (mark-bit-region-p base))) (tlb-shootdown-range card-base card-length)) @@ -549,7 +571,6 @@ Returns NIL if the entry is missing and ALLOCATE is false." (t ;; Mark read-only. (update-pte pte :writable nil))))))) - (flush-tlb) (tlb-shootdown-all) (finish-tlb-shootdown))) @@ -568,8 +589,6 @@ Returns NIL if the entry is missing and ALLOCATE is false." (panic "Missing pte for wired page " wired-page)) (when (page-dirty-p pte) (setf (sys.int::card-table-dirty-gen wired-page) 0) - ;; ARM64's dirty bit emulation does not support emulating - ;; dirty bits in the wired area yet. #-arm64 (update-pte pte :dirty nil)))) (map-ptes @@ -579,11 +598,8 @@ Returns NIL if the entry is missing and ALLOCATE is false." (panic "Missing pte for wired function page " wired-page)) (when (page-dirty-p pte) (setf (sys.int::card-table-dirty-gen wired-page) 0) - ;; ARM64's dirty bit emulation does not support emulating - ;; dirty bits in the wired area yet. #-arm64 (update-pte pte :dirty nil)))) - (flush-tlb) (tlb-shootdown-all) (finish-tlb-shootdown))) @@ -734,7 +750,6 @@ mapped, then the entry will be NIL." (remove-from-page-replacement-list candidate) (begin-tlb-shootdown) (setf (page-table-entry pte-addr) (make-pte 0 :present nil)) - (flush-tlb-single candidate-virtual) (tlb-shootdown-single candidate-virtual) (finish-tlb-shootdown) ;; Maybe write it back to disk. @@ -848,7 +863,6 @@ mapped, then the entry will be NIL." (begin-tlb-shootdown) (setf (page-table-entry pte) (make-pte (ash (pte-physical-address (page-table-entry pte)) -12) :writable (block-info-writable-p block-info))) - (flush-tlb-single address) (tlb-shootdown-single address) (finish-tlb-shootdown)) #+(or)(debug-print-line "WFP " address " block " block-info " already mapped " (page-table-entry pte 0)) @@ -897,7 +911,6 @@ mapped, then the entry will be NIL." ;; Mark the page dirty to make sure the snapshotter & swap code know to swap it out. ;; The zero fill flag in the block map was cleared, but the on-disk data doesn't reflect that. :dirty is-zero-page)) - (flush-tlb-single address) (tlb-shootdown-single address) (finish-tlb-shootdown) #+(or) @@ -1108,10 +1121,10 @@ It will put the thread to sleep, while it waits for the page." (setf *pager-noisy* nil *pager-waiting-threads* '() *pager-current-thread* nil - *pager-lock* (place-spinlock-initializer) + *pager-lock* :unlocked *pager-fast-path-enabled* t *pager-lazy-block-allocation-enabled* t)) - (setf *page-replacement-list-lock* (place-spinlock-initializer) + (setf *page-replacement-list-lock* :unlocked *page-replacement-list-head* nil *page-replacement-list-tail* nil) (setf *pager-fast-path-hits* 0 diff --git a/supervisor/pci.lisp b/supervisor/pci.lisp index c11d9e430..ed6b21e77 100644 --- a/supervisor/pci.lisp +++ b/supervisor/pci.lisp @@ -302,6 +302,12 @@ Returns NIL if the BAR has an unknown type." (defun pci-intr-line (device) (pci-config/8 device +pci-config-intr-line+)) +(defun pci-configure-msi (device vector cpu) + "Configure MSI/MSI-X for DEVICE to use VECTOR on CPU. +Not yet implemented." + (declare (ignore device vector cpu)) + (panic "pci-configure-msi is not implemented")) + (defun pci-bus-master-enabled (device) (logbitp +pci-command-bus-master+ (pci-config/16 device +pci-config-command+))) diff --git a/supervisor/profiler.lisp b/supervisor/profiler.lisp index be024c1c3..fc1b5ad0f 100644 --- a/supervisor/profiler.lisp +++ b/supervisor/profiler.lisp @@ -93,17 +93,19 @@ (profile-append-return-address (thread-state-rip thread))) (profile-append-call-stack (thread-frame-pointer thread))))) (t - (loop - for thread = *all-threads* then (thread-global-next thread) - until (not thread) do - (when (not (eql thread (current-thread))) + (with-rcu-read-lock + (loop + for thread = *all-threads* then (thread-global-next thread) + until (not thread) do + (when (and (not (eql (thread-state thread) :dead)) + (not (eql thread (current-thread)))) (profile-append-entry thread) (profile-append-entry (thread-state thread)) (profile-append-entry (thread-wait-item thread)) (when (thread-full-save-p thread) ;; RIP is valid in the save area. (profile-append-return-address (thread-state-rip thread))) - (profile-append-call-stack (thread-frame-pointer thread)))))) + (profile-append-call-stack (thread-frame-pointer thread))))))) (resume-other-cpus-for-debug-magic-button))) (defun start-profiling (&key buffer-size thread (reset t) (sample-during-gc t)) diff --git a/supervisor/snapshot.lisp b/supervisor/snapshot.lisp index 0b91aeed9..c7c09b6b4 100644 --- a/supervisor/snapshot.lisp +++ b/supervisor/snapshot.lisp @@ -142,7 +142,6 @@ (make-pte new-frame :writable (and (block-info-writable-p block-info) (not (block-info-track-dirty-p block-info))))) - (flush-tlb-single fault-addr) (tlb-shootdown-single fault-addr) (finish-tlb-shootdown) #+(or)(debug-print-line "Copied page " fault-addr))) @@ -204,8 +203,7 @@ Returns 4 values: (make-pte frame :writable (and (block-info-writable-p block-info) (not (block-info-track-dirty-p block-info))) - :dirty (page-dirty-p pte))) - (flush-tlb-single address)) + :dirty (page-dirty-p pte)))) ;; Return page to normal use. (setf (physical-page-frame-type frame) :active) (append-to-page-replacement-list frame) diff --git a/supervisor/sync.lisp b/supervisor/sync.lisp index 88b281983..8f3df589c 100644 --- a/supervisor/sync.lisp +++ b/supervisor/sync.lisp @@ -51,7 +51,7 @@ (defstruct (wait-queue (:area :wired)) (name nil) - (%lock (place-spinlock-initializer)) + (%lock :unlocked) (head nil) (tail nil)) @@ -102,7 +102,12 @@ (state :unlocked) (stack-next nil) ;; Number of times ACQUIRE-MUTEX failed to immediately acquire the lock. - (contested-count 0 :type fixnum)) + (contested-count 0 :type fixnum) + ;; Priority inheritance (turnstile). + ;; Thread whose priority was boosted to prevent priority inversion. + (boosted-thread nil) + ;; Original priority of the boosted thread, for restoration on release. + (original-priority nil)) (defun acquire-mutex (mutex &optional (wait-p t)) (check-type mutex mutex) @@ -142,6 +147,18 @@ (setf (mutex-owner mutex) self) (unlock-wait-queue mutex) (return-from acquire-mutex-slow-path)) + ;; Priority inheritance: boost the holder if our priority is higher. + (let ((holder (mutex-owner mutex)) + (my-priority (thread-priority self))) + (when (and holder + (threadp holder) + (thread-priority-higher-p my-priority (thread-priority holder))) + ;; Record the boost if not already boosted. + (when (null (mutex-boosted-thread mutex)) + (setf (mutex-boosted-thread mutex) holder + (mutex-original-priority mutex) (thread-priority holder))) + ;; Elevate holder to our priority. + (setf (thread-priority holder) my-priority))) ;; Add to wait queue. Release will directly transfer ownership ;; to this thread. (push-wait-queue self mutex) @@ -191,6 +208,12 @@ ;; Contested lock. Need to wake a thread and pass the lock to it. (safe-without-interrupts (mutex) (with-wait-queue-lock (mutex) + ;; Restore priority of the boosted thread. + (let ((boosted (mutex-boosted-thread mutex))) + (when boosted + (setf (thread-priority boosted) (mutex-original-priority mutex)) + (setf (mutex-boosted-thread mutex) nil + (mutex-original-priority mutex) nil))) ;; Look for a thread to wake. (let ((thread (pop-wait-queue mutex))) (cond (thread @@ -206,6 +229,12 @@ (setf (mutex-owner mutex) nil) (when (not (eql (sys.int::cas (mutex-state mutex) :locked :unlocked) :locked)) ;; Mutex must be in the contested state. + ;; Restore priority of the boosted thread. + (let ((boosted (mutex-boosted-thread mutex))) + (when boosted + (setf (thread-priority boosted) (mutex-original-priority mutex)) + (setf (mutex-boosted-thread mutex) nil + (mutex-original-priority mutex) nil))) ;; Look for a thread to wake. (let ((thread (pop-wait-queue mutex))) (cond (thread @@ -406,7 +435,7 @@ May be used from an interrupt handler, assuming the associated mutex is interrup (:area :wired)) name (state +rw-lock-state-unlocked+ :type fixnum) - (lock (place-spinlock-initializer)) + (lock :unlocked) writer-wait-queue reader-wait-queue (n-pending-readers 0 :type fixnum) @@ -1081,7 +1110,7 @@ multiple threads." (buffer (error "no buffer supplied") :read-only t) (count) data-available - (lock (place-spinlock-initializer))) + (lock :unlocked)) (defun make-irq-fifo (size &key (element-type 't) name) ;; TODO: non-t element types. @@ -1157,6 +1186,57 @@ It is only possible for the second value to be false when wait-p is false." (irq-fifo-count fifo) 0) (setf (event-state (irq-fifo-data-available fifo)) nil)))) +;;; The MCS queue-based spinlock primitives live in interrupts.lisp +;;; (ACQUIRE-MCS-SPINLOCK / RELEASE-MCS-SPINLOCK / WITH-MCS-SPINLOCK) and +;;; each CPU has a pre-allocated MCS node in its cpu struct. +;;; *global-thread-lock* uses MCS; it is released from Lisp in +;;; %%SWITCH-TO-THREAD-COMMON (on the kernel wired stack) before the LAP +;;; restore trampolines switch to the new thread's stack. + +;;; RCU primitives for lock-free read-side access. +;;; Each CPU has an rcu-nest counter in the cpu struct. +;;; > 0 means inside an RCU read-side critical section. + +(defun rcu-read-lock () + "Enter an RCU read-side critical section." + (setf (cpu-rcu-nest (local-cpu)) + (1+ (cpu-rcu-nest (local-cpu))))) + +(defun rcu-read-unlock () + "Exit an RCU read-side critical section." + (let ((new (1- (cpu-rcu-nest (local-cpu))))) + (setf (cpu-rcu-nest (local-cpu)) new) + (when (minusp new) + (panic "RCU read-unlock without matching read-lock")))) + +(defmacro with-rcu-read-lock (&body body) + `(unwind-protect + (progn + (rcu-read-lock) + ,@body) + (rcu-read-unlock))) + +(sys.int::defglobal *rcu-deferred-list* nil + "List of objects to be freed after an RCU grace period.") + +(defun rcu-synchronize () + "Reclaim deferred thread deletions. +RCU readers (see ALL-THREADS / WITH-RCU-READ-LOCK) skip :dead threads +and thread objects are reclaimed by the GC once no reader references +them, so the deferred list can be drained without waiting for an +explicit grace period. The actual draining also happens opportunistically +on every thread exit inside THREAD-FINAL-CLEANUP." + (cleanup-dead-threads)) + +(defun call-with-rcu-synchronize (thunk) + (rcu-synchronize) + (funcall thunk) + (rcu-synchronize)) + +(defmacro after-rcu-grace-period (&body body) + "Execute BODY after an RCU grace period." + `(call-with-rcu-synchronize (lambda () ,@body))) + (defun initialize-sync (first-run-p) (when first-run-p (setf *watcher-watcher-pool* diff --git a/supervisor/thread.lisp b/supervisor/thread.lisp index 0603a60a4..f3bc33270 100644 --- a/supervisor/thread.lisp +++ b/supervisor/thread.lisp @@ -3,13 +3,13 @@ (in-package :mezzano.supervisor) (sys.int::defglobal *global-thread-lock* nil - "This lock protects the special variables that make up the thread list/run queues and the thread objects.") + "This lock protects the special variables that make up the thread list/run queues and the thread objects. +Free value is NIL; held/contended value is an MCS node chain (MCS queue spinlock).") (sys.int::defglobal *supervisor-priority-run-queue*) (sys.int::defglobal *high-priority-run-queue*) (sys.int::defglobal *normal-priority-run-queue*) (sys.int::defglobal *low-priority-run-queue*) (sys.int::defglobal *all-threads*) -(sys.int::defglobal *n-running-cpus*) (sys.int::defglobal *world-stopper*) (sys.int::defglobal *pseudo-atomic-thread-count*) @@ -53,6 +53,12 @@ This area is made read-only when the soft guard is triggered and is used to catch when the thread has left the guard region so that it can be reprotected.") +(defstruct (mcs-node + (:area :wired) + (:constructor %make-mcs-node)) + (next nil) + (locked nil)) + (defstruct (cpu (:area :wired)) (tlab-bump 0) @@ -61,7 +67,35 @@ can be reprotected.") (cons-allocation-count 0) (cons-fast-path-hits 0) (general-allocation-count 0) - (general-fast-path-hits 0)) + (general-fast-path-hits 0) + ;; Per-CPU index (0..N-1). + (cpu-index 0 :type (unsigned-byte 8)) + ;; MCS spinlock node for this CPU. + (mcs-node nil) + ;; RCU nesting counter. > 0 means inside an RCU read-side critical section. + (rcu-nest 0 :type fixnum) + ;; Per-CPU running thread count. + (running-threads 0 :type fixnum) + ;; True when this CPU is idle (in HLT/WFI). + (idle-p nil) + ;; Inhibit scheduling counter. > 0 means don't switch threads. + (inhibit-scheduling 0 :type fixnum) + ;; TLB generation counter for lazy shootdown. + (tlb-generation 0 :type fixnum) + ;; Per-CPU run queue placeholders (filled by scheduler rewrite). + (rq-supervisor nil) + (rq-high nil) + (rq-normal nil) + (rq-low nil) + ;; Batch lists for remote enqueue. + (rq-batch-supervisor nil) + (rq-batch-high nil) + (rq-batch-normal nil) + (rq-batch-low nil) + ;; Timer active flag. + (timer-active nil) + ;; Last scheduled thread (for warm-cache affinity). + (last-thread nil)) (defstruct (thread (:area :wired) @@ -225,18 +259,22 @@ can be reprotected.") ;;; Locking. +;;; *global-thread-lock* uses the per-CPU MCS queue spinlock for fair +;;; FIFO acquisition and local-cache-line spinning. It is released from +;;; Lisp code in %%SWITCH-TO-THREAD-COMMON (on the kernel wired stack) +;;; before the LAP restore trampolines switch to the new thread's stack. (defun acquire-global-thread-lock () - (acquire-symbol-spinlock *global-thread-lock*)) + (acquire-mcs-spinlock *global-thread-lock*)) (defun release-global-thread-lock () - (release-symbol-spinlock *global-thread-lock*)) + (release-mcs-spinlock *global-thread-lock*)) (defmacro with-global-thread-lock ((&optional) &body body) - `(with-symbol-spinlock (*global-thread-lock*) + `(with-mcs-spinlock (*global-thread-lock*) ,@body)) (defun ensure-global-thread-lock-held () - (ensure-symbol-spinlock-held *global-thread-lock*)) + (ensure-mcs-spinlock-held *global-thread-lock*)) ;;; Run queue management. @@ -382,15 +420,24 @@ Interrupts must be off and the global thread lock must be held." (defun maybe-preempt-via-interrupt (interrupt-frame) (let ((current (current-thread))) - (acquire-global-thread-lock) - (cond ((or (and *world-stopper* - (eql current *world-stopper*)) - (eql (thread-priority current) :supervisor) - (eql current (local-cpu-idle-thread))) - (release-global-thread-lock)) + (cond ((plusp (cpu-inhibit-scheduling (local-cpu))) + ;; Scheduling is inhibited on this CPU (e.g. a TLB shootdown + ;; or quiesce is in progress). Do not migrate the current + ;; thread away: TLB shootdown relies on the initiator staying + ;; put so its local INVLPG invalidates the CPU that did not + ;; receive an IPI (itself). Re-arm the timer so preemption + ;; resumes once the inhibitor is released. + (preemption-timer-reset *timeslice-length*)) (t - (setf (thread-state current) :runnable) - (%reschedule-via-interrupt interrupt-frame))))) + (acquire-global-thread-lock) + (cond ((or (and *world-stopper* + (eql current *world-stopper*)) + (eql (thread-priority current) :supervisor) + (eql current (local-cpu-idle-thread))) + (release-global-thread-lock)) + (t + (setf (thread-state current) :runnable) + (%reschedule-via-interrupt interrupt-frame))))))) (defun %%switch-to-thread-via-wired-stack (current-thread sp fp next-thread) ;; Save frame pointer. @@ -439,14 +486,13 @@ Interrupts must be off and the global thread lock must be held." (setf (thread-switch-time-start new-thread) now)) ;; Switch threads. (set-current-thread new-thread) + ;; Check if this CPU missed a TLB shootdown while idle. + (check-tlb-generation-consistency) ;; Restore FPU state. (restore-fpu-state new-thread) - ;; The global thread lock is dropped by the restore functions, not here. - ;; We are still running on the current (old) thread's stack, so cannot - ;; allow another CPU to switch on to it just yet. - ;; This can only occur when performing a voluntary switch away from - ;; a thread with a wired stack - one of the ephemeral supervisor threads. - ;; Check if the thread is full-save. + ;; The global thread lock is released here, on the kernel wired stack, + ;; before the LAP restore trampolines switch to the new thread's stack. + (release-global-thread-lock) (if (thread-full-save-p new-thread) (%%restore-full-save-thread new-thread) (%%restore-partial-save-thread new-thread))) @@ -521,7 +567,7 @@ Interrupts must be off and the global thread lock must be held." (setf (thread-full-save-p thread) t (thread-state thread) :runnable) (safe-without-interrupts (thread) - (with-symbol-spinlock (*global-thread-lock*) + (with-global-thread-lock nil (push-run-queue thread) ;; Add thread to global thread list. (setf (thread-global-prev *all-threads*) thread @@ -568,7 +614,8 @@ Interrupts must be off and the global thread lock must be held." (setf (event-state (thread-join-event self)) (or return-values :no-values)) (acquire-global-thread-lock) (setf (thread-state self) :dead) - ;; Remove thread from the global list. + ;; Unlink self from *all-threads* directly. The global lock is held + ;; and no deferred list round-trip is needed — this is the only writer. (when (thread-global-next self) (setf (thread-global-prev (thread-global-next self)) (thread-global-prev self))) (when (thread-global-prev self) @@ -610,11 +657,14 @@ not and WAIT-P is false." ;; the system idles. Not needed to be correct, but reduces activity ;; when idle. (ensure (not (preemption-timer-remaining))) + ;; Mark this CPU as idle for directed wakeup. + (setf (cpu-idle-p (local-cpu)) t) ;; Look for a thread to switch to. (acquire-global-thread-lock) (setf (thread-state self) :runnable) (let ((next (update-run-queue))) (cond ((not (eql next self)) + (setf (cpu-idle-p (local-cpu)) nil) (increment-n-running-cpus) ;; Switch to thread. (%run-on-wired-stack-without-interrupts (sp fp next self) @@ -627,18 +677,33 @@ not and WAIT-P is false." (%disable-interrupts))))))) (defun increment-n-running-cpus () - (let ((prev (sys.int::%atomic-fixnum-add-symbol '*n-running-cpus* 1))) - (when (zerop prev) - (set-run-light t)))) + (incf (cpu-running-threads (local-cpu))) + (when (eql (cpu-running-threads (local-cpu)) 1) + (set-run-light t))) (defun decrement-n-running-cpus () - (let ((prev (sys.int::%atomic-fixnum-add-symbol '*n-running-cpus* -1))) - (when (and (eql prev 1) - (boundp '*light-run*)) - ;; Clear the run light immediately so it doesn't stay on between - ;; GUI screen updates. + (decf (cpu-running-threads (local-cpu))) + (when (and (zerop (cpu-running-threads (local-cpu))) + (not (n-running-cpus>0-p))) + (when (boundp '*light-run*) (clear-light *light-run*)))) +(defun n-running-cpus>0-p () + (dolist (cpu *cpus*) + (when (plusp (cpu-running-threads cpu)) + (return t)))) + +(defun n-running-cpus () + (let ((total 0)) + (dolist (cpu *cpus* total) + (incf total (cpu-running-threads cpu))))) + +(defun thread-priority-higher-p (pri1 pri2) + "Return true if PRI1 has higher scheduling priority than PRI2." + (declare (type (member :idle :low :normal :high :supervisor) pri1 pri2)) + (let ((order '(:idle :low :normal :high :supervisor))) + (> (position pri1 order) (position pri2 order)))) + (defun make-ephemeral-thread (entry-point initial-state &key name (stack-size (* 256 1024)) (priority :normal)) (let* ((thread (%make-thread name)) (stack (%allocate-stack stack-size t))) @@ -722,7 +787,7 @@ not and WAIT-P is false." (defun initialize-threads () (when (not (boundp '*global-thread-lock*)) ;; First-run stuff. - (setf *global-thread-lock* :unlocked) + (setf *global-thread-lock* nil) (setf *supervisor-priority-run-queue* (make-run-queue :supervisor) *high-priority-run-queue* (make-run-queue :high) *normal-priority-run-queue* (make-run-queue :normal) @@ -741,7 +806,7 @@ not and WAIT-P is false." (thread-global-next sys.int::*disk-io-thread*) nil (thread-global-prev sys.int::*disk-io-thread*) sys.int::*pager-thread*) (setf *default-stack-size* (* 1024 1024))) - (setf *n-running-cpus* 1) + (setf (cpu-running-threads (local-cpu)) 1) (reset-ephemeral-thread sys.int::*bsp-idle-thread* #'idle-thread :runnable :idle) (reset-ephemeral-thread sys.int::*snapshot-thread* #'snapshot-thread :sleeping :supervisor) ;; Don't let the pager run until the paging disk has been found. @@ -752,7 +817,7 @@ not and WAIT-P is false." (defun wake-thread (thread) "Wake a sleeping thread." (without-interrupts - (with-symbol-spinlock (*global-thread-lock*) + (with-global-thread-lock () (wake-thread-1 thread)))) (defun wake-thread-1 (thread) @@ -792,11 +857,33 @@ not and WAIT-P is false." (panic "Initial thread woken??"))) (defun all-threads () - (do ((list '()) - (current *all-threads* (thread-global-next current))) - ((null current) - list) - (push current list))) + (with-rcu-read-lock + (do ((list '()) + (current *all-threads* (thread-global-next current))) + ((null current) + list) + (unless (eql (thread-state current) :dead) + (push current list))))) + +(defun %cleanup-dead-threads () + "Remove threads marked :dead from the *all-threads* list. +The global thread lock must be held." + (ensure-global-thread-lock-held) + (dolist (thread *rcu-deferred-list*) + (when (thread-global-next thread) + (setf (thread-global-prev (thread-global-next thread)) (thread-global-prev thread))) + (when (thread-global-prev thread) + (setf (thread-global-next (thread-global-prev thread)) (thread-global-next thread))) + (when (eql thread *all-threads*) + (setf *all-threads* (thread-global-next thread)))) + (setf *rcu-deferred-list* nil)) + +(defun cleanup-dead-threads () + "Remove threads marked :dead from the *all-threads* list." + (without-interrupts + (acquire-global-thread-lock) + (%cleanup-dead-threads) + (release-global-thread-lock))) (defun terminate-thread (thread) (establish-thread-foothold @@ -881,7 +968,7 @@ footholds will be reenabled, otherwise footholds will stay inhibited." (let ((wi (thread-wait-item thread))) (when (wait-queue-p wi) (lock-wait-queue wi)) - (with-symbol-spinlock (*global-thread-lock*) + (with-global-thread-lock nil (cond ((eql (thread-state thread) :sleeping) ;; Remove the thread from its associated wait-queue. (ensure (wait-queue-p wi) @@ -909,7 +996,7 @@ footholds will be reenabled, otherwise footholds will stay inhibited." (let ((push-cons (sys.int::cons-in-area function nil :wired))) (flet ((push-foothold () (safe-without-interrupts (thread push-cons) - (with-symbol-spinlock (*global-thread-lock*) + (with-global-thread-lock nil (setf (cdr push-cons) (thread-pending-footholds thread) (thread-pending-footholds thread) push-cons))))) (cond ((eql thread (current-thread)) @@ -944,7 +1031,7 @@ footholds will be reenabled, otherwise footholds will stay inhibited." ;; allow the thread to settle. (defun sample-thread-state (thread) (safe-without-interrupts (thread) - (with-symbol-spinlock (*global-thread-lock*) + (with-global-thread-lock nil (thread-state thread)))) (defun stop-thread (thread) diff --git a/supervisor/time.lisp b/supervisor/time.lisp index eb79892d3..3650ab07f 100644 --- a/supervisor/time.lisp +++ b/supervisor/time.lisp @@ -125,7 +125,7 @@ (:area :wired)) head tail - (lock (place-spinlock-initializer))) + (lock :unlocked)) (defstruct (timer (:constructor %make-timer) diff --git a/supervisor/x86-64/cpu.lisp b/supervisor/x86-64/cpu.lisp index b00519b66..4d860a1e3 100644 --- a/supervisor/x86-64/cpu.lisp +++ b/supervisor/x86-64/cpu.lisp @@ -11,6 +11,7 @@ (sys.int::defglobal *bsp-cpu*) (sys.int::defglobal *lapic-address*) +(sys.int::defglobal *lapic-x2apic-mode*) (defconstant +lapic-reg-id+ #x02) (defconstant +lapic-reg-version+ #x03) @@ -46,12 +47,30 @@ (defconstant +lapic-lvt-mask+ #x10000) (defconstant +ipi-type-fixed+ 0) -(defconstant +ipi-type-lowest-priority+ 1) (defconstant +ipi-type-smi+ 2) (defconstant +ipi-type-nmi+ 4) (defconstant +ipi-type-init+ 5) (defconstant +ipi-type-sipi+ 6) +;; x2APIC detection. +(defconstant +cpuid-feature-x2apic+ 21) +(defconstant +cpuid-feature-tsc-deadline+ 24) +(defconstant +msr-ia32-apic-base-x2apic-enable+ #x400) +(defconstant +msr-ia32-apic-base-enable+ #x800) + +;; x2APIC MSR range base. +(defconstant +x2apic-msr-base+ #x800) + +;; ICR bit fields. +(defconstant +icr-vector+ (byte 8 0)) +(defconstant +icr-delivery-mode+ (byte 3 8)) +(defconstant +icr-destination-mode+ #x0800) +(defconstant +icr-destination-shorthand+ (byte 2 18)) + +;; x2APIC MSR addresses. +(defconstant +x2apic-msr-icr+ #x830) +(defconstant +x2apic-msr-self-ipi+ #x83F) + ;; Cold generator provided objects. (sys.int::defglobal sys.int::*interrupt-service-routines*) @@ -112,7 +131,8 @@ irq-stack page-fault-stack lapic-timer-active - page-fault-hook) + page-fault-hook + (apic-in-x2apic-mode nil)) (defconstant +ap-trampoline-physical-address+ #x7000 "Where the AP trampoline should be copied to in physical memory. @@ -130,8 +150,8 @@ The bootloader is loaded to #x7C00, so #x7000 should be safe.") (defconstant +tlb-shootdown-ipi-vector+ #x83 "Sent to CPUs to prepare them for TLB shootdown.") -(defconstant +magic-button-ipi-vector+ #x84 - "Sent to CPUs when the magic debug button is pressed.") +(defconstant +reschedule-ipi-vector+ #x85 + "Sent to a specific CPU to request rescheduling.") (defun set-idt-entry (cpu idt-index &key (offset 0) (segment #x0008) @@ -154,39 +174,94 @@ The bootloader is loaded to #x7C00, so #x7000 should be safe.") 0) (values)) +(defun lapic-reg-to-msr (register) + (+ +x2apic-msr-base+ register)) + +(defun read-lapic (register) + (if *lapic-x2apic-mode* + (ldb (byte 32 0) (sys.int::msr (lapic-reg-to-msr register))) + (physical-memref-unsigned-byte-32 (+ *lapic-address* (ash register 4))))) + +(defun write-lapic (value register) + (if *lapic-x2apic-mode* + (setf (sys.int::msr (lapic-reg-to-msr register)) + (logand value #xFFFFFFFF)) + (setf (physical-memref-unsigned-byte-32 (+ *lapic-address* (ash register 4))) value))) + +;; Mode-dispatching LAPIC accessors, retaining the original lapic-reg +;; names to avoid a wholesale rename of every call site. (defun lapic-reg (register) - (physical-memref-unsigned-byte-32 (+ *lapic-address* (ash register 4)))) + (read-lapic register)) (defun (setf lapic-reg) (value register) - (setf (physical-memref-unsigned-byte-32 (+ *lapic-address* (ash register 4))) value)) + (write-lapic value register)) (defun lapic-eoi () "Issue an EOI to the Local APIC." (setf (lapic-reg +lapic-reg-eoi+) 0)) -(defun send-ipi (target type vector) - (setf (lapic-reg +lapic-reg-interrupt-command-high+) (ash target 24)) - ;; Send: No shorthand, edge triggered, assert, physical dest. - (setf (lapic-reg +lapic-reg-interrupt-command-low+) (logior #x4000 - (ash type 8) - vector))) +(declaim (inline lapic-initialized-p lapic-x2apic-p)) +(defun lapic-initialized-p () + (and (boundp '*lapic-address*) *lapic-address*)) -(defun broadcast-ipi (type vector &optional including-self) - ;; BROADCAST-IPI can be called very early due to thread wakeups, before - ;; the lapic is mapped by INITIALIZE-CPU. - (when (and (boundp '*lapic-address*) - *lapic-address*) - ;; Disable interrupts to prevent cross-cpu migration from - ;; fouling up behaviour of INCLUDING-SELF. - (safe-without-interrupts (type vector including-self) +(defun lapic-x2apic-p () + (and (boundp '*lapic-x2apic-mode*) *lapic-x2apic-mode*)) + +(defun send-ipi (target type vector) + (if *lapic-x2apic-mode* + ;; x2APIC: single 64-bit WRMSR to MSR 0x830. + ;; Bits 14 (Level) and 15 (Trigger Mode) are reserved and must be + ;; zero in x2APIC mode (Intel SDM 10.12.9), so the #x4000 used by + ;; the xAPIC path below is deliberately not OR-ed in here. + (setf (sys.int::msr +x2apic-msr-icr+) + (logior (ash (logand target #xFFFFFFFF) 32) + (ash type 8) + vector)) + ;; xAPIC: two MMIO writes. + (progn + (setf (lapic-reg +lapic-reg-interrupt-command-high+) (ash target 24)) + (setf (lapic-reg +lapic-reg-interrupt-command-low+) (logior #x4000 + (ash type 8) + vector))))) + +(defun send-self-ipi (vector) + "Send an IPI to the local CPU. +In x2APIC mode uses the Self IPI MSR (0x83F), a single WRMSR. +In xAPIC mode falls back to a regular ICR write targeting self." + (check-type vector (unsigned-byte 8)) + (if *lapic-x2apic-mode* + (setf (sys.int::msr +x2apic-msr-self-ipi+) vector) + (send-ipi (read-local-apic-id) +ipi-type-fixed+ vector))) + +(defun send-ipi-to-cpu (cpu type vector) + (send-ipi (x86-64-cpu-apic-id cpu) type vector)) + +(defun send-ipi-to-all (type vector &key including-self) + (if (lapic-x2apic-p) + ;; x2APIC shorthand: the destination-shorthand field is ICR + ;; bits 19:18 - 01=self, 10=all-including-self, 11=all-excluding-self. + (let ((icr (logior (ash type 8) + vector))) + (setf (sys.int::msr +x2apic-msr-icr+) + (logior icr (ash (if including-self 2 3) 18)))) (dolist (cpu *cpus*) (when (and (eql (x86-64-cpu-state cpu) :online) (or including-self (not (eql cpu (local-cpu))))) - (send-ipi (x86-64-cpu-apic-id cpu) type vector)))))) + (send-ipi-to-cpu cpu type vector))))) + +(defun broadcast-ipi (type vector &optional including-self) + (when (lapic-initialized-p) + (if (lapic-x2apic-p) + (send-ipi-to-all type vector :including-self including-self) + (safe-without-interrupts (type vector including-self) + (send-ipi-to-all type vector :including-self including-self))))) (defun broadcast-wakeup-ipi () - (broadcast-ipi +ipi-type-fixed+ +wakeup-ipi-vector+)) + (when (lapic-initialized-p) + (dolist (cpu *cpus*) + (when (eql (x86-64-cpu-state cpu) :online) + (wake-cpu cpu))))) (defun wakeup-ipi-handler (interrupt-frame info) (declare (ignore info)) @@ -203,44 +278,50 @@ The bootloader is loaded to #x7C00, so #x7000 should be safe.") (sys.int::defglobal *non-quiescent-cpus-remaining*) -;; FIXME: quiesce-cpus-for-world-stop and begin-tlb-shootdown both need to -;; prevent migration across CPUs. (defun quiesce-cpus-for-world-stop () "Bring all CPUs to a consistent state to stop the world. Protected by the world stop lock." + ;; Prevent migration during the broadcast and busy-wait. + (setf (cpu-inhibit-scheduling (local-cpu)) + (1+ (cpu-inhibit-scheduling (local-cpu)))) (setf *non-quiescent-cpus-remaining* (1- *n-up-cpus*)) (broadcast-ipi +ipi-type-fixed+ +quiesce-ipi-vector+) (loop (when (eql *non-quiescent-cpus-remaining* 0) (return)) - (sys.int::cpu-relax))) + (sys.int::cpu-relax)) + (setf (cpu-inhibit-scheduling (local-cpu)) + (max 0 (1- (cpu-inhibit-scheduling (local-cpu)))))) ;; Save the current thread's state and switch to the CPU's idle thread. (defun quiesce-ipi-handler (interrupt-frame info) (declare (ignore info)) (lapic-eoi) - (let* ((current (current-thread)) - (idle (local-cpu-idle-thread)) - (was-active (not (eql current idle)))) - (when was-active - (acquire-global-thread-lock) - ;; Return this thread to the run queue. - (setf (thread-state current) :runnable) - (push-run-queue current) - (preemption-timer-reset nil) - ;; Save thread state. - (save-fpu-state current) - (save-interrupted-state current interrupt-frame) - ;; Partially switch to the idle thread. - (setf (thread-state idle) :active) - (setf (sys.int::msr +msr-ia32-gs-base+) (sys.int::lisp-object-address idle))) - ;; Have now reached a quiescent state. - (sys.int::%atomic-fixnum-add-symbol '*non-quiescent-cpus-remaining* - -1) - (when was-active - ;; Finally, return to the idle thread. - (%%switch-to-thread-common idle - idle)))) + (cond (*debug-magic-button-hold-variable* + ;; Magic button active: save state and spin. + (magic-button-ipi-handler-1 interrupt-frame)) + (t + ;; Normal quiesce: switch to idle thread. + (let* ((current (current-thread)) + (idle (local-cpu-idle-thread)) + (was-active (not (eql current idle)))) + (when was-active + (acquire-global-thread-lock) + ;; Return this thread to the run queue. + (setf (thread-state current) :runnable) + (push-run-queue current) + (preemption-timer-reset nil) + ;; Save thread state. + (save-fpu-state current) + (save-interrupted-state current interrupt-frame) + ;; Partially switch to the idle thread. + (setf (thread-state idle) :active) + (setf (sys.int::msr +msr-ia32-gs-base+) (sys.int::lisp-object-address idle))) + ;; Have now reached a quiescent state. + (sys.int::%atomic-fixnum-add-symbol '*non-quiescent-cpus-remaining* -1) + (when was-active + ;; Finally, return to the idle thread. + (%%switch-to-thread-common idle idle)))))) ;; TODO: This needs to be fixed up to prevent multiple CPUs hitting it at ;; once. It can't currently happen because it is only used from IRQ handlers @@ -251,7 +332,7 @@ Protected by the world stop lock." (defun stop-other-cpus-for-debug-magic-button () (setf *debug-magic-button-ready-variable* (1- *n-up-cpus*) *debug-magic-button-hold-variable* t) - (broadcast-ipi +ipi-type-fixed+ +magic-button-ipi-vector+) + (broadcast-ipi +ipi-type-fixed+ +quiesce-ipi-vector+) ;; Wait for other CPUs to arrive, this ensures the thread state is actually ;; consistent. (loop until (eql *debug-magic-button-ready-variable* 0))) @@ -263,10 +344,17 @@ Protected by the world stop lock." ;; the hold variable going to NIL. (loop until (eql *debug-magic-button-ready-variable* 0))) -(defun magic-button-ipi-handler (interrupt-frame info) +(defun reschedule-ipi-handler (interrupt-frame info) (declare (ignore info)) - (magic-button-ipi-handler-1 interrupt-frame) - (lapic-eoi)) + (lapic-eoi) + ;; The CPU will reschedule on IRET; no further action needed. + nil) + +(defun wake-cpu (cpu) + "Send a directed wakeup IPI to a specific CPU. +If the CPU is idle, this will cause it to check for new threads." + (when (cpu-idle-p cpu) + (send-ipi-to-cpu cpu +ipi-type-fixed+ +wakeup-ipi-vector+))) (defun magic-button-ipi-handler-1 (interrupt-frame) (when (not *debug-magic-button-hold-variable*) @@ -283,13 +371,24 @@ Protected by the world stop lock." '*debug-magic-button-ready-variable* -1)) (sys.int::defglobal *tlb-shootdown-in-progress* nil) +(sys.int::defglobal *tlb-shootdown-n-targets* 0) (sys.int::defglobal *busy-tlb-shootdown-cpus*) - -;; TODO: This unconditionally invalidates the entire TLB. -;; Should be more fine-grained. - -(defun check-tlb-shootdown-not-in-progress () - (ensure (not *tlb-shootdown-in-progress*) "TLB shootdown in progress!")) +(sys.int::defglobal *current-tlb-generation*) + +(defun check-tlb-generation-consistency () + "If this CPU missed a TLB shootdown while idle, flush now. +If a shootdown is still in progress, flush but do not stamp the +generation. the context-switch path will re-check after the shootdown +finishes and the final generation is known." + (when (and (boundp '*current-tlb-generation*) + (boundp '*tlb-shootdown-in-progress*) + (or *tlb-shootdown-in-progress* + (not (eql (cpu-tlb-generation (local-cpu)) + *current-tlb-generation*)))) + (flush-tlb) + (unless *tlb-shootdown-in-progress* + (setf (cpu-tlb-generation (local-cpu)) + *current-tlb-generation*)))) (defun begin-tlb-shootdown () "Bring all CPUs to state ready for TLB shootdown. @@ -297,50 +396,78 @@ TLB shootdown must be protected by the VM lock." (ensure (rw-lock-write-held-p *vm-lock*) "VM lock not held when doing TLB shootdown!") (ensure (not *tlb-shootdown-in-progress*) "TLB shootdown already in progress!") (setf *tlb-shootdown-in-progress* t) + ;; Bump the generation before sending IPIs so idle CPUs that wake + ;; during the shootdown window will see the mismatch and flush on + ;; their next context switch. Idle CPUs are skipped; if they + ;; handle a device IRQ mid-shootdown the IRQ handler calls + ;; check-tlb-generation-consistency to flush before touching + ;; pageable memory. + (sys.int::%atomic-fixnum-add-symbol '*current-tlb-generation* 1) + ;; Prevent migration during shootdown. + (setf (cpu-inhibit-scheduling (local-cpu)) + (1+ (cpu-inhibit-scheduling (local-cpu)))) + ;; Initialise *busy-tlb-shootdown-cpus* to (n_cpus - 1) BEFORE any + ;; IPI so the handler's decrement always has a bound value. Then + ;; send IPIs to non-idle CPUs and decrement the counter for idle + ;; CPUs we skip. (setf *busy-tlb-shootdown-cpus* (1- *n-up-cpus*)) - (broadcast-ipi +ipi-type-fixed+ +tlb-shootdown-ipi-vector+) - ;; Wait for other CPUs to reach the handler. + (setf *tlb-shootdown-n-targets* 0) + (dolist (cpu *cpus*) + (when (and (eql (x86-64-cpu-state cpu) :online) + (not (eql cpu (local-cpu)))) + (if (cpu-idle-p cpu) + (sys.int::%atomic-fixnum-add-symbol '*busy-tlb-shootdown-cpus* -1) + (progn + (incf *tlb-shootdown-n-targets*) + (send-ipi-to-cpu cpu +ipi-type-fixed+ + +tlb-shootdown-ipi-vector+))))) + ;; Wait for targeted CPUs to reach the handler. (loop (when (eql *busy-tlb-shootdown-cpus* 0) (return)) (sys.int::cpu-relax))) -(defun tlb-shootdown-single (address) - (declare (ignore address)) - (ensure *tlb-shootdown-in-progress*)) - -(defun tlb-shootdown-range (base length) - (declare (ignore base length)) - (ensure *tlb-shootdown-in-progress*)) - -(defun tlb-shootdown-all () - (ensure *tlb-shootdown-in-progress*)) - (defun finish-tlb-shootdown () (ensure *tlb-shootdown-in-progress*) - (setf *busy-tlb-shootdown-cpus* (1- *n-up-cpus*)) + ;; Only wait for CPUs that actually received the IPI in begin-tlb-shootdown. + (setf *busy-tlb-shootdown-cpus* *tlb-shootdown-n-targets*) (setf *tlb-shootdown-in-progress* nil) ;; Wait for CPUs to leave the handler. (loop (when (eql *busy-tlb-shootdown-cpus* 0) (return)) - (sys.int::cpu-relax))) + (sys.int::cpu-relax)) + ;; Safe to allow migration again. + (setf (cpu-inhibit-scheduling (local-cpu)) + (max 0 (1- (cpu-inhibit-scheduling (local-cpu)))))) (defun tlb-shootdown-ipi-handler (interrupt-frame info) (declare (ignore info)) (lapic-eoi) + ;; Increment this CPU's inhibit-scheduling counter to prevent migration. + (setf (cpu-inhibit-scheduling (local-cpu)) + (1+ (cpu-inhibit-scheduling (local-cpu)))) (sys.int::%atomic-fixnum-add-symbol '*busy-tlb-shootdown-cpus* -1) (loop (when (not *tlb-shootdown-in-progress*) (return)) - ;; FIXME: hack... maybe this should sit with interrupts enabled? (when *debug-magic-button-hold-variable* (magic-button-ipi-handler-1 interrupt-frame)) (sys.int::cpu-relax)) + ;; Flush unconditionally. Once this CPU received the shootdown IPI it + ;; must invalidate its TLB even if it happens to be idle at this + ;; instant: a CPU that went idle between the initiator's idle-p read + ;; and the IPI delivery would otherwise skip the flush while still + ;; stamping its TLB generation as current, leaving stale entries in + ;; place until the next context switch. (flush-tlb) + ;; Record that this CPU's TLB is up to date wrt the current generation. + (setf (cpu-tlb-generation (local-cpu)) *current-tlb-generation*) (sys.int::%atomic-fixnum-add-symbol '*busy-tlb-shootdown-cpus* - -1)) + -1) + (setf (cpu-inhibit-scheduling (local-cpu)) + (max 0 (1- (cpu-inhibit-scheduling (local-cpu)))))) (sys.int::define-lap-function local-cpu (()) "Return the address of the local CPU's info vector." @@ -683,6 +810,11 @@ TLB shootdown must be protected by the VM lock." (defun %%ap-entry-point () ;; CPU vector has been configured for us, just load the required bits. (load-cpu-bits (local-cpu)) + ;; Enable x2APIC on APs if the BSP enabled it. + (when *lapic-x2apic-mode* + (let ((apic-base (sys.int::msr +msr-ia32-apic-base+))) + (setf (sys.int::msr +msr-ia32-apic-base+) + (logior apic-base +msr-ia32-apic-base-x2apic-enable+)))) (lapic-setup) ;; Signal that this CPU has booted successfully. (let ((old (sys.int::cas (x86-64-cpu-state (local-cpu)) :offline :online))) @@ -761,10 +893,11 @@ TLB shootdown must be protected by the VM lock." (debug-print-line " id: " (lapic-reg +lapic-reg-id+)) (debug-print-line " version: " (lapic-reg +lapic-reg-version+)) (debug-print-line " tpr: " (lapic-reg +lapic-reg-task-priority+)) - (debug-print-line " arp: " (lapic-reg +lapic-reg-arbitration-priority+)) - (debug-print-line " ppr: " (lapic-reg +lapic-reg-processor-priority+)) - (debug-print-line " logical-destination: " (lapic-reg +lapic-reg-logical-destination+)) - (debug-print-line " desination-format: " (lapic-reg +lapic-reg-destination-format+)) + (unless *lapic-x2apic-mode* + (debug-print-line " arp: " (lapic-reg +lapic-reg-arbitration-priority+)) + (debug-print-line " ppr: " (lapic-reg +lapic-reg-processor-priority+)) + (debug-print-line " logical-destination: " (lapic-reg +lapic-reg-logical-destination+)) + (debug-print-line " desination-format: " (lapic-reg +lapic-reg-destination-format+))) (debug-print-line " svr: " (lapic-reg +lapic-reg-spurious-interrupt-vector+)) (debug-print-line " isr: " (lapic-reg +lapic-reg-in-service-0+) " " @@ -794,7 +927,9 @@ TLB shootdown must be protected by the VM lock." (lapic-reg (+ +lapic-reg-interrupt-request-0+ 6)) " " (lapic-reg (+ +lapic-reg-interrupt-request-0+ 7))) (debug-print-line " esr: " (lapic-reg +lapic-reg-error-status+)) - (debug-print-line " icr: " (lapic-reg +lapic-reg-interrupt-command-high+) ":" (lapic-reg +lapic-reg-interrupt-command-low+)) + (if *lapic-x2apic-mode* + (debug-print-line " icr: " (sys.int::msr (+ +x2apic-msr-base+ +lapic-reg-interrupt-command-low+))) + (debug-print-line " icr: " (lapic-reg +lapic-reg-interrupt-command-high+) ":" (lapic-reg +lapic-reg-interrupt-command-low+))) (debug-print-line " lvt-timer: " (lapic-reg +lapic-reg-lvt-timer+)) (debug-print-line " lvt-thermal-sensor: " (lapic-reg +lapic-reg-lvt-thermal-sensor+)) (debug-print-line " lvt-pmc: " (lapic-reg +lapic-reg-lvt-performance-monitoring-counters+)) @@ -861,45 +996,52 @@ This is a one-shot timer and must be reset after firing." (declare (ignore interrupt-frame info)) (panic "Got LAPIC error interrupt")) +(defun lapic-timer-measure-one-tick () + "Start LAPIC timer at max count at a PIT tick boundary, wait one tick. +Returns (values lapic-cycles-elapsed pit-tick-duration-in-internal-time-units)." + (let ((start (wait-for-next-pit-tick (get-internal-run-time)))) + (write-lapic #xFFFFFFFF +lapic-reg-timer-initial-count+) + (let ((end (wait-for-next-pit-tick start))) + (let ((remaining (read-lapic +lapic-reg-timer-current-count+))) + (write-lapic 0 +lapic-reg-timer-initial-count+) + (values (- #xFFFFFFFF remaining) (- end start)))))) + (defun lapic-timer-calibrate-1 () - (let ((initial-time (get-internal-run-time)) - (start-time nil) - (end-time nil) - (end-counter nil)) - ;; Wait for the start of this tick. - (loop - (setf start-time (get-internal-run-time)) - (when (not (eq start-time initial-time)) - (return))) - ;; Start timer with the maximum count value - (setf (lapic-reg +lapic-reg-timer-initial-count+) #xFFFFFFFF) - ;; Wait for next tick. - (loop - (setf end-time (get-internal-run-time)) - (when (not (eq end-time start-time)) - (return))) - ;; Read current count & stop timer. - (setf end-counter (lapic-reg +lapic-reg-timer-current-count+)) - (setf (lapic-reg +lapic-reg-timer-initial-count+) 0) - (let* ((cycles (- #xFFFFFFFF end-counter)) - (total-time (- end-time start-time)) - ;; Use single floats here. Rationals & double-floats require - ;; allocation and this is called too early for that. - (time (/ (float total-time) internal-time-units-per-second)) - (cycles-per-second (/ cycles time))) + "Calibrate the LAPIC timer using the PIT as a time reference." + (multiple-value-bind (cycles total-time) + (lapic-timer-measure-one-tick) + (let* ((time (/ (float total-time) internal-time-units-per-second)) + (cycles-per-second (/ (float cycles) time))) cycles-per-second))) ;; Assume LAPIC timers across CPUs tick at the same rate. ;; This is a fixnum, timer cycles per second. (sys.int::defglobal *lapic-timer-calibration*) +;; Timer ticks per second as computed by the calibration. +(sys.int::defglobal *lapic-timer-ticks-per-second*) + +(defun lapic-timer-calibrate-tsc () + "Calibrate the LAPIC timer using the TSC as a high-resolution time reference. +*CPU-SPEED* must already be calibrated before calling this." + (let ((tsc-start (sys.int::tsc))) + (multiple-value-bind (lapic-cycles) + (lapic-timer-measure-one-tick) + (let* ((tsc-delta (- (sys.int::tsc) tsc-start)) + (cpu-speed (float *cpu-speed*))) + (if (zerop tsc-delta) + 0 + (/ (* (float lapic-cycles) cpu-speed) + (float tsc-delta))))))) + ;; TODO: Be more clever when picking the divisor. ;; Should dynamically adjust so a goldilocks calibration value is returned. (defun lapic-timer-calibrate () - (let ((n (lapic-timer-calibrate-1))) - (dotimes (i 5) - (setf n (/ (+ n (lapic-timer-calibrate-1)) 2))) - (setf *lapic-timer-calibration* (truncate n)))) + (let ((n (calibrate-average (if (tsc-deadline-available-p) + #'lapic-timer-calibrate-tsc + #'lapic-timer-calibrate-1)))) + (setf *lapic-timer-calibration* (truncate n) + *lapic-timer-ticks-per-second* (truncate n)))) ;; These two functions are interrupt-safe, they must not use ;; floats, bignums or ratios when converting. @@ -913,6 +1055,37 @@ This is a one-shot timer and must be reset after firing." (truncate (* duration-internal-time-units *lapic-timer-calibration*) internal-time-units-per-second))) +;; Early-boot CPUID: saves/restores EBX instead of relying on pseudo-atomic. +;; Safe to call before the thread/lock infrastructure is fully initialized. +;; CPUID leaf 1 subleaf 0, return ECX only. No pseudo-atomic needed. +(sys.int::define-lap-function %cpuid-1-ecx-early () + (:gc :no-frame :layout #*0) + (sys.lap-x86:mov64 :rax :r8) + (sys.lap-x86:sar64 :rax #.sys.int::+n-fixnum-bits+) + (sys.lap-x86:mov64 :rcx :r9) + (sys.lap-x86:sar64 :rcx #.sys.int::+n-fixnum-bits+) + (sys.lap-x86:push :rbx) + (sys.lap-x86:cpuid) + (sys.lap-x86:mov64 :r8 :rcx) + (sys.lap-x86:pop :rbx) + (sys.lap-x86:lea64 :r8 ((:r8 #.(ash 1 sys.int::+n-fixnum-bits+)))) + (sys.lap-x86:mov32 :ecx #.(ash 1 sys.int::+n-fixnum-bits+)) + (sys.lap-x86:ret)) + +(defun x2apic-supported-p () + (logbitp +cpuid-feature-x2apic+ (%cpuid-1-ecx-early 1 0))) + +(defun x2apic-enabled-by-firmware-p () + (logbitp 10 (sys.int::msr +msr-ia32-apic-base+))) + +(defun tsc-deadline-available-p () + (logbitp +cpuid-feature-tsc-deadline+ (%cpuid-1-ecx-early 1 0))) + +(defun read-local-apic-id () + (if *lapic-x2apic-mode* + (sys.int::msr #x802) + (ldb (byte 8 24) (read-lapic +lapic-reg-id+)))) + (defun initialize-early-cpu () (setf *lapic-address* nil)) @@ -920,25 +1093,55 @@ This is a one-shot timer and must be reset after firing." (setf *lapic-address* (logand (sys.int::msr +msr-ia32-apic-base+) (lognot #xFFF))) (map-physical-memory-early *lapic-address* #x1000 "LAPIC") - (lapic-setup) - (lapic-dump) + ;; Detect and enable x2APIC mode if available. + (cond ((x2apic-supported-p) + (cond ((x2apic-enabled-by-firmware-p) + (setf *lapic-x2apic-mode* t) + (debug-print-line "x2APIC already enabled by firmware")) + (t + ;; Enable x2APIC: set bit 10 (x2APIC enable) and bit 11 (APIC enable) in IA32_APIC_BASE MSR. + (let ((apic-base (sys.int::msr +msr-ia32-apic-base+))) + (setf (sys.int::msr +msr-ia32-apic-base+) + (logior apic-base + +msr-ia32-apic-base-x2apic-enable+ + +msr-ia32-apic-base-enable+))) + (setf *lapic-x2apic-mode* t) + (debug-print-line "x2APIC enabled on BSP")))) + (t + (setf *lapic-x2apic-mode* nil) + (debug-print-line "x2APIC not supported, using xAPIC MMIO"))) + (setf (x86-64-cpu-apic-in-x2apic-mode *bsp-cpu*) *lapic-x2apic-mode*) + (lapic-setup) + (lapic-dump) (map-physical-memory-early +ap-trampoline-physical-address+ #x1000 "AP Bootstrap") (setf *initial-pml4* (generate-initial-pml4)) (copy-ap-trampoline #'%%ap-bootstrap '%%ap-entry-point +ap-trampoline-physical-address+ *initial-pml4*) (setf (x86-64-cpu-page-fault-hook *bsp-cpu*) nil) - (setf (x86-64-cpu-apic-id *bsp-cpu*) (ldb (byte 8 24) (lapic-reg +lapic-reg-id+))) + (setf (x86-64-cpu-apic-id *bsp-cpu*) (read-local-apic-id)) + (setf (cpu-cpu-index *bsp-cpu*) 0) + (setf (cpu-idle-p *bsp-cpu*) nil) + (setf (cpu-inhibit-scheduling *bsp-cpu*) 0) + (setf (cpu-tlb-generation *bsp-cpu*) 0) + (setf *current-tlb-generation* 0) + (setf (cpu-timer-active *bsp-cpu*) nil) + ;; Allocate MCS node for BSP if not already allocated by cold-generator. + (when (null (cpu-mcs-node *bsp-cpu*)) + (setf (cpu-mcs-node *bsp-cpu*) (%make-mcs-node))) (debug-print-line "BSP has LAPIC ID " (x86-64-cpu-apic-id *bsp-cpu*)) (setf *cpus* '()) (push-wired *bsp-cpu* *cpus*) (setf *n-up-cpus* 1) - (hook-user-interrupt +lapic-vector-svr+ 'lapic-svr-handler) - (hook-user-interrupt +lapic-vector-err+ 'lapic-error-handler) - (hook-user-interrupt +lapic-vector-timer+ 'lapic-timer-handler) - (hook-user-interrupt +wakeup-ipi-vector+ 'wakeup-ipi-handler) - (hook-user-interrupt +panic-ipi-vector+ 'panic-ipi-handler) - (hook-user-interrupt +quiesce-ipi-vector+ 'quiesce-ipi-handler) - (hook-user-interrupt +tlb-shootdown-ipi-vector+ 'tlb-shootdown-ipi-handler) - (hook-user-interrupt +magic-button-ipi-vector+ 'magic-button-ipi-handler)) + (register-ipi-handler +lapic-vector-svr+ 'lapic-svr-handler) + (register-ipi-handler +lapic-vector-err+ 'lapic-error-handler) + (register-ipi-handler +lapic-vector-timer+ 'lapic-timer-handler) + (register-ipi-handler +wakeup-ipi-vector+ 'wakeup-ipi-handler) + (register-ipi-handler +panic-ipi-vector+ 'panic-ipi-handler) + (register-ipi-handler +quiesce-ipi-vector+ 'quiesce-ipi-handler) + (register-ipi-handler +tlb-shootdown-ipi-vector+ 'tlb-shootdown-ipi-handler) + (register-ipi-handler +reschedule-ipi-vector+ 'reschedule-ipi-handler)) + +(defun register-ipi-handler (vector handler) + (hook-user-interrupt vector handler)) (defun load-cpu-bits (cpu) (let* ((addr (- (sys.int::lisp-object-address cpu) @@ -994,7 +1197,15 @@ This is a one-shot timer and must be reset after firing." :wired-stack wired-stack :exception-stack exception-stack :irq-stack irq-stack - :page-fault-stack page-fault-stack))) + :page-fault-stack page-fault-stack + :cpu-index (length *cpus*) + :idle-p nil + :inhibit-scheduling 0 + :tlb-generation 0 + :timer-active nil + :apic-in-x2apic-mode *lapic-x2apic-mode*))) + ;; Allocate per-CPU MCS node for spinlocks. + (setf (cpu-mcs-node cpu) (%make-mcs-node)) (populate-cpu-info cpu (+ (stack-base wired-stack) (stack-size wired-stack)) (+ (stack-base exception-stack) (stack-size exception-stack)) @@ -1039,11 +1250,16 @@ This is a one-shot timer and must be reset after firing." (dotimes (i (sys.int::simple-vector-length (acpi-madt-table-controllers madt))) (let ((entry (svref (acpi-madt-table-controllers madt) i))) - (when (and (acpi-madt-processor-lapic-p entry) - (logbitp +acpi-madt-processor-lapic-flag-enabled+ - (acpi-madt-processor-lapic-flags entry)) - (not (eql (acpi-madt-processor-lapic-apic-id entry) bsp-apic-id))) - (register-secondary-cpu (acpi-madt-processor-lapic-apic-id entry)))))))) + (cond ((acpi-madt-processor-lapic-p entry) + (when (and (logbitp +acpi-madt-processor-lapic-flag-enabled+ + (acpi-madt-processor-lapic-flags entry)) + (not (eql (acpi-madt-processor-lapic-apic-id entry) bsp-apic-id))) + (register-secondary-cpu (acpi-madt-processor-lapic-apic-id entry)))) + ((acpi-madt-processor-x2apic-p entry) + (when (and (logbitp +acpi-madt-processor-lapic-flag-enabled+ + (acpi-madt-processor-x2apic-flags entry)) + (not (eql (acpi-madt-processor-x2apic-x2apic-id entry) bsp-apic-id))) + (register-secondary-cpu (acpi-madt-processor-x2apic-x2apic-id entry)))))))))) (defun boot-secondary-cpus () (detect-secondary-cpus) diff --git a/supervisor/x86-64/interrupts.lisp b/supervisor/x86-64/interrupts.lisp index 52186d970..ffc52af58 100644 --- a/supervisor/x86-64/interrupts.lisp +++ b/supervisor/x86-64/interrupts.lisp @@ -272,6 +272,9 @@ If clear, the fault occured in supervisor mode.") (debug-print-line "Spurious i8259 IRQ " irq ". Further spurious IRQs will not be reported.")) (incf *i8259-spurious-interrupt-count*) (return-from i8259-interrupt-handler))) + ;; If this CPU was idle during a TLB shootdown and missed the IPI, + ;; flush now before the IRQ handler touches any pageable memory. + (check-tlb-generation-consistency) (irq-deliver interrupt-frame (svref *i8259-irqs* irq)) (with-symbol-spinlock (*i8259-spinlock*) ;; Send EOI. @@ -303,7 +306,6 @@ If clear, the fault occured in supervisor mode.") (setf (sys.int::io-port/8 #xA1) (ldb (byte 8 8) *i8259-shadow-mask*))))))) (defun initialize-i8259 () - ;; TODO: do the APIC & IO-APIC as well. (when (not (boundp '*i8259-irqs*)) (setf *i8259-irqs* (sys.int::make-simple-vector 16 :wired) ;; fixme: do at cold-gen time. @@ -332,25 +334,41 @@ If clear, the fault occured in supervisor mode.") ;; Unmask the cascade IRQ, required for the 2nd chip to function. (i8259-unmask-irq 2)) +(declaim (inline io-apic-active-p)) +(defun io-apic-active-p () + (and (boundp '*io-apic-active-p*) *io-apic-active-p*)) + +(defun platform-irq-vector () + (if (io-apic-active-p) *io-apic-irqs* *i8259-irqs*)) + (defun platform-irq (number) - (cond ((<= 0 number 15) - (svref *i8259-irqs* number)) - (t nil))) + (if (io-apic-active-p) + (when (<= 0 number 255) + (let ((gsi (if (< number 16) + (sys.int::%object-ref-t *isa-irq-to-gsi* number) + number))) + (sys.int::%object-ref-t *io-apic-irqs* gsi))) + (when (<= 0 number 15) + (svref *i8259-irqs* number)))) (defun all-platform-irqs () - (loop - for i below (sys.int::%object-header-data *i8259-irqs*) - for irq = (svref *i8259-irqs* i) - collect irq)) + (loop with vector = (platform-irq-vector) + for i below (sys.int::%object-header-data vector) + for irq = (svref vector i) + when irq collect irq)) (defun map-platform-irqs (fn) - (loop - for i below (sys.int::%object-header-data *i8259-irqs*) - for irq = (svref *i8259-irqs* i) - do (funcall fn irq))) + (loop with vector = (platform-irq-vector) + for i below (sys.int::%object-header-data vector) + for irq = (svref vector i) + when irq do (funcall fn irq))) (defun platform-mask-irq (vector) - (i8259-mask-irq vector)) + (if (io-apic-active-p) + (io-apic-mask-irq vector) + (i8259-mask-irq vector))) (defun platform-unmask-irq (vector) - (i8259-unmask-irq vector)) + (if (io-apic-active-p) + (io-apic-unmask-irq vector) + (i8259-unmask-irq vector))) diff --git a/supervisor/x86-64/io-apic.lisp b/supervisor/x86-64/io-apic.lisp new file mode 100644 index 000000000..5341ffa62 --- /dev/null +++ b/supervisor/x86-64/io-apic.lisp @@ -0,0 +1,249 @@ +(in-package :mezzano.supervisor) + +;; IO-APIC MMIO registers. +(defconstant +io-apic-reg-index+ 0) +(defconstant +io-apic-reg-data+ #x10) + +;; IO-APIC register offsets. +(defconstant +io-apic-id+ #x00) +(defconstant +io-apic-version+ #x01) +(defconstant +io-apic-redirection+ #x10) + +;; Redirection entry bit fields. +(defconstant +io-apic-entry-mask+ #x00010000) +(defconstant +io-apic-entry-trigger-mode+ #x00008000) +(defconstant +io-apic-entry-polarity+ #x00002000) +(defconstant +io-apic-entry-destination-mode+ #x00000800) +(defconstant +io-apic-entry-delIVery-mode+ #x00000700) +(defconstant +io-apic-entry-vector+ #x000000FF) + +(defconstant +io-apic-base-vector+ 48) + +;; Device interrupt vectors are allocated above +IO-APIC-BASE-VECTOR+. +;; The IPI vectors (#x80 wakeup, #x81 panic, #x82 quiesce, #x83 tlb-shootdown, +;; #x85 reschedule, ...) live in the gap below. To keep the GSI->vector map +;; bijective (so the handler can recover the GSI from the vector) the range +;; #x80..#x8F is skipped: GSIs whose naive vector would land there are shifted +;; up past it. +(defconstant +io-apic-ipi-gap-start+ #x80) +(defconstant +io-apic-ipi-gap-end+ #x90) + +(declaim (inline gsi->vector vector->gsi)) +(defun gsi->vector (gsi) + (let ((v (+ +io-apic-base-vector+ gsi))) + (if (>= v +io-apic-ipi-gap-start+) + (+ v (- +io-apic-ipi-gap-end+ +io-apic-ipi-gap-start+)) + v))) + +(defun vector->gsi (vector) + (cond ((>= vector +io-apic-ipi-gap-end+) + (- vector +io-apic-base-vector+ + (- +io-apic-ipi-gap-end+ +io-apic-ipi-gap-start+))) + ((< vector +io-apic-ipi-gap-start+) + (- vector +io-apic-base-vector+)) + (t + ;; Vector fell in the reserved IPI gap; no valid GSI. + -1))) + +(sys.int::defglobal *io-apics* nil) +(sys.int::defglobal *io-apic-active-p* nil) +(sys.int::defglobal *io-apic-irqs* nil) +(sys.int::defglobal *isa-irq-to-gsi* nil) +(sys.int::defglobal *gsi-flags* nil) + +(defstruct (io-apic + (:area :wired)) + (id 0 :type (unsigned-byte 8)) + (gsi-base 0 :type (unsigned-byte 32)) + (address 0 :type (unsigned-byte 64)) + (mmio-base 0) + (max-redirection 0 :type (unsigned-byte 8))) + +(defun io-apic-read (apic offset) + (setf (physical-memref-unsigned-byte-32 + (+ (io-apic-mmio-base apic) +io-apic-reg-index+)) + offset) + (physical-memref-unsigned-byte-32 + (+ (io-apic-mmio-base apic) +io-apic-reg-data+))) + +(defun io-apic-write (apic offset value) + (setf (physical-memref-unsigned-byte-32 + (+ (io-apic-mmio-base apic) +io-apic-reg-index+)) + offset) + (setf (physical-memref-unsigned-byte-32 + (+ (io-apic-mmio-base apic) +io-apic-reg-data+)) + value)) + +(defun io-apic-read-redirection (apic entry) + (let ((low (io-apic-read apic (+ +io-apic-redirection+ (* entry 2)))) + (high (io-apic-read apic (+ +io-apic-redirection+ (* entry 2) 1)))) + (logior low (ash high 32)))) + +(defun io-apic-write-redirection (apic entry value) + (io-apic-write apic (+ +io-apic-redirection+ (* entry 2)) + (ldb (byte 32 0) value)) + (io-apic-write apic (+ +io-apic-redirection+ (* entry 2) 1) + (ldb (byte 32 32) value))) + +(defun io-apic-mask-irq (gsi) + (dolist (apic *io-apics*) + (let ((entry (- gsi (io-apic-gsi-base apic)))) + (when (and (<= 0 entry) (<= entry (io-apic-max-redirection apic))) + (io-apic-write-redirection apic entry + (logior (io-apic-read-redirection apic entry) + +io-apic-entry-mask+)) + (return t))))) + +(defun io-apic-unmask-irq (gsi) + (dolist (apic *io-apics*) + (let ((entry (- gsi (io-apic-gsi-base apic)))) + (when (and (<= 0 entry) (<= entry (io-apic-max-redirection apic))) + (io-apic-write-redirection apic entry + (logand (io-apic-read-redirection apic entry) + (lognot +io-apic-entry-mask+))) + (return t))))) + +(defun io-apic-find-entry (gsi) + (dolist (apic *io-apics*) + (let ((entry (- gsi (io-apic-gsi-base apic)))) + (when (and (<= 0 entry) (<= entry (io-apic-max-redirection apic))) + (return (values apic entry)))))) + +(defun io-apic-irq-spurious-p (gsi) + (declare (ignore gsi)) + nil) + +(defun gsi-flags-polarity (flags) + (if (eql (ldb (byte 2 0) flags) 3) :low :high)) + +(defun gsi-flags-trigger (flags) + (if (eql (ldb (byte 2 2) flags) 3) :level :edge)) + +(defun process-isa-overrides (controllers n-controllers) + (let ((mapping (sys.int::make-simple-vector 16 :wired))) + (dotimes (i 16) (setf (svref mapping i) i)) + (dotimes (i n-controllers) + (let ((entry (svref controllers i))) + (when (acpi-madt-interrupt-source-override-p entry) + (let ((bus (acpi-madt-interrupt-source-override-bus entry)) + (source (acpi-madt-interrupt-source-override-source entry)) + (gsi (acpi-madt-interrupt-source-override-global-system-interrupt entry)) + (flags (acpi-madt-interrupt-source-override-flags entry))) + (when (and (eql bus 0) (< source 16)) + (setf (svref mapping source) gsi) + (when (< gsi 256) + (setf (svref *gsi-flags* gsi) flags)) + (debug-print-line "MADT override: ISA IRQ " source " -> GSI " gsi)))))) + (setf *isa-irq-to-gsi* mapping))) + +(defun init-one-io-apic (entry bsp-apic-id) + (let* ((phys-addr (acpi-madt-ioapic-address entry)) + (id (acpi-madt-ioapic-id entry)) + (gsi-base (acpi-madt-ioapic-global-system-interrupt-base entry))) + (map-physical-memory-early (align-down phys-addr +4k-page-size+) + +4k-page-size+ "IO-APIC") + (let* ((apic (make-io-apic :id id :gsi-base gsi-base + :address phys-addr :mmio-base phys-addr)) + (max-redir (ldb (byte 8 16) (io-apic-read apic +io-apic-version+))) + (n-entries (1+ max-redir))) + (setf (io-apic-max-redirection apic) max-redir) + (push-wired apic *io-apics*) + (dotimes (e n-entries) + (io-apic-write-redirection apic e +io-apic-entry-mask+) + (let* ((gsi (+ gsi-base e)) + (vector (gsi->vector gsi)) + (flags (if (< gsi 256) (svref *gsi-flags* gsi) 0))) + (when (< vector 256) + (io-apic-configure-entry gsi vector bsp-apic-id + :trigger-mode (gsi-flags-trigger flags) + :polarity (gsi-flags-polarity flags) + :masked t)))) + (debug-print-line "IO-APIC " id " at " phys-addr + " GSI base " gsi-base " max redirect " max-redir)))) + +(defun init-io-apic-controllers (controllers n-controllers) + (setf *io-apic-irqs* (sys.int::make-simple-vector 256 :wired)) + (dotimes (i 256) + (setf (svref *io-apic-irqs* i) (make-irq :platform-number i))) + (setf *io-apics* '()) + (let ((bsp-id (if (boundp '*bsp-cpu*) (x86-64-cpu-apic-id *bsp-cpu*) 0))) + (dotimes (i n-controllers) + (let ((entry (svref controllers i))) + (when (acpi-madt-ioapic-p entry) + (init-one-io-apic entry bsp-id)))))) + +(defun hook-io-apic-interrupt-handlers () + (debug-print-line "IO-APIC init done. Hooking handlers...") + (let ((max-gsi 0)) + (dolist (apic *io-apics*) + (setf max-gsi (max max-gsi + (+ (io-apic-gsi-base apic) + (io-apic-max-redirection apic))))) + (dotimes (gsi (1+ max-gsi)) + (let ((vector (gsi->vector gsi))) + (when (< vector 256) + (hook-user-interrupt vector 'io-apic-interrupt-handler)))))) + +(defun finalize-io-apic-init () + (setf *io-apic-active-p* t) + (when (boundp '*i8259-shadow-mask*) + (setf (sys.int::io-port/8 #x21) #xFF + (sys.int::io-port/8 #xA1) #xFF + *i8259-shadow-mask* #xFFFF)) + (write-lapic (logior (read-lapic +lapic-reg-lvt-lint0+) +lapic-lvt-mask+) + +lapic-reg-lvt-lint0+) + (write-lapic (logior (read-lapic +lapic-reg-lvt-lint1+) +lapic-lvt-mask+) + +lapic-reg-lvt-lint1+)) + +(defun initialize-io-apics () + (setf *io-apic-active-p* nil) + (unless (boundp '*io-apics*) (setf *io-apics* nil)) + (when *io-apics* (return-from initialize-io-apics)) + (setf *io-apic-irqs* nil *isa-irq-to-gsi* nil *gsi-flags* nil) + (let ((madt (acpi-get-table 'acpi-madt-table-p))) + (unless madt + (debug-print-line "No MADT table, IO-APIC not available.") + (return-from initialize-io-apics)) + (setf *gsi-flags* (sys.int::make-simple-vector 256 :wired)) + (let* ((controllers (acpi-madt-table-controllers madt)) + (n (sys.int::simple-vector-length controllers))) + (process-isa-overrides controllers n) + (init-io-apic-controllers controllers n) + (when *io-apics* + (hook-io-apic-interrupt-handlers) + (finalize-io-apic-init))))) + +(defun io-apic-interrupt-handler (interrupt-frame info) + ;; If this CPU was idle during a TLB shootdown and missed the IPI, + ;; flush now before the IRQ handler touches any pageable memory. + (check-tlb-generation-consistency) + (let ((gsi (vector->gsi info))) + (when (and (<= 0 gsi) (< gsi 256)) + (irq-deliver interrupt-frame (svref *io-apic-irqs* gsi))) + (lapic-eoi))) + +(defun io-apic-configure-entry (gsi vector destination-apic-id + &key (trigger-mode :edge) (polarity :high) (masked t)) + (multiple-value-bind (apic entry) (io-apic-find-entry gsi) + (unless apic + (debug-print-line "No IO-APIC for GSI " gsi) + (return-from io-apic-configure-entry nil)) + (let ((entry-value (logior vector + (if masked +io-apic-entry-mask+ 0) + (ecase trigger-mode + (:edge 0) + (:level +io-apic-entry-trigger-mode+)) + (ecase polarity + (:high 0) + (:low +io-apic-entry-polarity+))))) + ;; The IO-APIC redirection-entry destination field is always bits + ;; 63:56 regardless of whether the local APIC is in xAPIC or x2APIC + ;; mode: the IO-APIC is a separate device that emits an MSI write + ;; whose 8-bit physical destination is matched against the low 8 + ;; bits of each CPU's (x2)APIC ID. Encoding the destination at bits + ;; 39:32 (as an earlier version did in x2APIC mode) routes every + ;; external interrupt to APIC ID 0. + (setf entry-value (logior entry-value + (ash (ldb (byte 8 0) destination-apic-id) 56))) + (io-apic-write-redirection apic entry entry-value) + t))) diff --git a/supervisor/x86-64/platform.lisp b/supervisor/x86-64/platform.lisp index 80fbc9df1..db4785903 100644 --- a/supervisor/x86-64/platform.lisp +++ b/supervisor/x86-64/platform.lisp @@ -21,6 +21,9 @@ (t (debug-print-line "No ACPI FADT table detected."))) (initialize-cpu) + (unless (boundp '*io-apic-active-p*) + (setf *io-apic-active-p* nil)) + (initialize-io-apics) (initialize-platform-time) (mezzano.supervisor.intel-8042:probe) (initialize-pci) diff --git a/supervisor/x86-64/snapshot.lisp b/supervisor/x86-64/snapshot.lisp index 566fd47d1..161a6ffaf 100644 --- a/supervisor/x86-64/snapshot.lisp +++ b/supervisor/x86-64/snapshot.lisp @@ -44,6 +44,5 @@ (loop for i from 65 below 256 ; stack area to end of persistent memory. do (mark-pml4e-cow i)))) (begin-tlb-shootdown) - (flush-tlb) (tlb-shootdown-all) (finish-tlb-shootdown)) diff --git a/supervisor/x86-64/thread.lisp b/supervisor/x86-64/thread.lisp index 50dfff908..75c7bb9f9 100644 --- a/supervisor/x86-64/thread.lisp +++ b/supervisor/x86-64/thread.lisp @@ -42,14 +42,8 @@ (setf (sys.int::msr +msr-ia32-gs-base+) (sys.int::lisp-object-address thread))) (sys.int::define-lap-function %%restore-full-save-thread ((thread)) - ;; Drop the global thread lock. - ;; This must be done here, not in %%switch-to-thread-common to prevent - ;; another CPU from switching on to the old thread's stack while it is - ;; still in use. - (sys.lap-x86:mov64 :r9 (:symbol-global-cell *global-thread-lock*)) - (sys.lap-x86:mov64 :r10 (:constant :unlocked)) - (sys.lap-x86:mov64 (:object :r9 #.sys.int::+symbol-value-cell-value+) :r10) - ;; Returning to an interrupted thread. Restore saved registers and stuff. + ;; The global thread lock is released by Lisp code before calling + ;; this function. Returning to an interrupted thread. (sys.lap-x86:lea64 :rsp (:object :r8 #.+thread-interrupt-save-area+)) (sys.lap-x86:pop :r15) (sys.lap-x86:pop :r14) @@ -69,13 +63,8 @@ (sys.lap-x86:iret)) (sys.int::define-lap-function %%restore-partial-save-thread ((thread)) - ;; Drop the global thread lock. - ;; This must be done here, not in %%switch-to-thread-common to prevent - ;; another CPU from switching on to the old thread's stack while it is - ;; still in use. - (sys.lap-x86:mov64 :r9 (:symbol-global-cell *global-thread-lock*)) - (sys.lap-x86:mov64 :r10 (:constant :unlocked)) - (sys.lap-x86:mov64 (:object :r9 #.sys.int::+symbol-value-cell-value+) :r10) + ;; The global thread lock is released by Lisp code before calling + ;; this function. ;; Restore stack pointer. (sys.lap-x86:mov64 :rsp (:object :r8 #.+thread-state-rsp+)) ;; Restore frame pointer. diff --git a/supervisor/x86-64/time.lisp b/supervisor/x86-64/time.lisp index 8c6ba9ab0..edcfffc72 100644 --- a/supervisor/x86-64/time.lisp +++ b/supervisor/x86-64/time.lisp @@ -22,34 +22,33 @@ (setf (sys.int::io-port/8 #x40) (ldb (byte 8 0) divisor) (sys.int::io-port/8 #x40) (ldb (byte 8 8) divisor)))) +(defun wait-for-next-pit-tick (current-time) + (loop + (let ((new (get-internal-run-time))) + (when (not (eql new current-time)) + (return new))))) + +(defun calibrate-average (fn) + (let ((n (funcall fn))) + (dotimes (i 5) + (setf n (/ (+ n (funcall fn)) 2))) + n)) + (defun calibrate-tsc-1 () "Return the number of cycles per second, approximately." - (let ((initial-time (get-internal-run-time)) - (start-time nil) - (end-time nil) + (let ((start-time (wait-for-next-pit-tick (get-internal-run-time))) (start-cycle nil) (end-cycle nil)) - ;; Wait for the start of this tick. - (loop - (setf start-time (get-internal-run-time)) - (when (not (eq start-time initial-time)) - (return))) (setf start-cycle (sys.int::tsc)) - (loop - (setf end-time (get-internal-run-time)) - (when (not (eq end-time start-time)) - (return))) - (setf end-cycle (sys.int::tsc)) - (let* ((cycles (- end-cycle start-cycle)) - (time (/ (float (- end-time start-time)) internal-time-units-per-second)) - (cycles-per-second (/ cycles time))) - cycles-per-second))) + (let ((end-time (wait-for-next-pit-tick start-time))) + (setf end-cycle (sys.int::tsc)) + (let* ((cycles (- end-cycle start-cycle)) + (time (/ (float (- end-time start-time)) internal-time-units-per-second)) + (cycles-per-second (/ cycles time))) + cycles-per-second)))) (defun calibrate-tsc () - (let ((n (calibrate-tsc-1))) - (dotimes (i 5) - (setf n (/ (+ n (calibrate-tsc-1)) 2))) - (setf *cpu-speed* (floor n)))) + (setf *cpu-speed* (floor (calibrate-average #'calibrate-tsc-1)))) (defun high-precision-time-units-to-internal-time-units (tsc-time) (if (boundp '*cpu-speed*) @@ -72,7 +71,7 @@ HIGH-PRECISION-TIME-UNITS-TO-INTERNAL-TIME-UNITS." (defun initialize-platform-time () (when (not (boundp '*rtc-lock*)) - (setf *rtc-lock* (place-spinlock-initializer))) + (setf *rtc-lock* :unlocked)) (configure-pit-tick-rate 100) (irq-attach (platform-irq +pit-irq+) 'pit-irq-handler diff --git a/tools/cold-generator2/arm64.lisp b/tools/cold-generator2/arm64.lisp index f236c9809..41c254595 100644 --- a/tools/cold-generator2/arm64.lisp +++ b/tools/cold-generator2/arm64.lisp @@ -32,8 +32,10 @@ :name (env:translate-symbol environment 'sys.int::%%funcallable-instance-trampoline%%))) (setf (env:cross-symbol-value environment 'mezzano.supervisor::*bsp-wired-stack*) (env:make-stack environment (* 128 1024))) - (setf (env:cross-symbol-value environment 'mezzano.supervisor::*bsp-cpu*) - (env:make-structure environment 'mezzano.supervisor::arm64-cpu)) + (let ((bsp-cpu (env:make-structure environment 'mezzano.supervisor::arm64-cpu))) + (setf (env:structure-slot-value environment bsp-cpu 'mezzano.supervisor::mcs-node) + (env:make-structure environment 'mezzano.supervisor::mcs-node)) + (setf (env:cross-symbol-value environment 'mezzano.supervisor::*bsp-cpu*) bsp-cpu)) (setf (env:cross-symbol-value environment 'mezzano.supervisor::*arm64-exception-vector*) (env:compile-lap environment (loop repeat (/ (+ 2048 +exception-vector-alignment+) 8) ; for alignment diff --git a/tools/cold-generator2/cold-generator.lisp b/tools/cold-generator2/cold-generator.lisp index 623df3f65..3e41f3d1c 100644 --- a/tools/cold-generator2/cold-generator.lisp +++ b/tools/cold-generator2/cold-generator.lisp @@ -29,6 +29,7 @@ ("supervisor/arm64/psci.lisp" :arm64) "supervisor/interrupts.lisp" ("supervisor/x86-64/interrupts.lisp" :x86-64) + ("supervisor/x86-64/io-apic.lisp" :x86-64) ("supervisor/arm64/interrupts.lisp" :arm64) ("supervisor/arm64/gic.lisp" :arm64) "supervisor/debug.lisp" diff --git a/tools/cold-generator2/x86-64.lisp b/tools/cold-generator2/x86-64.lisp index 60cfcf576..edd9ef764 100644 --- a/tools/cold-generator2/x86-64.lisp +++ b/tools/cold-generator2/x86-64.lisp @@ -228,8 +228,10 @@ (env:make-stack environment (* 128 1024))) (setf (env:cross-symbol-value environment 'sys.int::*bsp-wired-stack*) (env:make-stack environment (* 128 1024))) - (setf (env:cross-symbol-value environment 'mezzano.supervisor::*bsp-cpu*) - (env:make-structure environment 'mezzano.supervisor::x86-64-cpu))) + (let ((bsp-cpu (env:make-structure environment 'mezzano.supervisor::x86-64-cpu))) + (setf (env:structure-slot-value environment bsp-cpu 'mezzano.supervisor::mcs-node) + (env:make-structure environment 'mezzano.supervisor::mcs-node)) + (setf (env:cross-symbol-value environment 'mezzano.supervisor::*bsp-cpu*) bsp-cpu))) (defmethod post-serialize-image-for-target (image environment (target (eql :x86-64))) nil)