diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index e6b8da20..dbce4dae 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -2947,6 +2947,25 @@ extension ContentView { private func beginDictationRecording(for slot: SettingsStore.DictationShortcutSlot, mode: ActiveRecordingMode) { DebugLogger.shared.debug("Begin dictation recording for slot \(slot.rawValue)", source: "ContentView") + + // Fire the duck first, then the start sound — at the very top of this + // function so they happen alongside the hotkey press, not 80ms later + // behind captureRecordingContext + setActiveRecordingMode + + // setOverlayMode. Duck-before-sound matters because CoreAudio's sound + // playback latency (~30-50ms) is shorter than the time it takes the + // fade to drop the music to a perceptibly quieter level; if the + // sound went first, you'd hear it before the music dipped. + let willStart = !self.asr.isRunning + var preAppliedMediaAction: MediaSessionAction = .none + if willStart, SettingsStore.shared.mediaBehaviorDuringTranscription == .duck { + if let prev = MediaPlaybackService.shared.duckSystemVolume() { + preAppliedMediaAction = .ducked(previousVolume: prev) + } + } + if willStart, SettingsStore.shared.enableTranscriptionSounds { + TranscriptionSoundPlayer.shared.playStartSound() + } + self.captureRecordingContext() self.applyDictationShortcutSelectionContext(for: slot) self.setActiveRecordingMode(mode) @@ -2954,11 +2973,8 @@ extension ContentView { self.menuBarManager.setOverlayMode(.dictation) guard !self.asr.isRunning else { return } - if SettingsStore.shared.enableTranscriptionSounds { - TranscriptionSoundPlayer.shared.playStartSound() - } Task { - await self.asr.start() + await self.asr.start(preAppliedMediaAction: preAppliedMediaAction) } } diff --git a/Sources/Fluid/Persistence/BackupService.swift b/Sources/Fluid/Persistence/BackupService.swift index deab0202..dbb10f67 100644 --- 
a/Sources/Fluid/Persistence/BackupService.swift +++ b/Sources/Fluid/Persistence/BackupService.swift @@ -61,6 +61,11 @@ struct SettingsBackupPayload: Codable, Equatable { let removeFillerWordsEnabled: Bool let gaavModeEnabled: Bool let pauseMediaDuringTranscription: Bool + /// Lossless capture of the unified media-behaviour enum (none / pause / + /// duck). Optional so that backups created by older builds (which only + /// wrote the legacy bool) still decode cleanly. New builds prefer this + /// field on restore and fall back to the bool only when it's nil. + let mediaBehaviorDuringTranscription: SettingsStore.MediaBehaviorDuringTranscription? let vocabularyBoostingEnabled: Bool let customDictionaryEntries: [SettingsStore.CustomDictionaryEntry] let selectedDictationPromptID: String? diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 35410389..e3e2575f 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2261,6 +2261,7 @@ final class SettingsStore: ObservableObject { removeFillerWordsEnabled: self.removeFillerWordsEnabled, gaavModeEnabled: self.gaavModeEnabled, pauseMediaDuringTranscription: self.pauseMediaDuringTranscription, + mediaBehaviorDuringTranscription: self.mediaBehaviorDuringTranscription, vocabularyBoostingEnabled: self.vocabularyBoostingEnabled, customDictionaryEntries: self.customDictionaryEntries, selectedDictationPromptID: self.selectedDictationPromptID, @@ -2332,7 +2333,15 @@ final class SettingsStore: ObservableObject { self.fillerWords = payload.fillerWords self.removeFillerWordsEnabled = payload.removeFillerWordsEnabled self.gaavModeEnabled = payload.gaavModeEnabled - self.pauseMediaDuringTranscription = payload.pauseMediaDuringTranscription + // Prefer the lossless enum if the backup carried it (new builds); + // fall back to the legacy bool for backups from older versions. 
+ // Either way the assignment is deterministic — current state on the + // restoring machine never decides the outcome. + if let mode = payload.mediaBehaviorDuringTranscription { + self.mediaBehaviorDuringTranscription = mode + } else { + self.mediaBehaviorDuringTranscription = payload.pauseMediaDuringTranscription ? .pause : .none + } self.vocabularyBoostingEnabled = payload.vocabularyBoostingEnabled self.customDictionaryEntries = payload.customDictionaryEntries @@ -2899,13 +2908,72 @@ final class SettingsStore: ObservableObject { // MARK: - Media Playback Control - /// When enabled, automatically pauses system media playback when transcription starts. - /// Only resumes if FluidVoice was the one that paused it. + /// What FluidVoice does to system media playback when transcription starts. + enum MediaBehaviorDuringTranscription: String, Codable, CaseIterable, Identifiable { + /// Leave media alone. + case none + /// Pause currently playing media; resume on stop if FluidVoice paused it. + case pause + /// Drop the system output volume to a low value during transcription + /// and restore it on stop. Music keeps playing, just quietly. + case duck + + var id: String { self.rawValue } + + var displayName: String { + switch self { + case .none: return "Leave Playing" + case .pause: return "Pause" + case .duck: return "Lower Volume" + } + } + } + + /// What to do with system media playback while transcribing. + /// New unified setting; reads migrate cleanly from the legacy + /// `pauseMediaDuringTranscription` boolean if present. + var mediaBehaviorDuringTranscription: MediaBehaviorDuringTranscription { + get { + if let raw = self.defaults.string(forKey: Keys.mediaBehaviorDuringTranscription), + let mode = MediaBehaviorDuringTranscription(rawValue: raw) { + return mode + } + // Migrate from legacy bool key on first read. 
+ if self.defaults.object(forKey: Keys.pauseMediaDuringTranscription) != nil { + return self.defaults.bool(forKey: Keys.pauseMediaDuringTranscription) ? .pause : .none + } + return .none + } + set { + objectWillChange.send() + self.defaults.set(newValue.rawValue, forKey: Keys.mediaBehaviorDuringTranscription) + // Keep the legacy bool in sync so backup/restore round-trips don't + // surprise users who roll back to an older build. + self.defaults.set(newValue == .pause, forKey: Keys.pauseMediaDuringTranscription) + } + } + + /// Legacy boolean view of `mediaBehaviorDuringTranscription`. Kept so + /// `BackupService`'s payload (which still exports a `Bool` for backward + /// compatibility with older builds) round-trips through the same key. + /// Deterministic in both directions: `true` selects `.pause`, `false` + /// selects `.none`. Restore paths should prefer the lossless enum field + /// on the payload when available so `.duck` survives a round trip. var pauseMediaDuringTranscription: Bool { - get { self.defaults.object(forKey: Keys.pauseMediaDuringTranscription) as? Bool ?? false } + get { self.mediaBehaviorDuringTranscription == .pause } + set { + self.mediaBehaviorDuringTranscription = newValue ? .pause : .none + } + } + + /// When enabled, FluidVoice creates an `IOPMAssertion` while a recording + /// is active so the display doesn't sleep and the screen doesn't lock + /// mid-dictation. Released as soon as recording stops. + var preventSleepDuringTranscription: Bool { + get { self.defaults.object(forKey: Keys.preventSleepDuringTranscription) as? Bool ?? 
true } set { objectWillChange.send() - self.defaults.set(newValue, forKey: Keys.pauseMediaDuringTranscription) + self.defaults.set(newValue, forKey: Keys.preventSleepDuringTranscription) } } @@ -3670,6 +3738,8 @@ private extension SettingsStore { /// Media Playback Control static let pauseMediaDuringTranscription = "PauseMediaDuringTranscription" + static let mediaBehaviorDuringTranscription = "MediaBehaviorDuringTranscription" + static let preventSleepDuringTranscription = "PreventSleepDuringTranscription" /// Custom Dictation Prompt static let customDictationPrompt = "CustomDictationPrompt" diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 984d5117..f67c5259 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -506,9 +506,9 @@ final class ASRService: ObservableObject { private let audioRouteRecoveryDelayNanoseconds: UInt64 = 1_000_000_000 private var isRecoveringAudioRoute = false - /// Tracks whether we paused system media for this recording session. - /// Used to resume playback only if we were the ones who paused it. - private var didPauseMediaForThisSession: Bool = false + /// What `MediaPlaybackService` did at the start of this session (paused, + /// ducked, or nothing). Used to undo that action on stop. + private var mediaSessionAction: MediaSessionAction = .none private var audioLevelSubject = PassthroughSubject() var audioLevelPublisher: AnyPublisher { self.audioLevelSubject.eraseToAnyPublisher() } @@ -739,20 +739,30 @@ final class ASRService: ObservableObject { /// ## Errors /// If audio session configuration fails, the method will silently fail /// and `isRunning` will remain `false`. Check the debug logs for details. 
- func start() async { + /// - Parameter preAppliedMediaAction: Optionally, a media action the + /// caller has already taken before invoking start() — used by + /// ContentView to fire the duck ramp the instant the hotkey fires + /// (alongside the start sound) rather than waiting ~80ms behind the + /// pre-recording UI work. If `.none`, start() will fire the duck + /// itself based on the user's setting. + func start(preAppliedMediaAction: MediaSessionAction = .none) async { DebugLogger.shared.info("🎤 START() called - beginning recording session", source: "ASRService") guard self.micStatus == .authorized else { DebugLogger.shared.error("❌ START() blocked - mic not authorized", source: "ASRService") + // The caller may have pre-fired a duck for a session we never get + // to run; undo it so the user's volume isn't left at 10%. + await MediaPlaybackService.shared.restore(from: preAppliedMediaAction) return } guard self.isRunning == false, self.isStarting == false else { DebugLogger.shared.warning("⚠️ START() blocked - already running (started: \(self.isRunning), starting: \(self.isStarting))", source: "ASRService") + await MediaPlaybackService.shared.restore(from: preAppliedMediaAction) return } - // Reset media pause state for this session - self.didPauseMediaForThisSession = false + // Adopt any media action the caller already took, otherwise reset. 
+ self.mediaSessionAction = preAppliedMediaAction self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false @@ -788,14 +798,35 @@ final class ASRService: ObservableObject { try self.setupEngineTap() DebugLogger.shared.debug("✅ Engine tap setup complete", source: "ASRService") - // Pause system media AFTER successful audio setup but BEFORE setting isRunning - // This ensures we only pause media when we know recording will succeed - if SettingsStore.shared.pauseMediaDuringTranscription { + // Apply media pause behaviour AFTER successful audio setup but + // BEFORE setting isRunning, so we only pause if recording will + // actually start. Duck is the responsibility of the caller — + // ContentView pre-fires it the instant the hotkey arrives so the + // fade visibly starts alongside the start sound rather than + // waiting behind audio engine setup. If a caller doesn't supply + // a pre-applied duck and the setting is .duck, we fire it here + // as a fallback so the behaviour still works for any code path + // that hasn't been hoisted. + switch SettingsStore.shared.mediaBehaviorDuringTranscription { + case .none: + self.mediaSessionAction = .none + case .pause: let didPause = await MediaPlaybackService.shared.pauseIfPlaying() - self.didPauseMediaForThisSession = didPause + self.mediaSessionAction = didPause ? .paused : .none if didPause { DebugLogger.shared.info("🎵 Paused system media for transcription", source: "ASRService") } + case .duck: + if case .ducked = self.mediaSessionAction { + // Duck was pre-fired by the caller — nothing more to do. + } else if let previousVolume = MediaPlaybackService.shared.duckSystemVolume() { + self.mediaSessionAction = .ducked(previousVolume: previousVolume) + } + } + + // Hold the display awake while recording, if the user opted in. 
+ if SettingsStore.shared.preventSleepDuringTranscription { + SleepPreventionService.shared.preventSleep() } self.isRunning = true @@ -821,12 +852,11 @@ final class ASRService: ObservableObject { } catch { DebugLogger.shared.error("Failed to start ASR session: \(error)", source: "ASRService") - // Resume media if we paused it before the failure - if self.didPauseMediaForThisSession { - await MediaPlaybackService.shared.resumeIfWePaused(true) - self.didPauseMediaForThisSession = false - DebugLogger.shared.info("🎵 Resumed system media after start failure", source: "ASRService") - } + // Undo any media action we took before the failure. + await MediaPlaybackService.shared.restore(from: self.mediaSessionAction) + self.mediaSessionAction = .none + // Always release any sleep assertion we created. + SleepPreventionService.shared.allowSleep() // Provide user-friendly error feedback let errorMessage: String @@ -894,9 +924,14 @@ final class ASRService: ObservableObject { self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false - // Capture media pause state before we reset it, for resuming at the end - let shouldResumeMedia = SettingsStore.shared.pauseMediaDuringTranscription && self.didPauseMediaForThisSession - self.didPauseMediaForThisSession = false // Reset for next session + // Capture the media action so we can undo it at every exit path. + let pendingMediaRestore = self.mediaSessionAction + self.mediaSessionAction = .none // Reset for next session + + // Always release the sleep assertion at the start of stop — recording + // is over from the user's point of view, even if transcription is + // still running. 
+ SleepPreventionService.shared.allowSleep() DebugLogger.shared.debug("📍 Preparing final transcription", source: "ASRService") @@ -928,6 +963,12 @@ final class ASRService: ObservableObject { // New engine will be lazily created on next access via computed property DebugLogger.shared.debug("✅ Engine instance recreated", source: "ASRService") + // Restore media as soon as the audio engine is fully torn down — there's + // no risk of recording the volume bump now that capture has stopped, and + // it lines the volume restore up with the moment the user lifts the + // hotkey rather than the moment transcription finishes. + await MediaPlaybackService.shared.restore(from: pendingMediaRestore) + // CRITICAL FIX: Await completion of streaming task AND any pending transcriptions // This prevents use-after-free crashes (EXC_BAD_ACCESS) when clearing buffer DebugLogger.shared.debug("⏳ Awaiting stopStreamingTimerAndAwait()...", source: "ASRService") @@ -955,10 +996,6 @@ final class ASRService: ObservableObject { "Final ASR result | provider=\(self.transcriptionProvider.name) | samples=0 | textChars=0 | confidence=nil | reason=no_audio", source: "ASRService" ) - if shouldResumeMedia { - await MediaPlaybackService.shared.resumeIfWePaused(true) - DebugLogger.shared.info("🎵 Resumed system media after empty audio", source: "ASRService") - } return "" } @@ -984,11 +1021,6 @@ final class ASRService: ObservableObject { guard self.transcriptionProvider.isReady else { DebugLogger.shared.error("Transcription provider is not ready", source: "ASRService") - // Resume media playback if we paused it - if shouldResumeMedia { - await MediaPlaybackService.shared.resumeIfWePaused(true) - DebugLogger.shared.info("🎵 Resumed system media after provider not ready", source: "ASRService") - } return "" } @@ -1026,12 +1058,6 @@ final class ASRService: ObservableObject { self.recordWordBoostHitIfAny(transcribedText: cleanedText) DebugLogger.shared.debug("After post-processing: '\(cleanedText)'", source: 
"ASRService") - // Resume media playback if we paused it - if shouldResumeMedia { - await MediaPlaybackService.shared.resumeIfWePaused(true) - DebugLogger.shared.info("🎵 Resumed system media after transcription", source: "ASRService") - } - return cleanedText } catch { DebugLogger.shared.error("ASR transcription failed: \(error)", source: "ASRService") @@ -1055,12 +1081,6 @@ final class ASRService: ObservableObject { // (e.g., accidental hotkey press) and would disrupt the user's workflow. // Errors are logged for debugging purposes. - // Resume media playback if we paused it - if shouldResumeMedia { - await MediaPlaybackService.shared.resumeIfWePaused(true) - DebugLogger.shared.info("🎵 Resumed system media after transcription failure", source: "ASRService") - } - return "" } } @@ -1073,9 +1093,12 @@ final class ASRService: ObservableObject { self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false - // Capture media pause state before we reset it, for resuming at the end - let shouldResumeMedia = SettingsStore.shared.pauseMediaDuringTranscription && self.didPauseMediaForThisSession - self.didPauseMediaForThisSession = false // Reset for next session + // Capture the media action so we can undo it after teardown. + let pendingMediaRestore = self.mediaSessionAction + self.mediaSessionAction = .none // Reset for next session + + // Release the sleep assertion as soon as recording stops. 
+ SleepPreventionService.shared.allowSleep() DebugLogger.shared.info("🛑 Stopping recording - releasing audio devices", source: "ASRService") @@ -1117,11 +1140,7 @@ final class ASRService: ObservableObject { self.lastStreamingChunkFailureAnalyticsAt = nil self.refreshWordBoostStatus() - // Resume media playback if we paused it - if shouldResumeMedia { - await MediaPlaybackService.shared.resumeIfWePaused(true) - DebugLogger.shared.info("🎵 Resumed system media after stopping without transcription", source: "ASRService") - } + await MediaPlaybackService.shared.restore(from: pendingMediaRestore) } private func configureSession() throws { diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 72160960..fbc72904 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -3,11 +3,46 @@ import Foundation import MediaRemoteAdapter #endif +/// What `MediaPlaybackService` did at the start of a transcription session. +/// Stored on `ASRService` so the matching restore at stop knows whether to +/// resume playback, restore the system volume, or do nothing. +/// +/// The duck case carries a `SystemVolumeSnapshot` rather than a single +/// `Float` so output devices that expose only per-channel volume (no master) +/// can have their stereo balance restored exactly. A flat scalar would +/// collapse L/R to their average on every duck cycle. +enum MediaSessionAction: Equatable { + case none + case paused + case ducked(previousVolume: SystemVolumeSnapshot) +} + +/// Volume the system output is dropped to while ducking. 10% of full scale — +/// quiet enough that the music doesn't compete with dictation, loud enough +/// that the user knows something's still playing. +private let kDuckTargetVolume: Float = 0.10 + +/// Length of the fade ramp in seconds. 
Short enough that the duck has +/// fully landed before the user starts dictating, long enough to read as a +/// fade rather than a hard cut. +private let kFadeDuration: TimeInterval = 0.1 + +/// Number of discrete steps in the fade ramp. 30 steps over 100ms is 300 Hz, +/// well above the threshold where you'd hear the staircase. The fade only +/// covers the second half of the duck (the first half is a synchronous snap +/// in `duckSystemVolume()` for snappy feel) so a relaxed 100ms tail reads +/// as a soft landing rather than a long fade. +private let kFadeSteps = 30 + /// Service that wraps MediaRemoteAdapter's MediaController to provide -/// controlled pause/resume functionality during transcription. +/// controlled pause/resume functionality during transcription, plus a +/// volume-duck path for users who want music to keep playing quietly. /// -/// This service ensures we only pause media if it's currently playing, -/// and only resume if we were the ones who paused it. +/// Pause path: only pauses if media is currently playing, and only resumes +/// if we were the ones who paused it. +/// +/// Duck path: snapshots the current default output device volume, sets it to +/// `kDuckTargetVolume`, and restores the snapshotted value on stop. @MainActor final class MediaPlaybackService { static let shared = MediaPlaybackService() @@ -16,6 +51,11 @@ final class MediaPlaybackService { private let mediaController = MediaController() #endif + /// Holds the in-flight volume-fade task so a new fade can cancel any + /// previous one (e.g. the user releases the hotkey before the + /// fade-down has finished, and the fade-up needs to take over cleanly). + private var activeFadeTask: Task<Void, Never>? + private init() {} // MARK: - Public API @@ -148,4 +188,122 @@ final class MediaPlaybackService { // No-op on Intel } #endif + + // MARK: - Duck path + + /// Snapshots the current system output volume and starts a background + /// fade-down to `kDuckTargetVolume`. 
Returns the snapshot so the caller + /// can hand it back to `restoreSystemVolume(previous:)` on stop. + /// + /// Returns `nil` if the volume couldn't be read, or if the user's + /// volume is already at or below the duck target — in either case we + /// don't touch the volume at all (and the matching restore becomes a + /// no-op). + func duckSystemVolume() -> SystemVolumeSnapshot? { + guard let snapshot = SystemVolumeController.currentSnapshot() else { + DebugLogger.shared.debug( + "MediaPlaybackService: Couldn't read system volume, skipping duck", + source: "MediaPlaybackService" + ) + return nil + } + let previousScalar = snapshot.averageScalar + guard previousScalar > kDuckTargetVolume else { + DebugLogger.shared.debug( + "MediaPlaybackService: Volume \(String(format: "%.2f", previousScalar)) already ≤ duck target, skipping", + source: "MediaPlaybackService" + ) + return nil + } + + // Snap the volume halfway down to the duck target SYNCHRONOUSLY before + // starting the detached fade. This puts a clearly audible drop on the + // user's ear within the round-trip time of one CoreAudio property + // write (sub-millisecond), bypassing both Task.detached scheduling + // latency and the fade ramp's first few steps where the per-step + // volume change is too small to perceive. The detached fade then + // smoothly lands the rest of the way to kDuckTargetVolume. + let immediateDrop = (previousScalar + kDuckTargetVolume) / 2 + SystemVolumeController.setVolume(immediateDrop) + + DebugLogger.shared.info( + "🔉 Snapped \(String(format: "%.2f", previousScalar)) → \(String(format: "%.2f", immediateDrop)), fading to \(String(format: "%.2f", kDuckTargetVolume)) over \(kFadeDuration)s", + source: "MediaPlaybackService" + ) + self.startFade(from: immediateDrop, to: kDuckTargetVolume, restoreSnapshot: nil) + return snapshot + } + + /// Fades the system output volume back up to the snapshot captured by + /// `duckSystemVolume()`. 
Reads the live volume first so a mid-fade + /// interruption (user released the hotkey before the duck-down had + /// finished) restarts cleanly from wherever the volume actually is. + /// Re-applies the snapshot exactly at the end of the ramp so per-channel + /// detail (e.g. uneven L/R balance) comes back precisely rather than + /// flattened to the fade scalar. + func restoreSystemVolume(previous: SystemVolumeSnapshot?) { + guard let previous else { return } + let start = SystemVolumeController.currentVolume() ?? kDuckTargetVolume + let target = previous.averageScalar + DebugLogger.shared.info( + "🔊 Fading system volume \(String(format: "%.2f", start)) → \(String(format: "%.2f", target)) over \(kFadeDuration)s", + source: "MediaPlaybackService" + ) + self.startFade(from: start, to: target, restoreSnapshot: previous) + } + + /// Cancels any in-flight fade and starts a new one from `start` to + /// `target` over `kFadeDuration`. Runs detached so the main actor isn't + /// blocked between steps; CoreAudio property writes are thread-safe. + /// + /// - Parameter restoreSnapshot: If non-nil, this snapshot is applied + /// exactly at the end of the ramp instead of writing the scalar + /// `target`. Used by the fade-up so per-channel volume detail + /// (uneven L/R balance) is restored precisely. Pass `nil` for the + /// fade-down — there's nothing to preserve at the duck target. + private func startFade(from start: Float, to target: Float, restoreSnapshot: SystemVolumeSnapshot?) { + self.activeFadeTask?.cancel() + + let stepCount = kFadeSteps + let stepDelay = kFadeDuration / Double(stepCount) + let stepDelayNanos = UInt64(stepDelay * 1_000_000_000) + let delta = (target - start) / Float(stepCount) + + self.activeFadeTask = Task.detached(priority: .userInitiated) { + for step in 1...stepCount { + if Task.isCancelled { return } + let value = start + delta * Float(step) + _ = SystemVolumeController.setVolume(value) + if step < stepCount { + try? 
await Task.sleep(nanoseconds: stepDelayNanos) + } + } + // Land exactly on the target if we weren't cancelled — + // floating-point drift across the steps could otherwise leave + // us a hair off (e.g. 0.0997 instead of 0.10). If the caller + // asked for an exact snapshot restore, prefer that over the + // scalar target so per-channel detail comes back intact. + if !Task.isCancelled { + if let restoreSnapshot { + _ = SystemVolumeController.restore(restoreSnapshot) + } else { + _ = SystemVolumeController.setVolume(target) + } + } + } + } + + // MARK: - Unified restore + + /// Undoes whatever `MediaSessionAction` was taken at recording start. + func restore(from action: MediaSessionAction) async { + switch action { + case .none: + return + case .paused: + await self.resumeIfWePaused(true) + case .ducked(let previousVolume): + self.restoreSystemVolume(previous: previousVolume) + } + } } diff --git a/Sources/Fluid/Services/SleepPreventionService.swift b/Sources/Fluid/Services/SleepPreventionService.swift new file mode 100644 index 00000000..5e22a1bc --- /dev/null +++ b/Sources/Fluid/Services/SleepPreventionService.swift @@ -0,0 +1,68 @@ +import Foundation +import IOKit.pwr_mgt + +/// Holds an `IOPMAssertion` that prevents the display (and the system) from +/// going idle while the user is dictating. Released as soon as recording +/// stops so the laptop returns to its normal sleep behaviour. +/// +/// Uses `kIOPMAssertionTypePreventUserIdleDisplaySleep` rather than +/// `kIOPMAssertionTypePreventUserIdleSystemSleep`. The display assertion +/// implies the system one (display can't be on if the system's asleep), so +/// it's strictly stronger; and it stops the screen-lock timer that fires +/// after display sleep — the user-visible symptom being a mid-dictation lock. 
+@MainActor +final class SleepPreventionService { + static let shared = SleepPreventionService() + + private var assertionID: IOPMAssertionID = 0 + private var isActive = false + + private init() {} + + /// Creates the sleep-prevention assertion. No-op if already active so the + /// service is safe to call from re-entrant code paths. + func preventSleep(reason: String = "FluidVoice transcribing") { + guard !self.isActive else { return } + + var newID: IOPMAssertionID = 0 + let result = IOPMAssertionCreateWithName( + kIOPMAssertionTypePreventUserIdleDisplaySleep as CFString, + IOPMAssertionLevel(kIOPMAssertionLevelOn), + reason as CFString, + &newID + ) + + if result == kIOReturnSuccess { + self.assertionID = newID + self.isActive = true + DebugLogger.shared.info( + "☕ Sleep prevention assertion created (\(reason))", + source: "SleepPreventionService" + ) + } else { + DebugLogger.shared.warning( + "SleepPreventionService: IOPMAssertionCreateWithName failed (\(result))", + source: "SleepPreventionService" + ) + } + } + + /// Releases the assertion. No-op if there's nothing to release. + func allowSleep() { + guard self.isActive else { return } + let result = IOPMAssertionRelease(self.assertionID) + self.assertionID = 0 + self.isActive = false + if result == kIOReturnSuccess { + DebugLogger.shared.info( + "💤 Sleep prevention assertion released", + source: "SleepPreventionService" + ) + } else { + DebugLogger.shared.warning( + "SleepPreventionService: IOPMAssertionRelease failed (\(result))", + source: "SleepPreventionService" + ) + } + } +} diff --git a/Sources/Fluid/Services/SystemVolumeController.swift b/Sources/Fluid/Services/SystemVolumeController.swift new file mode 100644 index 00000000..9d47712a --- /dev/null +++ b/Sources/Fluid/Services/SystemVolumeController.swift @@ -0,0 +1,186 @@ +import CoreAudio +import Foundation + +/// A captured snapshot of the system output volume that survives the duck +/// cycle and can be restored exactly. 
Keeps left/right channel values +/// independent for devices that don't expose a master volume property — +/// otherwise a non-centred balance setup would have one duck cycle +/// permanently flatten its channels to the average. +enum SystemVolumeSnapshot: Equatable { + case master(Float) + case channels(left: Float?, right: Float?) + + /// Scalar used as the start or target of a fade ramp — fades interpolate a + /// single value, then we restore the exact snapshot at the end so any + /// per-channel detail comes back precisely. + var averageScalar: Float { + switch self { + case .master(let v): + return v + case .channels(let l, let r): + switch (l, r) { + case let (l?, r?): return (l + r) / 2 + case let (l?, nil): return l + case let (nil, r?): return r + case (nil, nil): return 0 + } + } + } +} + +/// Reads and writes the default output device's volume via CoreAudio's +/// `AudioObjectGetPropertyData` / `AudioObjectSetPropertyData`. +/// +/// macOS doesn't expose per-app output volume in any stable public API, so +/// adjusting the system output level is the closest equivalent. Side effect: +/// notification dings and other system sounds duck along with media for the +/// duration. That's intentional — the user is dictating, they don't want +/// surprises through the speakers. +/// +/// CoreAudio's `AudioObject*` APIs are thread-safe, so this enum is callable +/// from any actor or detached task — useful for background fade ramps. +enum SystemVolumeController { + /// Returns the default output device's volume as one scalar in `0.0...1.0` + /// (master if exposed, else the mean of whichever of channels 1/2 are readable), + /// or `nil` if it can't be read. Used as the "from" value of the restore fade-up. + static func currentVolume() -> Float? { + currentSnapshot()?.averageScalar + } + + /// Captures the current default output device's full volume state for + /// later exact restoration. 
Prefers the master scalar, falls back to a + /// per-channel snapshot for devices that don't expose master volume. + static func currentSnapshot() -> SystemVolumeSnapshot? { + guard let deviceID = defaultOutputDeviceID() else { return nil } + + var masterAddress = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyVolumeScalar, + mScope: kAudioDevicePropertyScopeOutput, + mElement: kAudioObjectPropertyElementMain + ) + + if AudioObjectHasProperty(deviceID, &masterAddress) { + var volume: Float32 = 0 + var size = UInt32(MemoryLayout<Float32>.size) + let status = AudioObjectGetPropertyData(deviceID, &masterAddress, 0, nil, &size, &volume) + if status == noErr { + return .master(volume) + } + } + + let left = readChannelVolume(deviceID: deviceID, channel: 1) + let right = readChannelVolume(deviceID: deviceID, channel: 2) + if left != nil || right != nil { + return .channels(left: left, right: right) + } + return nil + } + + /// Sets the default output device's volume to `value` (clamped to `0.0...1.0`). + /// Writes master if available, falls back to writing channels 1 and 2. + /// Used for the duck-down snap and every fade-ramp step, where balance + /// preservation isn't meaningful; the restore fade-up finishes with + /// `restore(_:)` to re-apply the original per-channel values exactly. 
///
    /// - Parameter value: Target scalar volume. Out-of-range values are
    ///   clamped; NaN is rejected without touching the device.
    /// - Returns: `true` if the master write succeeded or, failing that, at
    ///   least one of the two channel writes did; `false` otherwise,
    ///   including when there is no default output device.
    @discardableResult
    static func setVolume(_ value: Float) -> Bool {
        // NaN compares false against everything, so the min/max clamp below
        // cannot sanitise it. Refuse it rather than write an arbitrary level.
        guard !value.isNaN else { return false }
        guard let deviceID = defaultOutputDeviceID() else { return false }
        let clamped = max(0, min(1, value))

        var masterAddress = AudioObjectPropertyAddress(
            mSelector: kAudioDevicePropertyVolumeScalar,
            mScope: kAudioDevicePropertyScopeOutput,
            mElement: kAudioObjectPropertyElementMain
        )

        if AudioObjectHasProperty(deviceID, &masterAddress) {
            var newValue = clamped
            // `MemoryLayout` needs an explicit type argument; the payload
            // written to CoreAudio is a single `Float32`.
            let size = UInt32(MemoryLayout<Float32>.size)
            let status = AudioObjectSetPropertyData(deviceID, &masterAddress, 0, nil, size, &newValue)
            if status == noErr { return true }
        }

        // Master missing or the write failed: apply the same value to
        // elements 1 and 2. Both writes are attempted before combining the
        // results so a failure on one side doesn't skip the other.
        let leftOK = writeChannelVolume(deviceID: deviceID, channel: 1, value: clamped)
        let rightOK = writeChannelVolume(deviceID: deviceID, channel: 2, value: clamped)
        return leftOK || rightOK
    }

    /// Re-applies a snapshot exactly. For master snapshots this writes the
    /// master scalar; for per-channel snapshots this writes the original
    /// left and right values independently, preserving stereo balance that
    /// `setVolume(_:)` would otherwise have flattened.
///
    /// - Parameter snapshot: State to re-apply; values are clamped to
    ///   `0.0...1.0` before being written.
    /// - Returns: for `.master`, whether the master write succeeded (`false`
    ///   if the current device has no master volume property); for
    ///   `.channels`, whether at least one present channel was written.
    ///   `false` when there is no default output device.
    @discardableResult
    static func restore(_ snapshot: SystemVolumeSnapshot) -> Bool {
        guard let deviceID = defaultOutputDeviceID() else { return false }

        switch snapshot {
        case .master(let value):
            var masterAddress = AudioObjectPropertyAddress(
                mSelector: kAudioDevicePropertyVolumeScalar,
                mScope: kAudioDevicePropertyScopeOutput,
                mElement: kAudioObjectPropertyElementMain
            )
            // Unlike `setVolume(_:)`, there is no per-channel fallback here:
            // a master snapshot is only ever written back to a master property.
            guard AudioObjectHasProperty(deviceID, &masterAddress) else { return false }
            var newValue = max(0, min(1, value))
            // `MemoryLayout` needs an explicit type argument; the payload is
            // a single `Float32`.
            let size = UInt32(MemoryLayout<Float32>.size)
            let status = AudioObjectSetPropertyData(deviceID, &masterAddress, 0, nil, size, &newValue)
            return status == noErr

        case .channels(let left, let right):
            // Write each captured side on its own; a side that was `nil` in
            // the snapshot is left untouched. The write is the left operand
            // of `||` so it always runs.
            var anyOK = false
            if let left {
                anyOK = writeChannelVolume(deviceID: deviceID, channel: 1, value: max(0, min(1, left))) || anyOK
            }
            if let right {
                anyOK = writeChannelVolume(deviceID: deviceID, channel: 2, value: max(0, min(1, right))) || anyOK
            }
            return anyOK
        }
    }

    // MARK: - Private

    /// Looks up the system's current default output device.
    ///
    /// - Returns: the device ID, or `nil` when the query fails or reports
    ///   `kAudioObjectUnknown`.
    private static func defaultOutputDeviceID() -> AudioObjectID? {
        var deviceID = AudioObjectID(kAudioObjectUnknown)
        // The result buffer is one `AudioObjectID`.
        var size = UInt32(MemoryLayout<AudioObjectID>.size)
        var address = AudioObjectPropertyAddress(
            mSelector: kAudioHardwarePropertyDefaultOutputDevice,
            mScope: kAudioObjectPropertyScopeGlobal,
            mElement: kAudioObjectPropertyElementMain
        )
        let status = AudioObjectGetPropertyData(
            AudioObjectID(kAudioObjectSystemObject),
            &address,
            0,
            nil,
            &size,
            &deviceID
        )
        guard status == noErr, deviceID != kAudioObjectUnknown else { return nil }
        return deviceID
    }

    /// Reads the output-scope scalar volume of one element of `deviceID`.
    ///
    /// - Parameters:
    ///   - deviceID: Device to query.
    ///   - channel: Element number (callers in this type pass 1 and 2).
    /// - Returns: the scalar, or `nil` when the element has no volume
    ///   property or the read fails.
    private static func readChannelVolume(deviceID: AudioObjectID, channel: UInt32) -> Float? {
        var address = AudioObjectPropertyAddress(
            mSelector: kAudioDevicePropertyVolumeScalar,
            mScope: kAudioDevicePropertyScopeOutput,
            mElement: channel
        )
        guard AudioObjectHasProperty(deviceID, &address) else { return nil }
        var volume: Float32 = 0
        var size = UInt32(MemoryLayout<Float32>.size)
        let status = AudioObjectGetPropertyData(deviceID, &address, 0, nil, &size, &volume)
        return status == noErr ? volume : nil
    }

    /// Writes the output-scope scalar volume of one element of `deviceID`.
    /// `value` is written as given; callers clamp it beforehand.
    ///
    /// - Returns: `true` when the property exists and the write succeeds.
    private static func writeChannelVolume(deviceID: AudioObjectID, channel: UInt32, value: Float) -> Bool {
        var address = AudioObjectPropertyAddress(
            mSelector: kAudioDevicePropertyVolumeScalar,
            mScope: kAudioDevicePropertyScopeOutput,
            mElement: channel
        )
        guard AudioObjectHasProperty(deviceID, &address) else { return false }
        var newValue = value
        let size = UInt32(MemoryLayout<Float32>.size)
        let status = AudioObjectSetPropertyData(deviceID, &address, 0, nil, size, &newValue)
        return status == noErr
    }
}
diff --git a/Sources/Fluid/UI/SettingsView.swift b/Sources/Fluid/UI/SettingsView.swift index 10c6384e..fc099e2a 100644 --- a/Sources/Fluid/UI/SettingsView.swift +++ b/Sources/Fluid/UI/SettingsView.swift @@ -836,12 +836,36 @@ struct SettingsView: View { ) Divider().opacity(0.2) + HStack(alignment: .center) { + VStack(alignment: .leading, spacing: 2) { + Text("Music During Transcription") + .font(.body) + Text("Leave playing, pause it, or lower the system volume to 10% while you dictate (restored when you stop).") + .font(.caption) + .foregroundStyle(.secondary) + } + + Spacer() + + Picker("", selection: Binding( + get: { SettingsStore.shared.mediaBehaviorDuringTranscription }, + set: { SettingsStore.shared.mediaBehaviorDuringTranscription = $0 } + )) { + ForEach(SettingsStore.MediaBehaviorDuringTranscription.allCases) { mode in + Text(mode.displayName).tag(mode) + } + } + .frame(width: 160) + .labelsHidden() + } + Divider().opacity(0.2) + self.optionToggleRow( - title: "Pause Media During Transcription", - description: "Automatically 
pause currently playing audio/video when transcription starts. Resumes only if FluidVoice paused it.", + title: "Keep Mac Awake While Dictating", + description: "Prevents the display from sleeping or locking while a recording is active. Released as soon as you stop.", isOn: Binding( - get: { SettingsStore.shared.pauseMediaDuringTranscription }, - set: { SettingsStore.shared.pauseMediaDuringTranscription = $0 } + get: { SettingsStore.shared.preventSleepDuringTranscription }, + set: { SettingsStore.shared.preventSleepDuringTranscription = $0 } ) ) Divider().opacity(0.2)