diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..e84a250a --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +# VisionClaw external state machine relay +# Copy to .env and fill values before building Android sample. + +OPENCLAW_TAILSCALE_IP=100.64.30.99 +OPENCLAW_TAILNET_ID=YOUR_TAILNET_ID +OPENCLAW_BEARER_TOKEN= +GEMINI_API_KEY=YOUR_GEMINI_API_KEY diff --git a/.gitignore b/.gitignore index 530f7155..438b66ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ .DS_Store +# Environment +.env + # Android samples/CameraAccessAndroid/app/src/main/java/**/Secrets.kt samples/CameraAccessAndroid/local.properties diff --git a/README.md b/README.md index 1e66a649..d3ea0bd2 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,20 @@ git clone https://github.com/sseanliu/VisionClaw.git Open `samples/CameraAccessAndroid/` in Android Studio. +### 1.5 Configure environment variables (.env) + +At repository root, copy the example and set your values: + +```bash +cp .env.example .env +``` + +Set: +- `GEMINI_API_KEY` +- `OPENCLAW_TAILSCALE_IP` + +These are injected into Android `BuildConfig` at build time and used as defaults by the app settings layer. + ### 2. Configure GitHub Packages (DAT SDK) The Meta DAT Android SDK is distributed via GitHub Packages. You need a GitHub Personal Access Token with `read:packages` scope. @@ -285,7 +299,7 @@ All source code is in `samples/CameraAccessAndroid/app/src/main/java/.../cameraa ### Tool Calling -Gemini Live supports function calling. Both apps declare a single `execute` tool that routes everything through OpenClaw: +Gemini Live supports function calling. Both apps declare `execute` for OpenClaw actions. Android also declares `log_sop_step` to forward SOP step logs to an external state machine endpoint. 1. User says "Add eggs to my shopping list" 2. 
Gemini speaks "Sure, adding that now" (verbal acknowledgment before tool call) diff --git a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj index 1e7dbda4..76c71be4 100644 --- a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj +++ b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj @@ -38,6 +38,11 @@ 9DD6CB0E2F3C64F400ED7098 /* WebRTCOverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CB0D2F3C64F400ED7098 /* WebRTCOverlayView.swift */; }; 9DD894B22F4047630090B9B9 /* SettingsManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD894AF2F4047630090B9B9 /* SettingsManager.swift */; }; 9DD894B32F4047630090B9B9 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD894B02F4047630090B9B9 /* SettingsView.swift */; }; + B10000012F50000100AA0001 /* DesignSystem.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000012F50000100AA0002 /* DesignSystem.swift */; }; + B10000012F50000200AA0001 /* HomeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000012F50000200AA0002 /* HomeView.swift */; }; + B10000012F50000300AA0001 /* CaptureView.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000012F50000300AA0002 /* CaptureView.swift */; }; + B10000012F50004000AA0001 /* IPhoneCameraPreviewSurface.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000012F50004000AA0002 /* IPhoneCameraPreviewSurface.swift */; }; + B10000012F50000400AA0001 /* HistoryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000012F50000400AA0002 /* HistoryView.swift */; }; 9DD895962F405E0E0090B9B9 /* RTCVideoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD895952F405E0E0090B9B9 /* RTCVideoView.swift */; }; 9DD895972F405E0E0090B9B9 /* PiPVideoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD895942F405E0E0090B9B9 /* PiPVideoView.swift */; }; A1B2C3D42F0A000200000001 /* GeminiConfig.swift in Sources */ = {isa = 
PBXBuildFile; fileRef = A1B2C3D42F0A000100000001 /* GeminiConfig.swift */; }; @@ -45,6 +50,8 @@ A1B2C3D42F0A000200000003 /* AudioManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000003 /* AudioManager.swift */; }; A1B2C3D42F0A000200000004 /* GeminiSessionViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */; }; A1B2C3D42F0A000200000005 /* GeminiOverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000005 /* GeminiOverlayView.swift */; }; + A1B2C3D42F0A000200000006 /* SopRelayClient.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000006 /* SopRelayClient.swift */; }; + A1B2C3D42F0A000200000007 /* GeminiLiveSpotter.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000007 /* GeminiLiveSpotter.swift */; }; E66D30242E7DA71900470B48 /* MockDeviceKitButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = E66D30232E7DA71900470B48 /* MockDeviceKitButton.swift */; }; E6A188482EB918740097D0E1 /* StreamView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6A188472EB918740097D0E1 /* StreamView.swift */; }; E6DA451D2E79A63100E3F688 /* MockDeviceCardView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6DA45182E79A63100E3F688 /* MockDeviceCardView.swift */; }; @@ -108,6 +115,11 @@ 9DD6CB0D2F3C64F400ED7098 /* WebRTCOverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebRTCOverlayView.swift; sourceTree = ""; }; 9DD894AF2F4047630090B9B9 /* SettingsManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsManager.swift; sourceTree = ""; }; 9DD894B02F4047630090B9B9 /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = ""; }; + B10000012F50000100AA0002 /* DesignSystem.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
DesignSystem.swift; sourceTree = ""; }; + B10000012F50000200AA0002 /* HomeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomeView.swift; sourceTree = ""; }; + B10000012F50000300AA0002 /* CaptureView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CaptureView.swift; sourceTree = ""; }; + B10000012F50004000AA0002 /* IPhoneCameraPreviewSurface.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = IPhoneCameraPreviewSurface.swift; sourceTree = ""; }; + B10000012F50000400AA0002 /* HistoryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HistoryView.swift; sourceTree = ""; }; 9DD895942F405E0E0090B9B9 /* PiPVideoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PiPVideoView.swift; sourceTree = ""; }; 9DD895952F405E0E0090B9B9 /* RTCVideoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RTCVideoView.swift; sourceTree = ""; }; A1B2C3D42F0A000100000001 /* GeminiConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeminiConfig.swift; sourceTree = ""; }; @@ -115,6 +127,8 @@ A1B2C3D42F0A000100000003 /* AudioManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioManager.swift; sourceTree = ""; }; A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeminiSessionViewModel.swift; sourceTree = ""; }; A1B2C3D42F0A000100000005 /* GeminiOverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeminiOverlayView.swift; sourceTree = ""; }; + A1B2C3D42F0A000100000006 /* SopRelayClient.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SopRelayClient.swift; sourceTree = ""; }; + A1B2C3D42F0A000100000007 /* GeminiLiveSpotter.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = GeminiLiveSpotter.swift; sourceTree = ""; }; E66D30232E7DA71900470B48 /* MockDeviceKitButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MockDeviceKitButton.swift; sourceTree = ""; }; E699CC952E8150670052C240 /* CameraAccessTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = CameraAccessTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; E6A188472EB918740097D0E1 /* StreamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamView.swift; sourceTree = ""; }; @@ -126,7 +140,6 @@ /* Begin PBXFileSystemSynchronizedRootGroup section */ 9D3C69602F367CF700E641A5 /* iPhone */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = iPhone; sourceTree = ""; }; - 9D85EB992F35EC46006C44D1 /* OpenClaw */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); name = OpenClaw; path = CameraAccess/OpenClaw; sourceTree = SOURCE_ROOT; }; E699CC962E8150670052C240 /* CameraAccessTests */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = CameraAccessTests; sourceTree = ""; }; /* End PBXFileSystemSynchronizedRootGroup section */ @@ -188,6 +201,11 @@ children = ( 8FFD5FF42E8422580035E446 /* Components */, E6DA451B2E79A63100E3F688 /* MockDeviceKit */, + B10000012F50000100AA0002 /* DesignSystem.swift */, + B10000012F50000200AA0002 /* HomeView.swift */, + B10000012F50000300AA0002 /* CaptureView.swift */, + B10000012F50004000AA0002 /* IPhoneCameraPreviewSurface.swift */, + B10000012F50000400AA0002 /* HistoryView.swift */, 8FFD605E2E84A2F70035E446 /* DebugMenuView.swift */, 8FD96B722E6F0A9800F56AB1 /* HomeScreenView.swift */, 8FFD605F2E84A2F70035E446 /* MainAppView.swift */, @@ -215,7 +233,6 @@ 8FD96B782E6F0A9800F56AB1 /* CameraAccess.entitlements */, 8FD96B792E6F0A9800F56AB1 /* CameraAccessApp.swift 
*/, 8FD96B7B2E6F0A9800F56AB1 /* Info.plist */, - 9D85EB992F35EC46006C44D1 /* OpenClaw */, ); path = CameraAccess; sourceTree = ""; @@ -272,7 +289,9 @@ A1B2C3D42F0A000100000003 /* AudioManager.swift */, A1B2C3D42F0A000100000001 /* GeminiConfig.swift */, A1B2C3D42F0A000100000002 /* GeminiLiveService.swift */, + A1B2C3D42F0A000100000007 /* GeminiLiveSpotter.swift */, A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */, + A1B2C3D42F0A000100000006 /* SopRelayClient.swift */, ); path = Gemini; sourceTree = ""; @@ -305,7 +324,6 @@ ); fileSystemSynchronizedGroups = ( 9D3C69602F367CF700E641A5 /* iPhone */, - 9D85EB992F35EC46006C44D1 /* OpenClaw */, ); name = CameraAccess; productName = CameraAccess; @@ -397,7 +415,12 @@ AAAAAAAAAAAAAAAAAAAAAA /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; - files = ( + files = ( + B10000012F50000100AA0001 /* DesignSystem.swift in Sources */, + B10000012F50000200AA0001 /* HomeView.swift in Sources */, + B10000012F50000300AA0001 /* CaptureView.swift in Sources */, + B10000012F50004000AA0001 /* IPhoneCameraPreviewSurface.swift in Sources */, + B10000012F50000400AA0001 /* HistoryView.swift in Sources */, 8FD96B7F2E6F0A9800F56AB1 /* CameraAccessApp.swift in Sources */, 8FD96B812E6F0A9800F56AB1 /* HomeScreenView.swift in Sources */, 8F2D23802E856711002D0588 /* DebugMenuViewModel.swift in Sources */, @@ -432,8 +455,10 @@ E6FD3BCE2EB4D53A00E7FE5D /* NonStreamView.swift in Sources */, A1B2C3D42F0A000200000001 /* GeminiConfig.swift in Sources */, A1B2C3D42F0A000200000002 /* GeminiLiveService.swift in Sources */, + A1B2C3D42F0A000200000007 /* GeminiLiveSpotter.swift in Sources */, A1B2C3D42F0A000200000003 /* AudioManager.swift in Sources */, A1B2C3D42F0A000200000004 /* GeminiSessionViewModel.swift in Sources */, + A1B2C3D42F0A000200000006 /* SopRelayClient.swift in Sources */, 9DD894B22F4047630090B9B9 /* SettingsManager.swift in Sources */, 9DD894B32F4047630090B9B9 /* SettingsView.swift in Sources */, 
A1B2C3D42F0A000200000005 /* GeminiOverlayView.swift in Sources */, @@ -469,7 +494,7 @@ CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_ASSET_PATHS = ""; - DEVELOPMENT_TEAM = WY253UX7FC; + DEVELOPMENT_TEAM = CVABQ59BK3; ENABLE_PREVIEWS = YES; FRAMEWORK_SEARCH_PATHS = ""; INFOPLIST_FILE = CameraAccess/Info.plist; @@ -479,7 +504,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = com.xiaoanliu.VisionClaw; + PRODUCT_BUNDLE_IDENTIFIER = com.lucascunha.VisionClaw.SOP; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_VERSION = 5.0; @@ -498,7 +523,7 @@ CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_ASSET_PATHS = ""; - DEVELOPMENT_TEAM = WY253UX7FC; + DEVELOPMENT_TEAM = CVABQ59BK3; ENABLE_PREVIEWS = YES; INFOPLIST_FILE = CameraAccess/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 17.0; @@ -507,7 +532,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = com.xiaoanliu.VisionClaw; + PRODUCT_BUNDLE_IDENTIFIER = com.lucascunha.VisionClaw.SOP; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_VERSION = 5.0; diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json index f7757d41..f4344003 100644 --- a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json +++ b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -1,7 +1,7 @@ { "images" : [ { - "filename" : "imagine_a_film_camera_in_the_style.jpeg", + "filename" : "logo.png", "idiom" : "universal", "platform" : "ios", "size" : "1024x1024" diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg deleted 
file mode 100644 index b91090c8..00000000 Binary files a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg and /dev/null differ diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/logo.png b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/logo.png new file mode 100644 index 00000000..44d9779b Binary files /dev/null and b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/logo.png differ diff --git a/samples/CameraAccess/CameraAccess/CameraAccessApp.swift b/samples/CameraAccess/CameraAccess/CameraAccessApp.swift index 1fedafda..1267d8c3 100644 --- a/samples/CameraAccess/CameraAccess/CameraAccessApp.swift +++ b/samples/CameraAccess/CameraAccess/CameraAccessApp.swift @@ -19,13 +19,13 @@ import Foundation import MWDATCore import SwiftUI -#if canImport(MWDATMockDevice) +#if DEBUG && canImport(MWDATMockDevice) import MWDATMockDevice #endif @main struct CameraAccessApp: App { - #if canImport(MWDATMockDevice) + #if DEBUG && canImport(MWDATMockDevice) // Debug menu for simulating device connections during development @StateObject private var debugMenuViewModel = DebugMenuViewModel(mockDeviceKit: MockDeviceKit.shared) #endif @@ -58,7 +58,7 @@ struct CameraAccessApp: App { } message: { Text(wearablesViewModel.errorMessage) } - #if canImport(MWDATMockDevice) + #if DEBUG && canImport(MWDATMockDevice) .sheet(isPresented: $debugMenuViewModel.showDebugMenu) { MockDeviceKitView(viewModel: debugMenuViewModel.mockDeviceKitViewModel) } diff --git a/samples/CameraAccess/CameraAccess/Gemini/AudioManager.swift b/samples/CameraAccess/CameraAccess/Gemini/AudioManager.swift index 3c6c38d4..c16e2487 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/AudioManager.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/AudioManager.swift @@ -2,20 +2,355 @@ import AVFoundation import Foundation import UIKit -class AudioManager { +enum WorkerAudioRouteOwner: String, 
Sendable { + case aiGuide = "ai_guide" + case backOfficeWebRTC = "back_office_webrtc" + case holdToTalk = "hold_to_talk" + case viewer = "viewer" +} + +struct WorkerAudioRouteLease: Equatable, Sendable { + let owner: WorkerAudioRouteOwner + let token: UUID + let generation: UInt64 + + var payload: [String: Any] { + [ + "owner": owner.rawValue, + "token": token.uuidString, + "generation": generation + ] + } +} + +struct WorkerAudioRouteSnapshot { + let lease: WorkerAudioRouteLease + let owner: WorkerAudioRouteOwner + let mode: StreamingMode + let reason: String + let category: String + let audioMode: String + let inputs: [String] + let outputs: [String] + let preferredInput: String? + let usesHandsFreeRoute: Bool + let fallbackMessage: String? + let sampleRate: Double + let ioBufferDuration: Double + + var payload: [String: Any] { + [ + "owner": owner.rawValue, + "token": lease.token.uuidString, + "generation": lease.generation, + "mode": mode == .iPhone ? "iphone" : "glasses", + "reason": reason, + "category": category, + "audio_mode": audioMode, + "inputs": inputs, + "outputs": outputs, + "preferred_input": preferredInput ?? NSNull(), + "uses_hands_free_route": usesHandsFreeRoute, + "fallback_message": fallbackMessage ?? NSNull(), + "sample_rate": sampleRate, + "io_buffer_duration": ioBufferDuration + ] + } +} + +final class WorkerAudioRouteCoordinator: @unchecked Sendable { + static let shared = WorkerAudioRouteCoordinator() + + private let stateQueue = DispatchQueue(label: "worker.audio.route.state") + private let deactivationQueue = DispatchQueue( + label: "worker.audio.route.deactivation", + qos: .userInitiated + ) + private var activeLease: WorkerAudioRouteLease? + private var routeGeneration: UInt64 = 0 + + @discardableResult + func acquire( + owner: WorkerAudioRouteOwner, + mode: StreamingMode, + reason: String, + forceSpeaker: Bool = false, + preferredSampleRate: Double? 
= nil, + preferredIOBufferDuration: TimeInterval = 0.02 + ) throws -> WorkerAudioRouteSnapshot { + let lease = stateQueue.sync { () -> WorkerAudioRouteLease in + routeGeneration &+= 1 + let lease = WorkerAudioRouteLease( + owner: owner, + token: UUID(), + generation: routeGeneration + ) + activeLease = lease + return lease + } + + let session = AVAudioSession.sharedInstance() + do { + var options: AVAudioSession.CategoryOptions = [.allowBluetoothHFP] + let audioMode: AVAudioSession.Mode + + switch mode { + case .iPhone: + audioMode = .voiceChat + options.formUnion([.defaultToSpeaker, .duckOthers]) + case .glasses: + audioMode = forceSpeaker ? .voiceChat : .videoChat + if forceSpeaker { + options.formUnion([.defaultToSpeaker]) + } + } + + try session.setCategory(.playAndRecord, mode: audioMode, options: options) + if let preferredSampleRate { + try session.setPreferredSampleRate(preferredSampleRate) + } + try session.setPreferredIOBufferDuration(preferredIOBufferDuration) + try session.setActive(true, options: .notifyOthersOnDeactivation) + + var preferredInputName: String? + if let input = preferredBluetoothHandsFreeInput(session) { + try session.setPreferredInput(input) + preferredInputName = "\(input.portType.rawValue):\(input.portName)" + try session.setActive(true, options: .notifyOthersOnDeactivation) + } + + let routeBeforeOverride = session.currentRoute + let hasHandsFree = hasBluetoothHandsFreeRoute(routeBeforeOverride) + let fallbackMessage: String? + + if mode == .iPhone || forceSpeaker { + try session.overrideOutputAudioPort(.speaker) + fallbackMessage = mode == .glasses + ? "Glasses audio route unavailable. Using phone speaker." + : nil + } else if hasHandsFree { + try session.overrideOutputAudioPort(.none) + fallbackMessage = nil + } else { + try session.overrideOutputAudioPort(.speaker) + fallbackMessage = "Meta audio route unavailable. Using phone audio until Bluetooth HFP connects." 
+ } + + let snapshot = WorkerAudioRouteSnapshot( + lease: lease, + owner: owner, + mode: mode, + reason: reason, + category: session.category.rawValue, + audioMode: session.mode.rawValue, + inputs: describePorts(session.currentRoute.inputs), + outputs: describePorts(session.currentRoute.outputs), + preferredInput: preferredInputName, + usesHandsFreeRoute: hasBluetoothHandsFreeRoute(session.currentRoute), + fallbackMessage: fallbackMessage, + sampleRate: session.sampleRate, + ioBufferDuration: session.ioBufferDuration + ) + log(snapshot) + Task { + await WorkerTelemetry.shared.record( + "audio_route_acquired", + source: "ios_audio", + stage: owner.rawValue, + payload: snapshot.payload + ) + } + return snapshot + } catch { + stateQueue.sync { + if activeLease?.token == lease.token { + activeLease = nil + } + } + throw error + } + } + + func release( + lease: WorkerAudioRouteLease, + afterAudioGraphStops: @escaping () async -> Void = {} + ) async { + let didRelease = stateQueue.sync { () -> Bool in + guard activeLease?.token == lease.token else { return false } + activeLease = nil + return true + } + + guard didRelease else { + let currentLease = stateQueue.sync { activeLease } + print("[Audio] Stale release ignored; newer session active") + var payload: [String: Any] = [ + "release_owner": lease.owner.rawValue, + "release_token": lease.token.uuidString, + "release_generation": lease.generation + ] + if let currentLease { + payload["current_owner"] = currentLease.owner.rawValue + payload["current_token"] = currentLease.token.uuidString + payload["current_generation"] = currentLease.generation + } else { + payload["current_owner"] = NSNull() + payload["current_token"] = NSNull() + payload["current_generation"] = NSNull() + } + await WorkerTelemetry.shared.record( + "audio_route_stale_release_ignored", + source: "ios_audio", + stage: lease.owner.rawValue, + payload: payload + ) + return + } + + NSLog("[WorkerAudio] released owner=%@ token=%@", lease.owner.rawValue, 
lease.token.uuidString) + await WorkerTelemetry.shared.record( + "audio_route_released", + source: "ios_audio", + stage: lease.owner.rawValue, + payload: lease.payload + ) + + await afterAudioGraphStops() + await Task.yield() + + let currentState = stateQueue.sync { () -> (lease: WorkerAudioRouteLease?, generation: UInt64) in + (activeLease, routeGeneration) + } + let currentLease = currentState.lease + let currentGeneration = currentState.generation + let shouldDeactivate = currentLease == nil && currentGeneration == lease.generation + + guard shouldDeactivate else { + var payload: [String: Any] = [ + "owner": lease.owner.rawValue, + "token": lease.token.uuidString, + "release_generation": lease.generation, + "current_generation": currentGeneration + ] + payload["current_owner"] = currentLease?.owner.rawValue ?? NSNull() + payload["current_token"] = currentLease?.token.uuidString ?? NSNull() + NSLog( + "[WorkerAudio] skip deactivate owner=%@ token=%@ generation=%llu currentOwner=%@ currentGeneration=%llu", + lease.owner.rawValue, + lease.token.uuidString, + lease.generation, + currentLease?.owner.rawValue ?? 
"none", + currentGeneration + ) + await WorkerTelemetry.shared.record( + "audio_route_deactivation_skipped", + source: "ios_audio", + stage: lease.owner.rawValue, + payload: payload + ) + return + } + + let result = await deactivateSharedAudioSession() + switch result { + case .success: + NSLog("[WorkerAudio] deactivated owner=%@ token=%@", lease.owner.rawValue, lease.token.uuidString) + await WorkerTelemetry.shared.record( + "audio_route_deactivated", + source: "ios_audio", + stage: lease.owner.rawValue, + payload: lease.payload + ) + case .failure(let error): + NSLog("[WorkerAudio] deactivate failed owner=%@ token=%@ error=%@", + lease.owner.rawValue, lease.token.uuidString, error.localizedDescription) + await WorkerTelemetry.shared.record( + "audio_route_deactivate_failed", + source: "ios_audio", + stage: lease.owner.rawValue, + payload: [ + "owner": lease.owner.rawValue, + "token": lease.token.uuidString, + "generation": lease.generation, + "error": error.localizedDescription + ] + ) + } + } + + private func deactivateSharedAudioSession() async -> Result { + await withCheckedContinuation { (continuation: CheckedContinuation, Never>) in + deactivationQueue.async { + do { + try AVAudioSession.sharedInstance().setActive( + false, + options: .notifyOthersOnDeactivation + ) + continuation.resume(returning: .success(())) + } catch { + continuation.resume(returning: .failure(error)) + } + } + } + } + + private func preferredBluetoothHandsFreeInput(_ session: AVAudioSession) -> AVAudioSessionPortDescription? 
{ + session.availableInputs?.first { + $0.portType == .bluetoothHFP || $0.portType == .bluetoothLE + } + } + + private func hasBluetoothHandsFreeRoute(_ route: AVAudioSessionRouteDescription) -> Bool { + route.inputs.contains { + $0.portType == .bluetoothHFP || $0.portType == .bluetoothLE + } || route.outputs.contains { + $0.portType == .bluetoothHFP || $0.portType == .bluetoothLE + } + } + + private func describePorts(_ ports: [AVAudioSessionPortDescription]) -> [String] { + ports.map { "\($0.portType.rawValue):\($0.portName)" } + } + + private func log(_ snapshot: WorkerAudioRouteSnapshot) { + NSLog( + "[WorkerAudio] owner=%@ mode=%@ reason=%@ inputs=%@ outputs=%@ fallback=%@", + snapshot.owner.rawValue, + snapshot.mode == .iPhone ? "iphone" : "glasses", + snapshot.reason, + snapshot.inputs.joined(separator: ","), + snapshot.outputs.joined(separator: ","), + snapshot.fallbackMessage ?? "none" + ) + } +} + +final class AudioManager: @unchecked Sendable { var onAudioCaptured: ((Data) -> Void)? + // Keep the engine container permanent for the process lifetime. Teardown only + // stops and detaches child nodes; it never nils or replaces this engine. private let audioEngine = AVAudioEngine() + private let audioLifecycleQueue = DispatchQueue( + label: "gemini.audio.lifecycle", + qos: .userInitiated + ) + private let audioLifecycleQueueKey = DispatchSpecificKey() private let playerNode = AVAudioPlayerNode() private var isCapturing = false + private var isInputTapInstalled = false + private var isPlayerNodeAttached = false private var wasCapturingBeforeInterruption = false private var useIPhoneMode = false + private var audioRouteLease: WorkerAudioRouteLease? 
+ private var audioGraphGeneration: UInt64 = 0 private let outputFormat: AVAudioFormat // Accumulate resampled PCM into ~100ms chunks before sending private let sendQueue = DispatchQueue(label: "audio.accumulator") private var accumulatedData = Data() + private var accumulatorGeneration: UInt64 = 0 private let minSendBytes = 3200 // 100ms at 16kHz mono Int16 = 1600 frames * 2 bytes // Notification observers for background resilience @@ -31,51 +366,57 @@ class AudioManager { channels: GeminiConfig.audioChannels, interleaved: true )! + audioLifecycleQueue.setSpecific(key: audioLifecycleQueueKey, value: ()) } func setupAudioSession(useIPhoneMode: Bool = false) throws { self.useIPhoneMode = useIPhoneMode - let session = AVAudioSession.sharedInstance() - // voiceChat: aggressive echo cancellation (mic + speaker co-located on phone) - // videoChat: mild AEC (mic on glasses, speaker on glasses) - // When Speaker Output is ON, speaker is on phone so always use voiceChat AEC let forceSpeaker = SettingsManager.shared.speakerOutputEnabled - if useIPhoneMode || forceSpeaker { - try session.setCategory( - .playAndRecord, - mode: .voiceChat, - options: [.defaultToSpeaker, .allowBluetooth, .mixWithOthers] - ) - } else { - try session.setCategory( - .playAndRecord, - mode: .videoChat, - options: [.allowBluetoothHFP, .mixWithOthers, .defaultToSpeaker] - ) + let captureMode: StreamingMode = useIPhoneMode ? 
.iPhone : .glasses + let snapshot = try WorkerAudioRouteCoordinator.shared.acquire( + owner: .aiGuide, + mode: captureMode, + reason: "gemini_live", + forceSpeaker: forceSpeaker, + preferredSampleRate: GeminiConfig.inputAudioSampleRate, + preferredIOBufferDuration: 0.064 + ) + audioRouteLease = snapshot.lease + if let fallback = snapshot.fallbackMessage { + NSLog("[Audio] %@", fallback) } - try session.setPreferredSampleRate(GeminiConfig.inputAudioSampleRate) - try session.setPreferredIOBufferDuration(0.064) - try session.setActive(true) - if SettingsManager.shared.speakerOutputEnabled { - try session.overrideOutputAudioPort(.speaker) - NSLog("[Audio] Speaker output override: ON (iPhone speaker)") - } - NSLog("[Audio] Session mode: %@", useIPhoneMode ? "voiceChat (iPhone)" : "videoChat (glasses)") + removeObservers() setupInterruptionHandling() setupAppLifecycleObservers() } func startCapture() throws { + try syncOnAudioLifecycleQueue { + try startCaptureOnAudioLifecycleQueue() + } + } + + private func startCaptureOnAudioLifecycleQueue() throws { guard !isCapturing else { return } - audioEngine.attach(playerNode) + if isInputTapInstalled { + audioEngine.inputNode.removeTap(onBus: 0) + isInputTapInstalled = false + } + + if !isPlayerNodeAttached { + audioEngine.attach(playerNode) + isPlayerNodeAttached = true + } + let playerFormat = AVAudioFormat( commonFormat: .pcmFormatFloat32, sampleRate: GeminiConfig.outputAudioSampleRate, channels: GeminiConfig.audioChannels, interleaved: false )! + audioEngine.disconnectNodeOutput(playerNode) audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: playerFormat) let inputNode = audioEngine.inputNode @@ -93,7 +434,12 @@ class AudioManager { NSLog("[Audio] Needs resample: %@", needsResample ? 
"YES" : "NO") - sendQueue.async { self.accumulatedData = Data() } + audioGraphGeneration &+= 1 + let captureGeneration = audioGraphGeneration + sendQueue.sync { + accumulatedData = Data() + accumulatorGeneration = captureGeneration + } var converter: AVAudioConverter? if needsResample { @@ -111,6 +457,11 @@ class AudioManager { guard let self else { return } tapCount += 1 + let currentTapCount = tapCount + let rmsValue = self.computeRMS(buffer) + if currentTapCount % 15 == 0 { + print("[Audio Monitor] App Mic Level: \(rmsValue)") + } let pcmData: Data if let converter { @@ -121,7 +472,7 @@ class AudioManager { interleaved: false )! guard let resampled = self.convertBuffer(buffer, using: converter, targetFormat: resampleFormat) else { - if tapCount <= 3 { NSLog("[Audio] Resample failed for tap #%d", tapCount) } + if currentTapCount <= 3 { NSLog("[Audio] Resample failed for tap #%d", currentTapCount) } return } pcmData = self.float32BufferToInt16Data(resampled) @@ -131,11 +482,12 @@ class AudioManager { // Accumulate into ~100ms chunks before sending to Gemini self.sendQueue.async { + guard self.accumulatorGeneration == captureGeneration else { return } self.accumulatedData.append(pcmData) if self.accumulatedData.count >= self.minSendBytes { let chunk = self.accumulatedData self.accumulatedData = Data() - if tapCount <= 3 { + if currentTapCount <= 3 { NSLog("[Audio] Sending chunk: %d bytes (~%dms)", chunk.count, chunk.count / 32) // 16kHz * 2 bytes = 32 bytes/ms } @@ -143,14 +495,27 @@ class AudioManager { } } } + isInputTapInstalled = true - try audioEngine.start() - playerNode.play() - isCapturing = true + do { + try audioEngine.start() + playerNode.play() + isCapturing = true + } catch { + tearDownEngineGraphOnAudioLifecycleQueue(flushPendingAudio: false) + throw error + } } func playAudio(data: Data) { - guard isCapturing, !data.isEmpty else { return } + guard !data.isEmpty else { return } + audioLifecycleQueue.async { [weak self] in + 
self?.playAudioOnAudioLifecycleQueue(data: data) + } + } + + private func playAudioOnAudioLifecycleQueue(data: Data) { + guard isCapturing, isPlayerNodeAttached, audioEngine.isRunning, !data.isEmpty else { return } let playerFormat = AVAudioFormat( commonFormat: .pcmFormatFloat32, @@ -180,26 +545,41 @@ class AudioManager { } func stopPlayback() { - playerNode.stop() - playerNode.play() + audioLifecycleQueue.async { [weak self] in + self?.stopPlaybackOnAudioLifecycleQueue() + } } - func stopCapture() { - guard isCapturing else { return } - audioEngine.inputNode.removeTap(onBus: 0) + private func stopPlaybackOnAudioLifecycleQueue() { + guard isPlayerNodeAttached else { return } playerNode.stop() - audioEngine.stop() - audioEngine.detach(playerNode) - isCapturing = false - // Flush any remaining accumulated audio - sendQueue.async { - if !self.accumulatedData.isEmpty { - let chunk = self.accumulatedData - self.accumulatedData = Data() - self.onAudioCaptured?(chunk) + if isCapturing, audioEngine.isRunning { + playerNode.play() + } + } + + func stopCapture() async { + // AVAudioEngine graph teardown runs on a serial lifecycle queue so callers + // can await the barrier without blocking the MainActor on audio hardware. 
+ let lease = await withCheckedContinuation { (continuation: CheckedContinuation) in + audioLifecycleQueue.async { [weak self] in + guard let self else { + continuation.resume(returning: nil) + return + } + + let lease = self.audioRouteLease + self.audioRouteLease = nil + self.tearDownEngineGraphOnAudioLifecycleQueue(flushPendingAudio: true) { + self.removeObservers() + continuation.resume(returning: lease) + } } } - removeObservers() + + if let lease { + await WorkerAudioRouteCoordinator.shared.release(lease: lease) + } } // MARK: - Audio Interruption & Route Change Handling @@ -257,10 +637,7 @@ class AudioManager { ) { [weak self] _ in guard let self else { return } NSLog("[Audio] App will enter foreground") - if self.isCapturing && !self.audioEngine.isRunning { - NSLog("[Audio] Audio engine stopped while backgrounded, attempting reset") - self.attemptAudioReset() - } + self.checkAndResetStoppedEngine() } } @@ -268,9 +645,12 @@ class AudioManager { switch type { case .began: NSLog("[Audio] Audio interruption began (e.g. phone call)") - wasCapturingBeforeInterruption = isCapturing - if isCapturing { - audioEngine.pause() + audioLifecycleQueue.async { [weak self] in + guard let self else { return } + self.wasCapturingBeforeInterruption = self.isCapturing + if self.isCapturing { + self.audioEngine.pause() + } } case .ended: NSLog("[Audio] Audio interruption ended (shouldResume=%@)", shouldResume ? 
"true" : "false") @@ -286,11 +666,10 @@ class AudioManager { switch reason { case .newDeviceAvailable: NSLog("[Audio] New audio device available") + attemptAudioReset() case .oldDeviceUnavailable: NSLog("[Audio] Audio device removed") - if isCapturing { - attemptAudioReset() - } + attemptAudioReset() case .categoryChange, .override, .wakeFromSleep, .routeConfigurationChange: NSLog("[Audio] Audio route change: %d", reason.rawValue) default: @@ -298,38 +677,130 @@ class AudioManager { } } + private func preferredBluetoothHFPInput(_ session: AVAudioSession) -> AVAudioSessionPortDescription? { + session.availableInputs?.first { + $0.portType == .bluetoothHFP || $0.portType == .bluetoothLE + } + } + + private func hasBluetoothHandsFreeRoute(_ route: AVAudioSessionRouteDescription) -> Bool { + route.inputs.contains { + $0.portType == .bluetoothHFP || + $0.portType == .bluetoothLE + } || + route.outputs.contains { + $0.portType == .bluetoothHFP || + $0.portType == .bluetoothLE + } + } + private func resumeAudioAfterInterruption() { NSLog("[Audio] Resuming audio after interruption") - let audioSession = AVAudioSession.sharedInstance() - do { - try audioSession.setActive(true) - try audioEngine.start() - NSLog("[Audio] Audio resumed successfully") - } catch { - NSLog("[Audio] Failed to resume audio: %@", error.localizedDescription) - attemptAudioReset() + audioLifecycleQueue.async { [weak self] in + guard let self else { return } + let audioSession = AVAudioSession.sharedInstance() + do { + try audioSession.setActive(true) + if self.isCapturing, !self.audioEngine.isRunning { + try self.audioEngine.start() + } + if self.isCapturing, self.isPlayerNodeAttached, !self.playerNode.isPlaying { + self.playerNode.play() + } + NSLog("[Audio] Audio resumed successfully") + } catch { + NSLog("[Audio] Failed to resume audio: %@", error.localizedDescription) + self.attemptAudioReset() + } } } private func attemptAudioReset() { NSLog("[Audio] Attempting audio reset") - let wasCapturing = 
isCapturing - - if audioEngine.isRunning { - audioEngine.stop() + audioLifecycleQueue.async { [weak self] in + guard let self else { return } + let wasCapturing = self.isCapturing + let useIPhoneMode = self.useIPhoneMode + + self.tearDownEngineGraphOnAudioLifecycleQueue(flushPendingAudio: false) { [weak self] in + guard let self, wasCapturing else { return } + DispatchQueue.main.async { [weak self] in + guard let self else { return } + do { + try self.setupAudioSession(useIPhoneMode: useIPhoneMode) + try self.startCapture() + NSLog("[Audio] Audio reset successful") + } catch { + NSLog("[Audio] Audio reset failed: %@", error.localizedDescription) + } + } + } } - audioEngine.inputNode.removeTap(onBus: 0) + } + + private func tearDownEngineGraphOnAudioLifecycleQueue(flushPendingAudio: Bool, completion: (() -> Void)? = nil) { + audioGraphGeneration &+= 1 isCapturing = false - if wasCapturing { - do { - try setupAudioSession(useIPhoneMode: useIPhoneMode) - try startCapture() - NSLog("[Audio] Audio reset successful") - } catch { - NSLog("[Audio] Audio reset failed: %@", error.localizedDescription) + let inputNode = audioEngine.inputNode + audioEngine.stop() + + if isInputTapInstalled { + inputNode.removeTap(onBus: 0) + isInputTapInstalled = false + } + + if isPlayerNodeAttached { + if playerNode.isPlaying { + playerNode.stop() } + audioEngine.disconnectNodeOutput(playerNode) + audioEngine.detach(playerNode) + isPlayerNodeAttached = false + } + + sendQueue.async { + defer { completion?() } + self.accumulatorGeneration = 0 + guard !self.accumulatedData.isEmpty else { return } + let chunk = self.accumulatedData + self.accumulatedData = Data() + if flushPendingAudio { + self.onAudioCaptured?(chunk) + } + } + } + + private func waitForAudioGraphClean() async { + await withCheckedContinuation { (continuation: CheckedContinuation) in + audioLifecycleQueue.async { [weak self] in + guard let self else { + continuation.resume() + return + } + + 
self.tearDownEngineGraphOnAudioLifecycleQueue(flushPendingAudio: false) { + continuation.resume() + } + } + } + } + + private func checkAndResetStoppedEngine() { + audioLifecycleQueue.async { [weak self] in + guard let self else { return } + if self.isCapturing, !self.audioEngine.isRunning { + NSLog("[Audio] Audio engine stopped while backgrounded, attempting reset") + self.attemptAudioReset() + } + } + } + + private func syncOnAudioLifecycleQueue(_ work: () throws -> T) rethrows -> T { + if DispatchQueue.getSpecific(key: audioLifecycleQueueKey) != nil { + return try work() } + return try audioLifecycleQueue.sync(execute: work) } private func removeObservers() { diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift index 5c124f66..db15f596 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift @@ -1,66 +1,64 @@ import Foundation +struct GeminiLiveCredential: Equatable { + let token: String + let queryParameterName: String + let websocketBaseURL: String + let model: String +} + enum GeminiConfig { - static let websocketBaseURL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent" - static let model = "models/gemini-2.5-flash-native-audio-preview-12-2025" + static let ephemeralTokenWebsocketBaseURL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained" + static let model = "models/gemini-live-2.5-flash-native-audio" static let inputAudioSampleRate: Double = 16000 static let outputAudioSampleRate: Double = 24000 static let audioChannels: UInt32 = 1 static let audioBitsPerSample: UInt32 = 16 - static let videoFrameInterval: TimeInterval = 1.0 + static let videoFrameInterval: TimeInterval = 3.0 static let videoJPEGQuality: CGFloat = 0.5 - static var 
systemInstruction: String { SettingsManager.shared.geminiSystemPrompt } - static let defaultSystemInstruction = """ - You are an AI assistant for someone wearing Meta Ray-Ban smart glasses. You can see through their camera and have a voice conversation. Keep responses concise and natural. - - CRITICAL: You have NO memory, NO storage, and NO ability to take actions on your own. You cannot remember things, keep lists, set reminders, search the web, send messages, or do anything persistent. You are ONLY a voice interface. - - You have exactly ONE tool: execute. This connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. + You are a live frontline worker copilot for SOP execution sessions. - ALWAYS use execute when the user asks you to: - - Send a message to someone (any platform: WhatsApp, Telegram, iMessage, Slack, etc.) - - Search or look up anything (web, local info, facts, news) - - Add, create, or modify anything (shopping lists, reminders, notes, todos, events) - - Research, analyze, or draft anything - - Control or interact with apps, devices, or services - - Remember or store any information for later + Your job is to converse naturally with the worker, guide the current task step by step, and use connected tools when they help move the job forward. - Be detailed in your task description. Include all relevant context: names, content, platforms, quantities, etc. The assistant works better with complete information. - - NEVER pretend to do these things yourself. - - IMPORTANT: Before calling execute, ALWAYS speak a brief acknowledgment first. For example: - - "Sure, let me add that to your shopping list." then call execute. - - "Got it, searching for that now." then call execute. - - "On it, sending that message." then call execute. 
- Never call execute silently -- the user needs verbal confirmation that you heard them and are working on it. The tool may take several seconds to complete, so the acknowledgment lets them know something is happening. - - For messages, confirm recipient and content before delegating unless clearly urgent. + Rules: + - Ground answers in the live camera feed, the current SOP context, and the worker's request. + - If visual evidence is insufficient, say what you need to see next. + - Keep spoken responses short, clear, and useful for hands-free work. + - Offer direct next actions instead of long explanations. + - Use available tools for task execution, logging, and memory when appropriate. + - Never pretend you verified something you could not actually observe or infer. """ // User-configurable values (Settings screen overrides, falling back to Secrets.swift) - static var apiKey: String { SettingsManager.shared.geminiAPIKey } - static var openClawHost: String { SettingsManager.shared.openClawHost } - static var openClawPort: Int { SettingsManager.shared.openClawPort } - static var openClawHookToken: String { SettingsManager.shared.openClawHookToken } - static var openClawGatewayToken: String { SettingsManager.shared.openClawGatewayToken } - - static func websocketURL() -> URL? 
{ - guard apiKey != "YOUR_GEMINI_API_KEY" && !apiKey.isEmpty else { return nil } - return URL(string: "\(websocketBaseURL)?key=\(apiKey)") + static var deviceID: String { SettingsManager.shared.deviceID } + static var workerLoginCode: String { SettingsManager.shared.workerLoginCode } + static var workerEmail: String { SettingsManager.shared.workerEmail } + static var opsBaseURL: String { SettingsManager.shared.opsBaseURL } + static var adminBaseURL: String { SettingsManager.shared.adminBaseURL } + static var signalBaseURL: String { SettingsManager.shared.signalBaseURL } + static var workerAPIBearerToken: String { SettingsManager.shared.workerAPIBearerToken } + + static func websocketURL(credential: GeminiLiveCredential) -> URL? { + guard var components = URLComponents(string: credential.websocketBaseURL) else { return nil } + var queryItems = components.queryItems ?? [] + queryItems.append( + URLQueryItem(name: credential.queryParameterName, value: credential.token) + ) + components.queryItems = queryItems + return components.url } - static var isConfigured: Bool { - return apiKey != "YOUR_GEMINI_API_KEY" && !apiKey.isEmpty + static var isOpsConfigured: Bool { + let trimmed = opsBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + return !trimmed.isEmpty && !trimmed.contains("YOUR_") } - static var isOpenClawConfigured: Bool { - return openClawGatewayToken != "YOUR_OPENCLAW_GATEWAY_TOKEN" - && !openClawGatewayToken.isEmpty - && openClawHost != "http://YOUR_MAC_HOSTNAME.local" + static var isAdminConfigured: Bool { + let trimmed = adminBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + return !trimmed.isEmpty && !trimmed.contains("YOUR_") } } diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift index 248f2f02..6f7d0f9d 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift +++ 
b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift @@ -1,4 +1,5 @@ import Foundation +import QuartzCore import UIKit enum GeminiConnectionState: Equatable { @@ -9,6 +10,32 @@ enum GeminiConnectionState: Equatable { case error(String) } +enum GeminiRecoverableDisconnectReason: Equatable { + case socketClosed(String) + case socketError(String) + case receiveError(String) + case sendError(String) + case pingError(String) + case goAway(seconds: Int) + + var message: String { + switch self { + case .socketClosed(let message): + return message + case .socketError(let message): + return message + case .receiveError(let message): + return message + case .sendError(let message): + return message + case .pingError(let message): + return message + case .goAway(let seconds): + return "Server closing (time left: \(seconds)s)" + } + } +} + @MainActor class GeminiLiveService: ObservableObject { @Published var connectionState: GeminiConnectionState = .disconnected @@ -20,8 +47,9 @@ class GeminiLiveService: ObservableObject { var onDisconnected: ((String?) -> Void)? var onInputTranscription: ((String) -> Void)? var onOutputTranscription: ((String) -> Void)? - var onToolCall: ((GeminiToolCall) -> Void)? - var onToolCallCancellation: ((GeminiToolCallCancellation) -> Void)? + var onSocketOpened: (() -> Void)? + var onSocketClosed: ((String?) -> Void)? + var onRecoverableDisconnect: ((GeminiRecoverableDisconnectReason) -> Void)? // Latency tracking private var lastUserSpeechEnd: Date? @@ -29,10 +57,23 @@ class GeminiLiveService: ObservableObject { private var webSocketTask: URLSessionWebSocketTask? private var receiveTask: Task? + private var pingTask: Task? private var connectContinuation: CheckedContinuation? + private var closeWaitContinuation: CheckedContinuation? private let delegate = WebSocketDelegate() private var urlSession: URLSession! private let sendQueue = DispatchQueue(label: "gemini.send", qos: .userInitiated) + private var latestVideoFrameBase64: String? 
+ private var setupSystemInstruction: String = GeminiConfig.defaultSystemInstruction + private var setupModel: String = GeminiConfig.model + private var videoFrameSendCount: Int64 = 0 + private var videoFrameStatsWindowStart = CACurrentMediaTime() + private var connectionGeneration = 0 + private var isClosingIntentionally = false + private var didNotifyRecoverableDisconnect = false + private let keepaliveIntervalNanoseconds: UInt64 = 15_000_000_000 + + var lastVideoFrameBase64: String? { latestVideoFrameBase64 } init() { let config = URLSessionConfiguration.default @@ -40,13 +81,23 @@ class GeminiLiveService: ObservableObject { self.urlSession = URLSession(configuration: config, delegate: delegate, delegateQueue: nil) } - func connect() async -> Bool { - guard let url = GeminiConfig.websocketURL() else { - connectionState = .error("No API key configured") + func connect( + systemInstruction: String? = nil, + credential: GeminiLiveCredential + ) async -> Bool { + guard let url = GeminiConfig.websocketURL(credential: credential) else { + connectionState = .error("Gemini Live credential is invalid") return false } + setupSystemInstruction = resolvedSystemInstruction(systemInstruction) + setupModel = credential.model connectionState = .connecting + connectionGeneration += 1 + let generation = connectionGeneration + isClosingIntentionally = false + didNotifyRecoverableDisconnect = false + stopKeepalive() let result = await withCheckedContinuation { (continuation: CheckedContinuation) in self.connectContinuation = continuation @@ -54,7 +105,18 @@ class GeminiLiveService: ObservableObject { self.delegate.onOpen = { [weak self] protocol_ in guard let self else { return } Task { @MainActor in + guard self.connectionGeneration == generation else { return } + Task { + await WorkerTelemetry.shared.record( + "gemini_socket_open", + source: "gemini_live", + stage: "connected", + payload: ["protocol": protocol_ ?? 
NSNull()] + ) + } + self.onSocketOpened?() self.connectionState = .settingUp + self.startKeepalive(generation: generation) self.sendSetupMessage() self.startReceiving() } @@ -64,10 +126,28 @@ class GeminiLiveService: ObservableObject { guard let self else { return } let reasonStr = reason.flatMap { String(data: $0, encoding: .utf8) } ?? "no reason" Task { @MainActor in - self.resolveConnect(success: false) - self.connectionState = .disconnected - self.isModelSpeaking = false - self.onDisconnected?("Connection closed (code \(code.rawValue): \(reasonStr))") + guard self.connectionGeneration == generation else { return } + Task { + await WorkerTelemetry.shared.record( + "gemini_socket_closed", + source: "gemini_live", + stage: "closed", + payload: [ + "code": code.rawValue, + "reason": reasonStr + ] + ) + } + let message = "Connection closed (code \(code.rawValue): \(reasonStr))" + if self.isClosingIntentionally || code == .normalClosure { + self.stopKeepalive() + self.resolveConnect(success: false) + self.connectionState = .disconnected + self.isModelSpeaking = false + self.resolveCloseWait() + return + } + self.notifyRecoverableDisconnect(.socketClosed(message), state: .disconnected) } } @@ -75,10 +155,24 @@ class GeminiLiveService: ObservableObject { guard let self else { return } let msg = error?.localizedDescription ?? 
"Unknown error" Task { @MainActor in - self.resolveConnect(success: false) - self.connectionState = .error(msg) - self.isModelSpeaking = false - self.onDisconnected?(msg) + guard self.connectionGeneration == generation else { return } + Task { + await WorkerTelemetry.shared.record( + "gemini_socket_error", + source: "gemini_live", + stage: "failed", + payload: ["error": msg] + ) + } + guard !self.isClosingIntentionally else { + self.stopKeepalive() + self.resolveConnect(success: false) + self.connectionState = .disconnected + self.isModelSpeaking = false + self.resolveCloseWait() + return + } + self.notifyRecoverableDisconnect(.socketError(msg), state: .error(msg)) } } @@ -89,6 +183,7 @@ class GeminiLiveService: ObservableObject { Task { try? await Task.sleep(nanoseconds: 15_000_000_000) await MainActor.run { + guard self.connectionGeneration == generation else { return } self.resolveConnect(success: false) if self.connectionState == .connecting || self.connectionState == .settingUp { self.connectionState = .error("Connection timed out") @@ -101,6 +196,9 @@ class GeminiLiveService: ObservableObject { } func disconnect() { + isClosingIntentionally = true + connectionGeneration += 1 + stopKeepalive() receiveTask?.cancel() receiveTask = nil webSocketTask?.cancel(with: .normalClosure, reason: nil) @@ -108,8 +206,49 @@ class GeminiLiveService: ObservableObject { delegate.onOpen = nil delegate.onClose = nil delegate.onError = nil - onToolCall = nil - onToolCallCancellation = nil + onSocketOpened = nil + onSocketClosed = nil + onRecoverableDisconnect = nil + connectionState = .disconnected + isModelSpeaking = false + resolveConnect(success: false) + resolveCloseWait() + } + + func disconnectAndWaitForClose(timeout: TimeInterval = 1.0) async { + isClosingIntentionally = true + stopKeepalive() + receiveTask?.cancel() + receiveTask = nil + + guard let task = webSocketTask, connectionState != .disconnected else { + disconnect() + return + } + + resolveCloseWait() + await 
withCheckedContinuation { (continuation: CheckedContinuation) in + closeWaitContinuation = continuation + let boundedTimeout = DispatchTimeInterval.milliseconds(Int(max(timeout, 0.05) * 1_000)) + DispatchQueue.main.asyncAfter(deadline: .now() + boundedTimeout) { [weak self] in + Task { @MainActor in + guard let self else { return } + if self.closeWaitContinuation != nil { + NSLog("[Gemini] WebSocket close wait timed out") + } + self.resolveCloseWait() + } + } + task.cancel(with: .normalClosure, reason: nil) + } + + webSocketTask = nil + delegate.onOpen = nil + delegate.onClose = nil + delegate.onError = nil + onSocketOpened = nil + onSocketClosed = nil + onRecoverableDisconnect = nil connectionState = .disconnected isModelSpeaking = false resolveConnect(success: false) @@ -127,14 +266,20 @@ class GeminiLiveService: ObservableObject { ] ] ] - self?.sendJSON(json) + Task { @MainActor [weak self] in + self?.sendJSON(json) + } } } func sendVideoFrame(image: UIImage) { guard connectionState == .ready else { return } + let frameStartedAt = CACurrentMediaTime() sendQueue.async { [weak self] in + guard let self else { return } + let encodeStartedAt = CACurrentMediaTime() guard let jpegData = image.jpegData(compressionQuality: GeminiConfig.videoJPEGQuality) else { return } + let encodeDurationMs = (CACurrentMediaTime() - encodeStartedAt) * 1000 let base64 = jpegData.base64EncodedString() let json: [String: Any] = [ "realtimeInput": [ @@ -144,13 +289,17 @@ class GeminiLiveService: ObservableObject { ] ] ] - self?.sendJSON(json) - } - } - - func sendToolResponse(_ response: [String: Any]) { - sendQueue.async { [weak self] in - self?.sendJSON(response) + Task { @MainActor [weak self] in + guard let self else { return } + self.videoFrameSendCount += 1 + self.logVideoSendStatsIfNeeded( + payloadBytes: jpegData.count, + encodeDurationMs: encodeDurationMs, + totalDurationMs: (CACurrentMediaTime() - frameStartedAt) * 1000 + ) + self.latestVideoFrameBase64 = base64 + 
self.sendJSON(json) + } } } @@ -164,8 +313,47 @@ class GeminiLiveService: ObservableObject { ] ] ] - self?.sendJSON(msg) + Task { @MainActor [weak self] in + self?.sendJSON(msg) + } + } + } + + private func logVideoSendStatsIfNeeded( + payloadBytes: Int, + encodeDurationMs: Double, + totalDurationMs: Double + ) { + guard videoFrameSendCount == 1 || videoFrameSendCount % 10 == 0 else { return } + let now = CACurrentMediaTime() + let elapsed = max(now - videoFrameStatsWindowStart, 0.001) + let fps = Double(videoFrameSendCount) / elapsed + NSLog( + "[Gemini] Vision lane frames=%lld rate=%.2ffps encode=%.1fms total=%.1fms payload=%dB", + videoFrameSendCount, + fps, + encodeDurationMs, + totalDurationMs, + payloadBytes + ) + Task { + await WorkerTelemetry.shared.record( + "gemini_video_frame_sent", + source: "gemini_live", + stage: "video", + durationMs: totalDurationMs, + metricValue: Double(payloadBytes), + metricUnit: "bytes", + payload: [ + "frames": Int(videoFrameSendCount), + "fps": fps, + "encode_ms": encodeDurationMs, + "payload_bytes": payloadBytes + ] + ) } + videoFrameStatsWindowStart = now + videoFrameSendCount = 0 } // MARK: - Private @@ -177,10 +365,77 @@ class GeminiLiveService: ObservableObject { } } + private func resolveCloseWait() { + guard let cont = closeWaitContinuation else { return } + closeWaitContinuation = nil + cont.resume() + } + + private func startKeepalive(generation: Int) { + stopKeepalive() + let interval = keepaliveIntervalNanoseconds + pingTask = Task { [weak self] in + while !Task.isCancelled { + try? 
await Task.sleep(nanoseconds: interval) + guard !Task.isCancelled else { break } + await MainActor.run { + self?.sendKeepalivePingIfCurrent(generation: generation) + } + } + } + } + + private func stopKeepalive() { + pingTask?.cancel() + pingTask = nil + } + + private func sendKeepalivePingIfCurrent(generation: Int) { + guard connectionGeneration == generation, + !isClosingIntentionally, + let task = webSocketTask else { + return + } + + task.sendPing { [weak self] error in + guard let error else { return } + Task { @MainActor in + guard let self, self.connectionGeneration == generation else { return } + self.notifyRecoverableDisconnect( + .pingError(error.localizedDescription), + state: .disconnected + ) + } + } + } + + private func notifyRecoverableDisconnect( + _ reason: GeminiRecoverableDisconnectReason, + state: GeminiConnectionState + ) { + guard !isClosingIntentionally, !didNotifyRecoverableDisconnect else { return } + didNotifyRecoverableDisconnect = true + stopKeepalive() + resolveConnect(success: false) + connectionState = state + isModelSpeaking = false + resolveCloseWait() + onRecoverableDisconnect?(reason) + } + + private func resolvedSystemInstruction(_ override: String?) -> String { + let candidate = override?.trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" + if !candidate.isEmpty { + return candidate + } + + return GeminiConfig.defaultSystemInstruction + } + private func sendSetupMessage() { let setup: [String: Any] = [ "setup": [ - "model": GeminiConfig.model, + "model": setupModel, "generationConfig": [ "responseModalities": ["AUDIO"], "thinkingConfig": [ @@ -189,12 +444,7 @@ class GeminiLiveService: ObservableObject { ], "systemInstruction": [ "parts": [ - ["text": GeminiConfig.systemInstruction] - ] - ], - "tools": [ - [ - "functionDeclarations": ToolDeclarations.allDeclarations() + ["text": setupSystemInstruction] ] ], "realtimeInputConfig": [ @@ -225,7 +475,16 @@ class GeminiLiveService: ObservableObject { let string = String(data: data, encoding: .utf8) else { return } - webSocketTask?.send(.string(string)) { _ in } + webSocketTask?.send(.string(string)) { [weak self] error in + guard let self, let error else { return } + Task { @MainActor in + NSLog("[Gemini] WebSocket send failed: %@", error.localizedDescription) + self.notifyRecoverableDisconnect( + .sendError(error.localizedDescription), + state: .error("WebSocket send failed: \(error.localizedDescription)") + ) + } + } } private func startReceiving() { @@ -249,10 +508,10 @@ class GeminiLiveService: ObservableObject { if !Task.isCancelled { let reason = error.localizedDescription await MainActor.run { - self.resolveConnect(success: false) - self.connectionState = .disconnected - self.isModelSpeaking = false - self.onDisconnected?(reason) + self.notifyRecoverableDisconnect( + .receiveError(reason), + state: .disconnected + ) } } break @@ -267,9 +526,32 @@ class GeminiLiveService: ObservableObject { return } + // Server-provided error payload + if let errorObj = json["error"] as? [String: Any] { + let status = errorObj["status"] as? String ?? "UNKNOWN" + let message = errorObj["message"] as? String ?? 
"Unknown Gemini server error" + let full = "Gemini setup error [\(status)]: \(message)" + NSLog("[Gemini] %@", full) + connectionState = .error(full) + isModelSpeaking = false + resolveConnect(success: false) + resolveCloseWait() + onSocketClosed?(full) + onDisconnected?(full) + return + } + // Setup complete if json["setupComplete"] != nil { connectionState = .ready + Task { + await WorkerTelemetry.shared.record( + "gemini_setup_complete", + source: "gemini_live", + stage: "ready", + payload: ["model": setupModel] + ) + } resolveConnect(success: true) return } @@ -278,23 +560,7 @@ class GeminiLiveService: ObservableObject { if let goAway = json["goAway"] as? [String: Any] { let timeLeft = goAway["timeLeft"] as? [String: Any] let seconds = timeLeft?["seconds"] as? Int ?? 0 - connectionState = .disconnected - isModelSpeaking = false - onDisconnected?("Server closing (time left: \(seconds)s)") - return - } - - // Tool call from model (top-level message, not inside serverContent) - if let toolCall = GeminiToolCall(json: json) { - NSLog("[Gemini] Tool call received: %d function(s)", toolCall.functionCalls.count) - onToolCall?(toolCall) - return - } - - // Tool call cancellation (user interrupted during tool execution) - if let cancellation = GeminiToolCallCancellation(json: json) { - NSLog("[Gemini] Tool call cancellation: %@", cancellation.ids.joined(separator: ", ")) - onToolCallCancellation?(cancellation) + notifyRecoverableDisconnect(.goAway(seconds: seconds), state: .disconnected) return } @@ -320,6 +586,16 @@ class GeminiLiveService: ObservableObject { if let speechEnd = lastUserSpeechEnd, !responseLatencyLogged { let latency = Date().timeIntervalSince(speechEnd) NSLog("[Latency] %.0fms (user speech end -> first audio)", latency * 1000) + Task { + await WorkerTelemetry.shared.record( + "gemini_first_audio_latency", + source: "gemini_live", + stage: "first_audio", + durationMs: latency * 1000, + metricValue: latency * 1000, + metricUnit: "ms" + ) + } 
responseLatencyLogged = true } } diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveSpotter.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveSpotter.swift new file mode 100644 index 00000000..804b1c94 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveSpotter.swift @@ -0,0 +1,118 @@ +import Foundation +import UIKit + +final class GeminiLiveSpotter { + private weak var api: WorkerAdminAPI? + + struct SpotterRequestItem: Hashable { + let id: String + let name: String + let aiPrompt: String + let expectedObjects: [String] + let preconditions: [String] + let postconditions: [String] + let skipRisk: String + let evidenceRequired: Bool + let validation: String + let critical: Bool + } + + struct SpotterMatch: Equatable { + let id: String + let matched: Bool + let confidence: Double + let reason: String + let evidenceTimestamp: String + let threshold: Double + let autoComplete: Bool + let advancedToStepIndex: Int? + let completedSop: Bool + let evidenceWindowSatisfied: Bool? + let activeDurationSatisfied: Bool? + let stableObservations: Int? + let stableObservationsRequired: Int? + } + + func configure(api: WorkerAdminAPI?) { + self.api = api + } + + func detectVisibleItemMatches( + image: UIImage, + items: [SpotterRequestItem], + sessionID: String?, + elapsedActiveMs: Int? 
= nil + ) async throws -> [SpotterMatch] { + guard !items.isEmpty else { return [] } + guard let api, let sessionID, !sessionID.isEmpty else { return [] } + guard let imagePayload = Self.encodedSpotterImage(image) else { return [] } + + let capturedAt = ISO8601DateFormatter().string(from: Date()) + + var matches: [SpotterMatch] = [] + for item in items { + let response = try await api.requestGeminiSpotter( + GeminiSpotterRequest( + sessionID: sessionID, + stepID: item.id, + stepTitle: item.name, + aiPrompt: item.aiPrompt, + expectedObjects: item.expectedObjects, + preconditions: item.preconditions, + postconditions: item.postconditions, + skipRisk: item.skipRisk, + evidenceRequired: item.evidenceRequired, + imageBase64: imagePayload.base64, + imageMimeType: imagePayload.mimeType, + capturedAt: capturedAt, + critical: item.critical, + allowAIComplete: item.validation.lowercased() == "visual", + elapsedActiveMs: elapsedActiveMs + ) + ) + + matches.append( + SpotterMatch( + id: item.id, + matched: response.matched, + confidence: response.confidence, + reason: response.reason, + evidenceTimestamp: response.evidenceTimestamp, + threshold: response.threshold ?? (item.critical ? 0.94 : ((item.evidenceRequired || item.skipRisk == "high") ? 0.9 : 0.88)), + autoComplete: response.autoComplete, + advancedToStepIndex: response.advancedToStepIndex, + completedSop: response.completedSop ?? false, + evidenceWindowSatisfied: response.evidenceWindowSatisfied, + activeDurationSatisfied: response.activeDurationSatisfied, + stableObservations: response.stableObservations, + stableObservationsRequired: response.stableObservationsRequired + ) + ) + } + return matches + } + + private static func encodedSpotterImage(_ image: UIImage) -> (base64: String, mimeType: String)? 
{ + let resized = image.resizedForSpotter(maxDimension: 768) + guard let jpegData = resized.jpegData(compressionQuality: 0.45) else { return nil } + return (jpegData.base64EncodedString(), "image/jpeg") + } +} + +private extension UIImage { + func resizedForSpotter(maxDimension: CGFloat) -> UIImage { + let width = size.width + let height = size.height + guard width > 0, height > 0 else { return self } + + let longest = max(width, height) + guard longest > maxDimension else { return self } + + let scale = maxDimension / longest + let targetSize = CGSize(width: width * scale, height: height * scale) + let renderer = UIGraphicsImageRenderer(size: targetSize) + return renderer.image { _ in + self.draw(in: CGRect(origin: .zero, size: targetSize)) + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index e7d9d902..9b4caca0 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -1,60 +1,213 @@ import Foundation import SwiftUI +import UIKit @MainActor class GeminiSessionViewModel: ObservableObject { @Published var isGeminiActive: Bool = false + @Published var isAudioReady: Bool = false @Published var connectionState: GeminiConnectionState = .disconnected @Published var isModelSpeaking: Bool = false @Published var errorMessage: String? @Published var userTranscript: String = "" @Published var aiTranscript: String = "" - @Published var toolCallStatus: ToolCallStatus = .idle - @Published var openClawConnectionState: OpenClawConnectionState = .notConfigured + + private struct LiveSessionConfig { + let credential: GeminiLiveCredential + let systemInstruction: String + let diagnosticsID: String? + let provider: String? 
+ } + + private enum GeminiSessionIntent { + case idle + case active + case humanSupport + } + private let geminiService = GeminiLiveService() - private let openClawBridge = OpenClawBridge() - private var toolCallRouter: ToolCallRouter? private let audioManager = AudioManager() - private let eventClient = OpenClawEventClient() private var lastVideoFrameTime: Date = .distantPast private var stateObservation: Task? + private weak var workerAdminAPI: WorkerAdminAPI? + private var adminExecutionSessionID: String? + private var currentLiveCredential: GeminiLiveCredential? + private var currentSessionInstruction: String? + private var lastDiagnosticsID: String? + private var isStoppingSession = false + private var sessionIntent: GeminiSessionIntent = .idle + private var sessionGeneration = 0 + private var reconnectTask: Task? + private var autoReconnectAttempts = 0 + private let maxAutoReconnectAttempts = 3 var streamingMode: StreamingMode = .glasses + var onInputCommand: ((String) -> Void)? + var onInputAudioChunk: ((Data) -> Void)? + var onOutputAudioChunk: ((Data) -> Void)? + + func configureWorkerAdminAPI(_ api: WorkerAdminAPI?, sessionID: String? = nil) { + workerAdminAPI = api + adminExecutionSessionID = sessionID + } + + func startSession(systemInstruction: String? = nil) async { + guard !isGeminiActive else { + await refreshSessionInstruction(systemInstruction) + return + } + + sessionGeneration += 1 + sessionIntent = .active + autoReconnectAttempts = 0 + reconnectTask?.cancel() + reconnectTask = nil + errorMessage = nil + userTranscript = "" + aiTranscript = "" + isStoppingSession = false + + guard let liveConfig = await resolveLiveSessionConfig(fallbackInstruction: systemInstruction) else { + sessionIntent = .idle + errorMessage = "Gemini Live token unavailable. Check Admin AI Settings and the worker backend connection." 
+ return + } + currentLiveCredential = liveConfig.credential + currentSessionInstruction = liveConfig.systemInstruction + lastDiagnosticsID = liveConfig.diagnosticsID + + isAudioReady = false + configureRealtimeCallbacks() + startStateObservation() + + do { + try audioManager.setupAudioSession(useIPhoneMode: streamingMode == .iPhone) + } catch { + sessionIntent = .idle + await resetToIdle(message: "Audio setup failed: \(error.localizedDescription)") + return + } - func startSession() async { - guard !isGeminiActive else { return } + let setupOk = await geminiService.connect( + systemInstruction: liveConfig.systemInstruction, + credential: liveConfig.credential + ) + + if !setupOk { + let message = liveConnectionError( + fallback: "Failed to connect to Gemini", + diagnosticsID: liveConfig.diagnosticsID + ) + sessionIntent = .idle + await resetToIdle(message: message) + await recordTelemetry( + "gemini_live_connect_failed", + stage: "failed", + payload: [ + "diagnostics_id": liveConfig.diagnosticsID ?? NSNull(), + "error": message + ] + ) + return + } - guard GeminiConfig.isConfigured else { - errorMessage = "Gemini API key not configured. Open GeminiConfig.swift and replace YOUR_GEMINI_API_KEY with your key from https://aistudio.google.com/apikey" + do { + try audioManager.startCapture() + } catch { + sessionIntent = .idle + await resetToIdle(message: "Mic capture failed: \(error.localizedDescription)") return } isGeminiActive = true + isAudioReady = true + await recordTelemetry( + "gemini_live_session_started", + stage: "ready", + payload: [ + "model": liveConfig.credential.model, + "provider": liveConfig.provider ?? "gemini", + "diagnostics_id": liveConfig.diagnosticsID ?? 
NSNull() + ] + ) + } + + func stopSession() async { + sessionIntent = .idle + sessionGeneration += 1 + reconnectTask?.cancel() + reconnectTask = nil + await resetToIdle(message: nil) + } + + func stopSessionForHumanSupportHandoff() async { + sessionIntent = .humanSupport + sessionGeneration += 1 + reconnectTask?.cancel() + reconnectTask = nil + await resetToIdle(message: nil) + } + + func refreshSessionInstruction(_ systemInstruction: String?) async { + guard isGeminiActive else { return } + guard let liveConfig = await resolveLiveSessionConfig(fallbackInstruction: systemInstruction) else { + sessionIntent = .idle + sessionGeneration += 1 + reconnectTask?.cancel() + reconnectTask = nil + await resetToIdle(message: "Gemini Live token refresh failed. Check Admin AI Settings and try again.") + return + } + + guard liveConfig.systemInstruction != currentSessionInstruction || + liveConfig.credential.model != currentLiveCredential?.model else { + return + } + + await reconnectTransport(with: liveConfig) + } + + func sendVideoFrameIfThrottled(image: UIImage) { + guard SettingsManager.shared.videoStreamingEnabled else { return } + guard isGeminiActive, connectionState == .ready else { return } + let now = Date() + guard now.timeIntervalSince(lastVideoFrameTime) >= GeminiConfig.videoFrameInterval else { return } + lastVideoFrameTime = now + geminiService.sendVideoFrame(image: image) + } - // Wire audio callbacks + private func configureRealtimeCallbacks() { audioManager.onAudioCaptured = { [weak self] data in guard let self else { return } Task { @MainActor in - // Mute mic while model speaks when speaker is on the phone - // (loudspeaker + co-located mic overwhelms iOS echo cancellation) + guard self.isGeminiActive, !self.isStoppingSession else { return } let speakerOnPhone = self.streamingMode == .iPhone || SettingsManager.shared.speakerOutputEnabled if speakerOnPhone && self.geminiService.isModelSpeaking { return } + self.onInputAudioChunk?(data) 
self.geminiService.sendAudio(data: data) } } geminiService.onAudioReceived = { [weak self] data in - self?.audioManager.playAudio(data: data) + guard let self else { return } + Task { @MainActor in + guard self.isGeminiActive, !self.isStoppingSession else { return } + self.onOutputAudioChunk?(data) + self.audioManager.playAudio(data: data) + } } geminiService.onInterrupted = { [weak self] in - self?.audioManager.stopPlayback() + guard let self else { return } + Task { @MainActor in + guard self.isGeminiActive, !self.isStoppingSession else { return } + self.audioManager.stopPlayback() + } } geminiService.onTurnComplete = { [weak self] in guard let self else { return } Task { @MainActor in - // Clear user transcript when AI finishes responding self.userTranscript = "" } } @@ -64,6 +217,7 @@ class GeminiSessionViewModel: ObservableObject { Task { @MainActor in self.userTranscript += text self.aiTranscript = "" + self.onInputCommand?(self.userTranscript) } } @@ -74,131 +228,361 @@ class GeminiSessionViewModel: ObservableObject { } } - // Handle unexpected disconnection - geminiService.onDisconnected = { [weak self] reason in + geminiService.onRecoverableDisconnect = { [weak self] reason in guard let self else { return } Task { @MainActor in - guard self.isGeminiActive else { return } - self.stopSession() - self.errorMessage = "Connection lost: \(reason ?? 
"Unknown error")" + await self.handleRecoverableDisconnect(reason) } } - // Check OpenClaw connectivity and start fresh session - await openClawBridge.checkConnection() - openClawBridge.resetSession() - - // Wire tool call handling - toolCallRouter = ToolCallRouter(bridge: openClawBridge) - - geminiService.onToolCall = { [weak self] toolCall in + geminiService.onDisconnected = { [weak self] reason in guard let self else { return } Task { @MainActor in - for call in toolCall.functionCalls { - self.toolCallRouter?.handleToolCall(call) { [weak self] response in - self?.geminiService.sendToolResponse(response) - } - } + await self.handleFatalDisconnect("Gemini connection lost: \(reason ?? "Unknown error")") } } - geminiService.onToolCallCancellation = { [weak self] cancellation in + geminiService.onSocketClosed = { [weak self] reason in guard let self else { return } Task { @MainActor in - self.toolCallRouter?.cancelToolCalls(ids: cancellation.ids) + await self.handleFatalDisconnect("Gemini socket closed: \(reason ?? "Unknown error")") } } + } - // Observe service state + private func startStateObservation() { + stateObservation?.cancel() stateObservation = Task { [weak self] in guard let self else { return } while !Task.isCancelled { - try? await Task.sleep(nanoseconds: 100_000_000) // 100ms + try? 
await Task.sleep(nanoseconds: 100_000_000) guard !Task.isCancelled else { break } self.connectionState = self.geminiService.connectionState self.isModelSpeaking = self.geminiService.isModelSpeaking - self.toolCallStatus = self.openClawBridge.lastToolCallStatus - self.openClawConnectionState = self.openClawBridge.connectionState } } + } + + private func handleFatalDisconnect(_ message: String) async { + guard isGeminiActive, !isStoppingSession else { return } + sessionIntent = .idle + sessionGeneration += 1 + reconnectTask?.cancel() + reconnectTask = nil + await resetToIdle(message: message) + } + + private func handleRecoverableDisconnect(_ reason: GeminiRecoverableDisconnectReason) async { + guard isGeminiActive, !isStoppingSession, sessionIntent == .active else { return } + isAudioReady = false + isModelSpeaking = false + + let generation = sessionGeneration + guard reconnectTask == nil else { return } + reconnectTask = Task { @MainActor [weak self] in + await self?.runAutoReconnect(reason: reason, generation: generation) + } + } + + private func shouldContinueAutoReconnect(generation: Int) -> Bool { + sessionIntent == .active && sessionGeneration == generation + } + + private func shouldUseReconnectGeneration(_ generation: Int?) -> Bool { + guard let generation else { return true } + return shouldContinueAutoReconnect(generation: generation) + } + + private func runAutoReconnect( + reason: GeminiRecoverableDisconnectReason, + generation: Int + ) async { + defer { + if sessionGeneration == generation { + reconnectTask = nil + } + } + + while autoReconnectAttempts < maxAutoReconnectAttempts { + guard shouldContinueAutoReconnect(generation: generation), !Task.isCancelled else { return } + autoReconnectAttempts += 1 + let attempt = autoReconnectAttempts + let delayNanoseconds = UInt64(Double(attempt) * 750_000_000) + + try? 
await Task.sleep(nanoseconds: delayNanoseconds) + guard shouldContinueAutoReconnect(generation: generation), !Task.isCancelled else { return } + + await recordTelemetry( + "gemini_live_auto_reconnect_attempt", + stage: "retrying", + payload: [ + "attempt": attempt, + "max_attempts": maxAutoReconnectAttempts, + "reason": reason.message + ] + ) + + let fallbackInstruction = currentSessionInstruction + guard let liveConfig = await resolveLiveSessionConfig(fallbackInstruction: fallbackInstruction) else { + continue + } + guard shouldContinueAutoReconnect(generation: generation), !Task.isCancelled else { return } + + let didReconnect = await reconnectTransport( + with: liveConfig, + preserveTranscripts: true, + resetOnFailure: false, + requiredGeneration: generation + ) + guard shouldContinueAutoReconnect(generation: generation), !Task.isCancelled else { return } + + if didReconnect { + autoReconnectAttempts = 0 + await recordTelemetry( + "gemini_live_auto_reconnect_succeeded", + stage: "ready", + payload: [ + "attempt": attempt, + "reason": reason.message, + "diagnostics_id": liveConfig.diagnosticsID ?? NSNull() + ] + ) + return + } + } + + guard shouldContinueAutoReconnect(generation: generation) else { return } + reconnectTask = nil + sessionIntent = .idle + sessionGeneration += 1 + await resetToIdle(message: "Gemini connection lost: \(reason.message)") + } + + @discardableResult + private func reconnectTransport( + with liveConfig: LiveSessionConfig, + preserveTranscripts: Bool = false, + resetOnFailure: Bool = true, + requiredGeneration: Int? 
= nil + ) async -> Bool { + let wasActive = isGeminiActive + isGeminiActive = false + isAudioReady = false + isModelSpeaking = false + isStoppingSession = true + clearGeminiCallbacks() + audioManager.stopPlayback() + await audioManager.stopCapture() + await Task.yield() + await geminiService.disconnectAndWaitForClose(timeout: 1.0) + stateObservation?.cancel() + stateObservation = nil + isStoppingSession = false + + guard wasActive || shouldUseReconnectGeneration(requiredGeneration) else { return false } + guard shouldUseReconnectGeneration(requiredGeneration) else { return false } + + errorMessage = nil + if !preserveTranscripts { + userTranscript = "" + aiTranscript = "" + } + currentLiveCredential = liveConfig.credential + currentSessionInstruction = liveConfig.systemInstruction + lastDiagnosticsID = liveConfig.diagnosticsID + + configureRealtimeCallbacks() + startStateObservation() - // Setup audio do { try audioManager.setupAudioSession(useIPhoneMode: streamingMode == .iPhone) } catch { - errorMessage = "Audio setup failed: \(error.localizedDescription)" - isGeminiActive = false - return + if resetOnFailure { + await resetToIdle(message: "Audio setup failed: \(error.localizedDescription)") + } else { + errorMessage = "Audio setup failed: \(error.localizedDescription)" + } + return false } + guard shouldUseReconnectGeneration(requiredGeneration) else { return false } - // Connect to Gemini and wait for setupComplete - let setupOk = await geminiService.connect() - - if !setupOk { - let msg: String - if case .error(let err) = geminiService.connectionState { - msg = err + let setupOk = await geminiService.connect( + systemInstruction: liveConfig.systemInstruction, + credential: liveConfig.credential + ) + guard setupOk else { + let message = liveConnectionError( + fallback: "Failed to reconnect to Gemini", + diagnosticsID: liveConfig.diagnosticsID + ) + if resetOnFailure { + await resetToIdle(message: message) } else { - msg = "Failed to connect to Gemini" - } - 
errorMessage = msg - geminiService.disconnect() - stateObservation?.cancel() - stateObservation = nil - isGeminiActive = false - connectionState = .disconnected - return + errorMessage = message + await geminiService.disconnectAndWaitForClose(timeout: 1.0) + } + return false + } + guard shouldUseReconnectGeneration(requiredGeneration) else { + await geminiService.disconnectAndWaitForClose(timeout: 1.0) + return false } - // Start mic capture do { try audioManager.startCapture() } catch { - errorMessage = "Mic capture failed: \(error.localizedDescription)" - geminiService.disconnect() - stateObservation?.cancel() - stateObservation = nil - isGeminiActive = false - connectionState = .disconnected - return + if resetOnFailure { + await resetToIdle(message: "Mic capture failed: \(error.localizedDescription)") + } else { + errorMessage = "Mic capture failed: \(error.localizedDescription)" + } + return false + } + guard shouldUseReconnectGeneration(requiredGeneration) else { + return false } - // Connect to OpenClaw event stream for proactive notifications - if SettingsManager.shared.proactiveNotificationsEnabled { - eventClient.onNotification = { [weak self] text in - guard let self else { return } - Task { @MainActor in - guard self.isGeminiActive, self.connectionState == .ready else { return } - self.geminiService.sendTextMessage(text) - } - } - eventClient.connect() + isGeminiActive = true + isAudioReady = true + return true + } + + private func resolveLiveSessionConfig(fallbackInstruction: String?) async -> LiveSessionConfig? 
{ + guard let workerAdminAPI, GeminiConfig.isAdminConfigured else { + await recordTelemetry( + "gemini_live_token_failed", + stage: "not_configured", + payload: ["reason": "admin_api_unavailable"] + ) + return nil + } + + do { + let token = try await workerAdminAPI.requestGeminiLiveToken( + model: nil, + sessionID: adminExecutionSessionID + ) + let instruction = resolvedInstruction( + serverInstruction: token.systemInstruction, + fallbackInstruction: fallbackInstruction + ) + await recordTelemetry( + "gemini_live_token_received", + stage: "ready", + payload: [ + "model": token.credential.model, + "provider": token.provider ?? "gemini", + "expires_at": token.expiresAt, + "diagnostics_id": token.diagnosticsID ?? NSNull() + ] + ) + return LiveSessionConfig( + credential: token.credential, + systemInstruction: instruction, + diagnosticsID: token.diagnosticsID, + provider: token.provider + ) + } catch { + let message = error.localizedDescription + await recordTelemetry( + "gemini_live_token_failed", + stage: "failed", + payload: ["error": message] + ) + errorMessage = "Gemini token request failed: \(message)" + return nil + } + } + + private func normalizedSystemInstruction(_ instruction: String?) -> String? { + let trimmed = instruction?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + return trimmed.isEmpty ? nil : trimmed + } + + private func resolvedInstruction( + serverInstruction: String?, + fallbackInstruction: String? + ) -> String { + let server = normalizedSystemInstruction(serverInstruction) + let fallback = normalizedSystemInstruction(fallbackInstruction) + + switch (server, fallback) { + case let (server?, fallback?) 
where !server.contains(fallback): + return """ + \(server) + + Local active-step context from the phone UI: + \(fallback) + """ + case let (server?, _): + return server + case let (nil, fallback?): + return fallback + default: + return GeminiConfig.defaultSystemInstruction } } - func stopSession() { - eventClient.disconnect() - toolCallRouter?.cancelAll() - toolCallRouter = nil - audioManager.stopCapture() - geminiService.disconnect() + private func liveConnectionError(fallback: String, diagnosticsID: String?) -> String { + let base: String + if case .error(let err) = geminiService.connectionState { + base = err + } else { + base = fallback + } + if let diagnosticsID, !diagnosticsID.isEmpty { + return "\(base). Diagnostics: \(diagnosticsID)." + } + return base + } + + private func resetToIdle(message: String?) async { + isStoppingSession = true + isGeminiActive = false + isAudioReady = false + isModelSpeaking = false + autoReconnectAttempts = 0 stateObservation?.cancel() stateObservation = nil - isGeminiActive = false + clearGeminiCallbacks() + audioManager.stopPlayback() + // This is the Gemini-to-WebRTC hardware barrier: the engine graph and + // accumulator finish before the socket is allowed to close. 
+ await audioManager.stopCapture() + await Task.yield() + await geminiService.disconnectAndWaitForClose(timeout: 1.0) + connectionState = .disconnected - isModelSpeaking = false userTranscript = "" aiTranscript = "" - toolCallStatus = .idle + currentSessionInstruction = nil + currentLiveCredential = nil + errorMessage = normalizedSystemInstruction(message) + isStoppingSession = false } - func sendVideoFrameIfThrottled(image: UIImage) { - guard SettingsManager.shared.videoStreamingEnabled else { return } - guard isGeminiActive, connectionState == .ready else { return } - let now = Date() - guard now.timeIntervalSince(lastVideoFrameTime) >= GeminiConfig.videoFrameInterval else { return } - lastVideoFrameTime = now - geminiService.sendVideoFrame(image: image) + private func clearGeminiCallbacks() { + geminiService.onDisconnected = nil + geminiService.onSocketClosed = nil + geminiService.onRecoverableDisconnect = nil + geminiService.onSocketOpened = nil + geminiService.onAudioReceived = nil + geminiService.onInterrupted = nil + geminiService.onTurnComplete = nil + geminiService.onInputTranscription = nil + geminiService.onOutputTranscription = nil } + private func recordTelemetry( + _ name: String, + stage: String, + payload: [String: Any] = [:] + ) async { + await WorkerTelemetry.shared.record( + name, + source: "gemini_live", + stage: stage, + payload: payload + ) + } } diff --git a/samples/CameraAccess/CameraAccess/Gemini/SopRelayClient.swift b/samples/CameraAccess/CameraAccess/Gemini/SopRelayClient.swift new file mode 100644 index 00000000..2ed9827e --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gemini/SopRelayClient.swift @@ -0,0 +1,2274 @@ +import Foundation + +private extension KeyedDecodingContainer { + func decodeFirstString(forKeys keys: [K]) throws -> String? 
{ + for key in keys { + if let stringValue = try decodeIfPresent(String.self, forKey: key), + !stringValue.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return stringValue + } + } + return nil + } + + func decodeLossyInt(forKeys keys: [K]) throws -> Int? { + for key in keys { + if let intValue = try? decodeIfPresent(Int.self, forKey: key) { + return intValue + } + if let stringValue = try? decodeIfPresent(String.self, forKey: key), + !stringValue.isEmpty { + if let intValue = Int(stringValue) { + return intValue + } + if let doubleValue = Double(stringValue) { + return Int(doubleValue.rounded(.down)) + } + } + if let doubleValue = try? decodeIfPresent(Double.self, forKey: key) { + return Int(doubleValue.rounded(.down)) + } + } + return nil + } + + func decodeLossyBool(forKeys keys: [K]) throws -> Bool? { + for key in keys { + if let boolValue = try? decodeIfPresent(Bool.self, forKey: key) { + return boolValue + } + if let intValue = try? decodeIfPresent(Int.self, forKey: key) { + return intValue != 0 + } + if let stringValue = try? decodeIfPresent(String.self, forKey: key), + !stringValue.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + switch stringValue.lowercased() { + case "true", "1", "yes", "active": + return true + case "false", "0", "no", "inactive": + return false + default: + continue + } + } + } + return nil + } + + func decodeLossyString(forKeys keys: [K]) throws -> String? { + for key in keys { + if let stringValue = try? decodeIfPresent(String.self, forKey: key) { + let trimmed = stringValue.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { + return trimmed + } + } + if let intValue = try? decodeIfPresent(Int.self, forKey: key) { + return String(intValue) + } + if let doubleValue = try? decodeIfPresent(Double.self, forKey: key) { + if doubleValue.rounded(.towardZero) == doubleValue { + return String(Int(doubleValue)) + } + return String(doubleValue) + } + if let boolValue = try? 
decodeIfPresent(Bool.self, forKey: key) { + return boolValue ? "true" : "false" + } + } + return nil + } +} + +private func canonicalLucasSopTitle(for sopID: String) -> String? { + switch sopID { + case "22222222-2222-2222-2222-222222222222": + return "Cold Chain Verification SOP" + case "a1000001-0000-0000-0000-000000000001": + return "Burger Assembly" + case "a1000002-0000-0000-0000-000000000002": + return "Fries Assembly" + case "a1000003-0000-0000-0000-000000000003": + return "Drink Prep" + default: + return nil + } +} + +private func canonicalLucasPackageTitle(for packageID: String?) -> String? { + guard let packageID else { return nil } + switch packageID { + case "33333333-3333-3333-3333-333333333333": + return "Inbound Cold Chain Audit" + case "b2000001-0000-0000-0000-000000000001": + return "QSR Value Meal Order" + default: + return nil + } +} + +private func canonicalLucasSopSortOrder(for sopID: String) -> Int? { + switch sopID { + case "22222222-2222-2222-2222-222222222222": + return 1 + case "a1000001-0000-0000-0000-000000000001": + return 2 + case "a1000002-0000-0000-0000-000000000002": + return 3 + case "a1000003-0000-0000-0000-000000000003": + return 4 + default: + return nil + } +} + +struct BackendWorker: Identifiable, Decodable, Equatable { + let id: String + let loginCode: String? + let email: String? + let displayName: String + let role: String? + let status: String? + + private enum CodingKeys: String, CodingKey { + case id + case loginCode = "login_code" + case loginCodeCamel = "loginCode" + case email + case displayName = "display_name" + case displayNameCamel = "displayName" + case name + case role + case status + case active + } + + init( + id: String, + loginCode: String?, + email: String? = nil, + displayName: String, + role: String?, + status: String? 
+ ) { + self.id = id + self.loginCode = loginCode + self.email = email + self.displayName = displayName + self.role = role + self.status = status + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + id = try container.decode(String.self, forKey: .id) + loginCode = try container.decodeFirstString(forKeys: [.loginCode, .loginCodeCamel]) + email = try container.decodeIfPresent(String.self, forKey: .email) + displayName = + try container.decodeFirstString(forKeys: [.displayName, .displayNameCamel, .name]) + ?? "Unassigned Worker" + role = try container.decodeIfPresent(String.self, forKey: .role) + status = + try container.decodeIfPresent(String.self, forKey: .status) + ?? ((try container.decodeLossyBool(forKeys: [.active]) ?? false) ? "active" : nil) + } +} + +struct BackendDevice: Identifiable, Decodable, Equatable { + let id: String + let workerID: String? + let platform: String? + let deviceLabel: String? + + private enum CodingKeys: String, CodingKey { + case id + case workerID = "worker_id" + case workerIDCamel = "workerId" + case platform + case deviceLabel = "device_label" + case deviceLabelCamel = "deviceLabel" + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + id = try container.decode(String.self, forKey: .id) + workerID = try container.decodeFirstString(forKeys: [.workerID, .workerIDCamel]) + platform = try container.decodeIfPresent(String.self, forKey: .platform) + deviceLabel = try container.decodeFirstString(forKeys: [.deviceLabel, .deviceLabelCamel]) + } +} + +struct BackendPackage: Identifiable, Decodable, Equatable { + let id: String + let title: String + let description: String? + let outcome: String? + let version: Int? + let status: String? 
+ + private enum CodingKeys: String, CodingKey { + case id + case title + case name + case description + case outcome + case version + case status + } + + init( + id: String, + title: String, + description: String?, + outcome: String?, + version: Int?, + status: String? + ) { + self.id = id + self.title = title + self.description = description + self.outcome = outcome + self.version = version + self.status = status + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + id = try container.decode(String.self, forKey: .id) + title = + try container.decodeIfPresent(String.self, forKey: .title) + ?? container.decodeIfPresent(String.self, forKey: .name) + ?? "Untitled Package" + description = try container.decodeIfPresent(String.self, forKey: .description) + outcome = try container.decodeIfPresent(String.self, forKey: .outcome) + version = try container.decodeLossyInt(forKeys: [.version]) + status = try container.decodeIfPresent(String.self, forKey: .status) + } +} + +struct BackendAssignedPackage: Identifiable, Decodable, Equatable { + let id: String + let title: String + let description: String? + let outcome: String? + let version: Int? + let shiftName: String? + let active: Bool? + let packageRunID: String? + let packageRunStatus: String? + let packageRunStartedAt: String? + let packageRunCompletedAt: String? 
+ + private enum CodingKeys: String, CodingKey { + case id + case packageID = "package_id" + case packageIDCamel = "packageId" + case title + case packageTitle = "package_title" + case packageTitleCamel = "packageTitle" + case description + case packageDescription = "package_description" + case packageDescriptionCamel = "packageDescription" + case outcome + case packageOutcome = "package_outcome" + case packageOutcomeCamel = "packageOutcome" + case version + case packageVersion = "package_version" + case packageVersionCamel = "packageVersion" + case shiftName = "shift_name" + case shiftNameCamel = "shiftName" + case active + case packageRunID = "package_run_id" + case packageRunIDCamel = "packageRunId" + case packageRunStatus = "package_run_status" + case packageRunStatusCamel = "packageRunStatus" + case packageRunStartedAt = "package_run_started_at" + case packageRunStartedAtCamel = "packageRunStartedAt" + case packageRunCompletedAt = "package_run_completed_at" + case packageRunCompletedAtCamel = "packageRunCompletedAt" + } + + init( + id: String, + title: String, + description: String?, + outcome: String?, + version: Int?, + shiftName: String?, + active: Bool?, + packageRunID: String?, + packageRunStatus: String?, + packageRunStartedAt: String?, + packageRunCompletedAt: String? + ) { + self.id = id + self.title = title + self.description = description + self.outcome = outcome + self.version = version + self.shiftName = shiftName + self.active = active + self.packageRunID = packageRunID + self.packageRunStatus = packageRunStatus + self.packageRunStartedAt = packageRunStartedAt + self.packageRunCompletedAt = packageRunCompletedAt + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let rawID = + try container.decodeFirstString(forKeys: [.id, .packageID, .packageIDCamel]) + ?? UUID().uuidString + let rawTitle = + try container.decodeFirstString(forKeys: [.title, .packageTitle, .packageTitleCamel]) + ?? 
canonicalLucasPackageTitle(for: rawID) + ?? "Untitled Package" + + self.init( + id: rawID, + title: rawTitle, + description: try container.decodeFirstString(forKeys: [.description, .packageDescription, .packageDescriptionCamel]), + outcome: try container.decodeFirstString(forKeys: [.outcome, .packageOutcome, .packageOutcomeCamel]), + version: try container.decodeLossyInt(forKeys: [.version, .packageVersion, .packageVersionCamel]), + shiftName: try container.decodeFirstString(forKeys: [.shiftName, .shiftNameCamel]), + active: try container.decodeLossyBool(forKeys: [.active]), + packageRunID: try container.decodeFirstString(forKeys: [.packageRunID, .packageRunIDCamel]), + packageRunStatus: try container.decodeFirstString(forKeys: [.packageRunStatus, .packageRunStatusCamel]), + packageRunStartedAt: try container.decodeFirstString(forKeys: [.packageRunStartedAt, .packageRunStartedAtCamel]), + packageRunCompletedAt: try container.decodeFirstString(forKeys: [.packageRunCompletedAt, .packageRunCompletedAtCamel]) + ) + } +} + +struct BackendShift: Identifiable, Decodable, Equatable { + let id: String + let packageID: String? + let shiftName: String? + let startsAt: String? + let endsAt: String? + let active: Bool? + let package: BackendPackage? + + private enum CodingKeys: String, CodingKey { + case id + case packageID = "package_id" + case packageIDCamel = "packageId" + case shiftName = "shift_name" + case shiftNameCamel = "shiftName" + case startsAt = "starts_at" + case startsAtCamel = "startsAt" + case endsAt = "ends_at" + case endsAtCamel = "endsAt" + case active + case package + } + + init( + id: String, + packageID: String?, + shiftName: String?, + startsAt: String?, + endsAt: String?, + active: Bool?, + package: BackendPackage? 
/// A single SOP step as delivered inside a worker queue item.
///
/// Decoding is lenient: a step may arrive as a bare JSON string (used as the title)
/// or as an object using snake_case or camelCase keys. Fields that are missing fall
/// back to the same defaults as the memberwise initializer.
struct WorkerQueueStep: Identifiable, Decodable, Equatable, Hashable {
    let id: String
    let order: Int
    let title: String
    let description: String
    let duration: String
    let validation: String
    let critical: Bool
    let aiPrompt: String
    let expectedObjects: [String]
    let preconditions: [String]
    let postconditions: [String]
    let skipRisk: String
    let evidenceRequired: Bool
    let allowManualComplete: Bool

    private enum CodingKeys: String, CodingKey {
        case id
        case order
        case name
        case title
        case label
        case step
        case item
        case index
        case description
        case instruction
        case duration
        case validation
        case critical
        case aiPrompt = "ai_prompt"
        case aiPromptCamel = "aiPrompt"
        case expectedObjects = "expected_objects"
        case expectedObjectsCamel = "expectedObjects"
        case preconditions
        case postconditions
        case skipRisk = "skip_risk"
        case skipRiskCamel = "skipRisk"
        case evidenceRequired = "evidence_required"
        case evidenceRequiredCamel = "evidenceRequired"
        case allowManualComplete = "allow_manual_complete"
        case allowManualCompleteCamel = "allowManualComplete"
    }

    /// The only values `skipRisk` can hold after decoding.
    private static let knownSkipRisks: Set<String> = ["low", "medium", "high"]

    /// Memberwise initializer.
    ///
    /// - Parameter aiPrompt: When `nil`, a prompt asking to confirm completion of
    ///   `title` is generated.
    init(
        id: String,
        order: Int,
        title: String,
        description: String = "",
        duration: String = "30s",
        validation: String = "visual",
        critical: Bool = false,
        aiPrompt: String? = nil,
        expectedObjects: [String] = [],
        preconditions: [String] = [],
        postconditions: [String] = [],
        skipRisk: String = "medium",
        evidenceRequired: Bool = true,
        allowManualComplete: Bool = true
    ) {
        self.id = id
        self.order = order
        self.title = title
        self.description = description
        self.duration = duration
        self.validation = validation
        self.critical = critical
        self.aiPrompt = aiPrompt ?? "Look at the image and confirm whether \"\(title)\" has been completed."
        self.expectedObjects = expectedObjects
        self.preconditions = preconditions
        self.postconditions = postconditions
        self.skipRisk = skipRisk
        self.evidenceRequired = evidenceRequired
        self.allowManualComplete = allowManualComplete
    }

    /// Decodes a step from either a bare string or a keyed object.
    ///
    /// - A bare string becomes the title; the id is a slug of it and `order` is 0.
    /// - For objects the title is the first present of `name`, `title`, `label`,
    ///   `step`, `item`; the id falls back to a slug of that title.
    /// - `skip_risk` is matched case-insensitively after trimming whitespace. An
    ///   unrecognised value becomes "medium"; a missing value is derived from
    ///   `critical` ("high" when critical, otherwise "medium").
    init(from decoder: Decoder) throws {
        if let single = try? decoder.singleValueContainer(),
           let raw = try? single.decode(String.self) {
            self.init(id: Self.slug(from: raw), order: 0, title: raw)
            return
        }

        let container = try decoder.container(keyedBy: CodingKeys.self)

        // Title aliases in priority order.
        let name = try container.decodeLossyString(forKeys: [.name])
        let fallbackTitle = try container.decodeLossyString(forKeys: [.title])
        let label = try container.decodeLossyString(forKeys: [.label])
        let step = try container.decodeLossyString(forKeys: [.step])
        let item = try container.decodeLossyString(forKeys: [.item])
        let resolvedTitle = name ?? fallbackTitle ?? label ?? step ?? item ?? "Untitled Step"
        let resolvedID = try container.decodeLossyString(forKeys: [.id]) ?? Self.slug(from: resolvedTitle)

        let expectedObjects =
            try container.decodeIfPresent([String].self, forKey: .expectedObjects)
            ?? container.decodeIfPresent([String].self, forKey: .expectedObjectsCamel)
            ?? []
        let preconditions = try container.decodeIfPresent([String].self, forKey: .preconditions) ?? []
        let postconditions = try container.decodeIfPresent([String].self, forKey: .postconditions) ?? []

        // Decoded once: it is both a stored field and the source of the default risk.
        let critical = try container.decodeLossyBool(forKeys: [.critical]) ?? false

        // Normalise before the allow-list check so "High" or " low " are not
        // silently downgraded to "medium".
        let declaredRisk = try container.decodeLossyString(forKeys: [.skipRisk, .skipRiskCamel])?
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
        let skipRisk: String
        if let declaredRisk {
            skipRisk = Self.knownSkipRisks.contains(declaredRisk) ? declaredRisk : "medium"
        } else {
            skipRisk = critical ? "high" : "medium"
        }

        self.init(
            id: resolvedID,
            order: try container.decodeLossyInt(forKeys: [.order, .index]) ?? 0,
            title: resolvedTitle,
            description: try container.decodeLossyString(forKeys: [.description, .instruction]) ?? "",
            duration: try container.decodeLossyString(forKeys: [.duration]) ?? "30s",
            validation: try container.decodeLossyString(forKeys: [.validation]) ?? "visual",
            critical: critical,
            aiPrompt: try container.decodeLossyString(forKeys: [.aiPrompt, .aiPromptCamel]),
            expectedObjects: Self.withoutBlanks(expectedObjects),
            preconditions: Self.withoutBlanks(preconditions),
            postconditions: Self.withoutBlanks(postconditions),
            skipRisk: skipRisk,
            evidenceRequired:
                try container.decodeLossyBool(forKeys: [.evidenceRequired, .evidenceRequiredCamel]) ?? true,
            allowManualComplete:
                try container.decodeLossyBool(forKeys: [.allowManualComplete, .allowManualCompleteCamel]) ?? true
        )
    }

    /// Lowercases `title` and collapses every run of characters outside `a-z0-9` into "_".
    private static func slug(from title: String) -> String {
        title.lowercased().replacingOccurrences(of: "[^a-z0-9]+", with: "_", options: .regularExpression)
    }

    /// Drops entries that are empty or whitespace-only.
    private static func withoutBlanks(_ values: [String]) -> [String] {
        values.filter { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }
    }
}
/// Decodes a queue item, accepting snake_case and camelCase spellings of each key.
///
/// `sop_id`/`sopId` is the only mandatory field; its absence throws
/// `DecodingError.keyNotFound`. `steps` may be a list of plain titles or a list of
/// step objects.
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    guard let sopIdentifier = try values.decodeFirstString(forKeys: [.sopID, .sopIDCamel]) else {
        let context = DecodingError.Context(
            codingPath: decoder.codingPath,
            debugDescription: "Missing sop_id/sopId in worker queue item."
        )
        throw DecodingError.keyNotFound(CodingKeys.sopID, context)
    }

    // Worker / assignment identity.
    shiftAssignmentID = try values.decodeFirstString(forKeys: [.shiftAssignmentID, .shiftAssignmentIDCamel])
    workerID = try values.decodeFirstString(forKeys: [.workerID, .workerIDCamel])
    workerName = try values.decodeFirstString(forKeys: [.workerName, .workerNameCamel])

    // Package fields; the title falls back to the canonical lookup for the package id.
    let resolvedPackageID = try values.decodeFirstString(forKeys: [.packageID, .packageIDCamel])
    packageID = resolvedPackageID
    let explicitPackageTitle = try values.decodeFirstString(forKeys: [.packageTitle, .packageTitleCamel])
    packageTitle = explicitPackageTitle ?? canonicalLucasPackageTitle(for: resolvedPackageID)
    packageRunID = try values.decodeFirstString(forKeys: [.packageRunID, .packageRunIDCamel])
    packageVersion = try values.decodeLossyInt(forKeys: [.packageVersion, .packageVersionCamel])

    // SOP fields.
    sopID = sopIdentifier
    let explicitSopTitle = try values.decodeFirstString(forKeys: [.sopTitle, .sopTitleCamel])
    sopTitle = explicitSopTitle ?? canonicalLucasSopTitle(for: sopIdentifier) ?? "Assigned SOP"
    sopVersion = try values.decodeLossyInt(forKeys: [.sopVersion, .sopVersionCamel])

    // Scheduling metadata with defaults.
    let explicitShiftName = try values.decodeFirstString(forKeys: [.shiftName, .shiftNameCamel])
    shiftName = explicitShiftName ?? "Morning"
    let explicitSourceType = try values.decodeFirstString(forKeys: [.sourceType, .sourceTypeCamel])
    sourceType = explicitSourceType ?? (resolvedPackageID == nil ? "standalone" : "package")
    let explicitSortOrder = try values.decodeLossyInt(forKeys: [.sortOrder, .sortOrderCamel])
    sortOrder = explicitSortOrder ?? canonicalLucasSopSortOrder(for: sopIdentifier) ?? 0
    required = try values.decodeLossyBool(forKeys: [.required]) ?? true
    active = try values.decodeLossyBool(forKeys: [.active])
    startsAt = try values.decodeFirstString(forKeys: [.startsAt, .startsAtCamel, .scheduledFor])
    endsAt = try values.decodeFirstString(forKeys: [.endsAt, .endsAtCamel, .completedAtCamel])

    // Plain-title lists are tried first; a failure there is swallowed so the
    // richer object form gets its turn (and may throw).
    if let plainTitles = try? values.decodeIfPresent([String].self, forKey: .steps) {
        var built: [WorkerQueueStep] = []
        built.reserveCapacity(plainTitles.count)
        for (offset, stepTitle) in plainTitles.enumerated() {
            let position = offset + 1
            built.append(
                WorkerQueueStep(id: "\(sopIdentifier)-\(position)", order: position, title: stepTitle)
            )
        }
        steps = built
    } else if let detailed = try values.decodeIfPresent([WorkerQueueStep].self, forKey: .steps) {
        var built: [WorkerQueueStep] = []
        built.reserveCapacity(detailed.count)
        for (offset, source) in detailed.enumerated() {
            // An order of 0 means "unspecified": use the 1-based list position.
            let effectiveOrder = source.order == 0 ? offset + 1 : source.order
            built.append(
                WorkerQueueStep(
                    id: source.id,
                    order: effectiveOrder,
                    title: source.title,
                    description: source.description,
                    duration: source.duration,
                    validation: source.validation,
                    critical: source.critical,
                    aiPrompt: source.aiPrompt,
                    expectedObjects: source.expectedObjects,
                    preconditions: source.preconditions,
                    postconditions: source.postconditions,
                    skipRisk: source.skipRisk,
                    evidenceRequired: source.evidenceRequired,
                    allowManualComplete: source.allowManualComplete
                )
            )
        }
        steps = built
    } else {
        steps = []
    }
}
/// Decodes the bootstrap response.
///
/// Assigned packages, the session token and its expiry each accept several key
/// spellings. Every alias is decoded, in precedence order, before the first
/// non-nil one is chosen, so a malformed alias still surfaces as a decoding error.
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    worker = try values.decode(BackendWorker.self, forKey: .worker)
    device = try values.decodeIfPresent(BackendDevice.self, forKey: .device)
    shift = try values.decodeIfPresent(BackendShift.self, forKey: .shift)
    queue = try values.decodeIfPresent([WorkerQueueItem].self, forKey: .queue) ?? []

    let packageKeys: [CodingKeys] = [.assignedPackages, .assignedPackagesCamel, .packages]
    let packageCandidates = try packageKeys.map { key in
        try values.decodeIfPresent([BackendAssignedPackage].self, forKey: key)
    }
    // With no explicit list, reconstruct one from the shift and the queue.
    assignedPackages =
        packageCandidates.compactMap { $0 }.first
        ?? BootstrapPayload.deriveAssignedPackages(shift: shift, queue: queue)

    let tokenKeys: [CodingKeys] = [
        .workerSessionToken,
        .workerSessionTokenCamel,
        .workerToken,
        .sessionToken,
        .sessionTokenCamel,
        .token
    ]
    let tokenCandidates = try tokenKeys.map { key in
        try values.decodeIfPresent(String.self, forKey: key)
    }
    workerSessionToken = tokenCandidates.compactMap { $0 }.first

    let expiryKeys: [CodingKeys] = [
        .workerSessionExpiresAt,
        .sessionExpiresAt,
        .workerSessionExpiresAtCamel,
        .sessionExpiresAtCamel
    ]
    let expiryCandidates = try expiryKeys.map { key in
        try values.decodeIfPresent(String.self, forKey: key)
    }
    workerSessionExpiresAt = expiryCandidates.compactMap { $0 }.first
}

/// Builds the assigned-package list when the payload does not carry one.
///
/// The shift's package (if any) comes first, followed by one entry per distinct
/// `packageID` found in `queue`, in queue order.
private static func deriveAssignedPackages(
    shift: BackendShift?,
    queue: [WorkerQueueItem]
) -> [BackendAssignedPackage] {
    var packages: [BackendAssignedPackage] = []
    var knownIDs = Set<String>()

    if let shiftPackage = shift?.package {
        let matchingItem = queue.first { $0.packageID == shiftPackage.id }
        let fromShift = BackendAssignedPackage(
            id: shiftPackage.id,
            title: shiftPackage.title,
            description: shiftPackage.description,
            outcome: shiftPackage.outcome,
            version: shiftPackage.version,
            shiftName: shift?.shiftName,
            active: shift?.active,
            packageRunID: matchingItem?.packageRunID,
            packageRunStatus: nil,
            packageRunStartedAt: nil,
            packageRunCompletedAt: nil
        )
        packages.append(fromShift)
        knownIDs.insert(fromShift.id)
    }

    for entry in queue {
        // `insert` reports whether the id was new, which doubles as the de-dup check.
        guard let identifier = entry.packageID, knownIDs.insert(identifier).inserted else { continue }
        packages.append(
            BackendAssignedPackage(
                id: identifier,
                title: entry.packageTitle ?? "Assigned Package",
                description: nil,
                outcome: nil,
                version: entry.packageVersion,
                shiftName: entry.shiftName ?? shift?.shiftName,
                active: entry.active,
                packageRunID: entry.packageRunID,
                packageRunStatus: nil,
                packageRunStartedAt: entry.startsAt,
                packageRunCompletedAt: entry.endsAt
            )
        )
    }

    return packages
}
/// Decodes an execution session, accepting snake_case and camelCase keys.
///
/// Only `id` is mandatory. `currentStepIndex` defaults to 0, `status` to "active"
/// and `helpRequested` to `false`; every other field stays `nil` when absent.
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    // Identity and ownership.
    id = try values.decode(String.self, forKey: .id)
    workerID = try values.decodeFirstString(forKeys: [.workerID, .workerIDCamel])
    deviceID = try values.decodeFirstString(forKeys: [.deviceID, .deviceIDCamel])

    // Package / SOP being executed.
    packageID = try values.decodeFirstString(forKeys: [.packageID, .packageIDCamel])
    packageRunID = try values.decodeFirstString(forKeys: [.packageRunID, .packageRunIDCamel])
    currentSopID = try values.decodeFirstString(forKeys: [.currentSopID, .currentSopIDCamel])
    sopVersion = try values.decodeLossyInt(forKeys: [.sopVersion, .sopVersionCamel])
    packageVersion = try values.decodeLossyInt(forKeys: [.packageVersion, .packageVersionCamel])

    // Progress and state, with defaults.
    let decodedStepIndex = try values.decodeLossyInt(forKeys: [.currentStepIndex, .currentStepIndexCamel])
    currentStepIndex = decodedStepIndex ?? 0
    let decodedStatus = try values.decodeIfPresent(String.self, forKey: .status)
    status = decodedStatus ?? "active"
    let decodedHelpFlag = try values.decodeLossyBool(forKeys: [.helpRequested, .helpRequestedCamel])
    helpRequested = decodedHelpFlag ?? false

    // Live-support and media pointers.
    webrtcRoomCode = try values.decodeFirstString(forKeys: [.webrtcRoomCode, .webrtcRoomCodeCamel])
    lastFrameBucket = try values.decodeFirstString(forKeys: [.lastFrameBucket, .lastFrameBucketCamel])
    lastFramePath = try values.decodeFirstString(forKeys: [.lastFramePath, .lastFramePathCamel])

    // Timestamps (kept as the strings the backend sent).
    startedAt = try values.decodeFirstString(forKeys: [.startedAt, .startedAtCamel])
    endedAt = try values.decodeFirstString(forKeys: [.endedAt, .endedAtCamel])
    updatedAt = try values.decodeFirstString(forKeys: [.updatedAt, .updatedAtCamel])

    packageProgressWarning = try values.decodeFirstString(
        forKeys: [.packageProgressWarning, .packageProgressWarningCamel]
    )
}
/// One telemetry event queued for upload.
///
/// The timestamp is stored already formatted as ISO 8601 with fractional seconds,
/// and `payload` is stored after passing through `WorkerTelemetryPayloadSanitizer`.
struct WorkerTelemetryEvent: @unchecked Sendable {
    let name: String
    let source: String
    let stage: String
    let occurredAt: String
    let durationMs: Double?
    let sequence: Int?
    let metricValue: Double?
    let metricUnit: String?
    let payload: [String: Any]

    init(
        name: String,
        source: String,
        stage: String = "point",
        occurredAt: Date = Date(),
        durationMs: Double? = nil,
        sequence: Int? = nil,
        metricValue: Double? = nil,
        metricUnit: String? = nil,
        payload: [String: Any] = [:]
    ) {
        let timestamp = Self.formatter.string(from: occurredAt)
        let cleanedPayload = WorkerTelemetryPayloadSanitizer.sanitizedPayload(payload)

        self.name = name
        self.source = source
        self.stage = stage
        self.occurredAt = timestamp
        self.durationMs = durationMs
        self.sequence = sequence
        self.metricValue = metricValue
        self.metricUnit = metricUnit
        self.payload = cleanedPayload
    }

    /// JSON-ready dictionary for this event; optional fields and an empty payload are omitted.
    var wirePayload: [String: Any] {
        var wire: [String: Any] = [:]
        wire["name"] = name
        wire["source"] = source
        wire["stage"] = stage
        wire["occurredAt"] = occurredAt

        if let duration = durationMs {
            wire["durationMs"] = duration
        }
        if let ordinal = sequence {
            wire["sequence"] = ordinal
        }
        if let value = metricValue {
            wire["metricValue"] = value
        }
        if let unit = metricUnit {
            wire["metricUnit"] = unit
        }
        if payload.isEmpty == false {
            wire["payload"] = payload
        }
        return wire
    }

    /// A new ISO 8601 formatter (internet date-time with fractional seconds) on every access.
    private static var formatter: ISO8601DateFormatter {
        let iso = ISO8601DateFormatter()
        iso.formatOptions = [.withInternetDateTime, .withFractionalSeconds]
        return iso
    }
}
/// Converts one payload value into something safe to serialise as JSON.
///
/// - `NSNull` and finite numbers pass through unchanged.
/// - Non-finite numbers (NaN, ±infinity) become their textual form, because
///   `JSONSerialization` rejects them and one such value would otherwise make the
///   whole payload look oversized to the byte-count check.
/// - Strings are redacted or truncated by `sanitizeString` using `key`.
/// - Arrays keep at most `maxArrayLength` items plus a "[truncated]" marker; each
///   item is sanitised under the key "<key>.<index>" so key-based redaction still applies.
/// - Dictionaries are delegated to `sanitizeDictionary`.
/// - Any other value is stringified and then treated like a string, so non-String
///   values under sensitive keys (for example a `URL` under "uploadUrl") are
///   redacted rather than leaked.
///
/// - Parameters:
///   - value: The raw payload value.
///   - key: The key (or "<key>.<index>" path) the value is stored under.
///   - depth: Current nesting depth; forwarded, incremented, to nested containers.
// NOTE(review): arrays are not depth-limited here; only `sanitizeDictionary` checks `maxDepth`.
private static func sanitizeValue(_ value: Any, key: String, depth: Int) -> Any {
    if value is NSNull {
        return NSNull()
    }
    if let number = value as? NSNumber {
        // Booleans and integers report a finite doubleValue and pass through untouched.
        return number.doubleValue.isFinite ? number : "\(number.doubleValue)"
    }
    if let string = value as? String {
        return sanitizeString(string, key: key)
    }
    if let array = value as? [Any] {
        var sanitized = array.prefix(maxArrayLength).enumerated().map { index, item in
            sanitizeValue(item, key: "\(key).\(index)", depth: depth + 1)
        }
        if array.count > sanitized.count {
            sanitized.append("[truncated]")
        }
        return sanitized
    }
    if let dictionary = value as? [String: Any] {
        return sanitizeDictionary(dictionary, depth: depth + 1)
    }
    // Route the description through the string path so redaction rules and the
    // length cap apply uniformly.
    return sanitizeString(String(describing: value), key: key)
}
/// Buffers telemetry events and uploads them in batches through `WorkerAdminAPI`.
///
/// Events are queued by `record`, sent by `flush` in batches of at most
/// `maxBatchSize`, and capped at `maxQueueSize` (oldest events are dropped first).
/// A flush is triggered immediately once a full batch is queued, otherwise after
/// `flushIntervalNanoseconds` via the injected `sleeper`.
actor WorkerTelemetry {
    static let shared = WorkerTelemetry()

    typealias Sleeper = @Sendable (UInt64) async -> Void

    private weak var api: WorkerAdminAPI?
    private var sessionID: String?
    private var deviceID: String?
    private var workerID: String?
    private var platform: String
    private var appBuild: String?
    private var sequence: Int = 0
    private var queue: [WorkerTelemetryEvent] = []
    private var flushTask: Task<Void, Never>?
    private var isFlushing = false

    private let flushIntervalNanoseconds: UInt64
    private let maxBatchSize: Int
    private let maxQueueSize: Int
    private let sleeper: Sleeper

    /// - Parameters:
    ///   - flushIntervalNanoseconds: Delay before a partially filled batch is sent; 0 disables the timer.
    ///   - maxBatchSize: Events per upload; clamped to at least 1.
    ///   - maxQueueSize: Maximum buffered events; clamped to at least 1.
    ///   - sleeper: Suspends for the given nanoseconds; injectable for tests.
    init(
        api: WorkerAdminAPI? = nil,
        sessionID: String? = nil,
        deviceID: String? = nil,
        workerID: String? = nil,
        platform: String = "ios",
        appBuild: String? = WorkerTelemetry.defaultAppBuild,
        flushIntervalNanoseconds: UInt64 = 5_000_000_000,
        maxBatchSize: Int = 20,
        maxQueueSize: Int = 500,
        sleeper: @escaping Sleeper = { nanoseconds in
            guard nanoseconds > 0 else { return }
            try? await Task.sleep(nanoseconds: nanoseconds)
        }
    ) {
        self.api = api
        self.sessionID = sessionID
        self.deviceID = deviceID
        self.workerID = workerID
        self.platform = platform
        self.appBuild = appBuild
        self.flushIntervalNanoseconds = flushIntervalNanoseconds
        self.maxBatchSize = max(1, maxBatchSize)
        self.maxQueueSize = max(1, maxQueueSize)
        self.sleeper = sleeper
    }

    /// Binds the telemetry pipeline to an API client and session.
    ///
    /// Switching to a different session id discards queued events and restarts the
    /// sequence counter. Identifier strings are trimmed; blank ones become `nil`.
    func configure(
        api: WorkerAdminAPI,
        sessionID: String,
        deviceID: String? = GeminiConfig.deviceID,
        workerID: String? = nil,
        platform: String = "ios",
        appBuild: String? = WorkerTelemetry.defaultAppBuild
    ) {
        let cleanedSessionID = sessionID.trimmingCharacters(in: .whitespacesAndNewlines)
        if self.sessionID != cleanedSessionID {
            queue.removeAll()
            sequence = 0
        }
        self.api = api
        self.sessionID = cleanedSessionID
        self.deviceID = trimmed(deviceID)
        self.workerID = trimmed(workerID)
        self.platform = platform
        self.appBuild = trimmed(appBuild)
    }

    /// Queues one event.
    ///
    /// The event is dropped when neither `explicitSessionID` nor a configured session
    /// id is available. An explicit id is adopted only if no session is set yet.
    func record(
        _ name: String,
        source: String,
        stage: String = "point",
        sessionID explicitSessionID: String? = nil,
        durationMs: Double? = nil,
        metricValue: Double? = nil,
        metricUnit: String? = nil,
        payload: [String: Any] = [:]
    ) {
        guard let resolvedSessionID = trimmed(explicitSessionID) ?? sessionID,
              !resolvedSessionID.isEmpty
        else {
            return
        }

        if sessionID == nil {
            sessionID = resolvedSessionID
        }

        sequence += 1
        queue.append(
            WorkerTelemetryEvent(
                name: name,
                source: source,
                stage: stage,
                durationMs: durationMs,
                sequence: sequence,
                metricValue: metricValue,
                metricUnit: metricUnit,
                payload: payload
            )
        )
        if queue.count > maxQueueSize {
            queue.removeFirst(queue.count - maxQueueSize)
        }

        if queue.count >= maxBatchSize {
            Task { await self.flush() }
        } else {
            scheduleFlush()
        }
    }

    /// Sends up to `maxBatchSize` queued events.
    ///
    /// Does nothing while another flush is in flight, or when the API client, the
    /// session id or queued events are missing. On failure the batch is put back at
    /// the front of the queue, unless the session changed while the request was in flight.
    func flush() async {
        guard !isFlushing else { return }
        guard let api, let sessionID, !queue.isEmpty else { return }

        let count = min(maxBatchSize, queue.count)
        let events = Array(queue.prefix(count))
        queue.removeFirst(count)
        isFlushing = true

        do {
            try await api.sendWorkerTelemetryBatch(
                WorkerTelemetryBatch(
                    sessionID: sessionID,
                    deviceID: deviceID,
                    workerID: workerID,
                    platform: platform,
                    appBuild: appBuild,
                    events: events
                )
            )
        } catch {
            // The actor is re-entrant across the await above: `configure` may have
            // switched sessions and cleared the queue. Re-queueing then would leak
            // old-session events into the new session's batches.
            if self.sessionID == sessionID {
                queue = Array((events + queue).suffix(maxQueueSize))
            }
            NSLog("[telemetry] flush failed: %@", error.localizedDescription)
        }

        isFlushing = false
        if !queue.isEmpty {
            scheduleFlush()
        }
    }

    /// Cancels the pending timer and drains the queue.
    ///
    /// Batches are sent until the queue is empty or a flush makes no progress
    /// (upload failed, another flush is running, or no API/session is configured).
    /// No timer is left armed afterwards; events that could not be sent stay queued
    /// until the next `record` or `flush`.
    func flushAndStop() async {
        flushTask?.cancel()
        flushTask = nil

        while !queue.isEmpty {
            let pendingBefore = queue.count
            await flush()
            if queue.count >= pendingBefore { break }
        }

        // `flush()` re-arms the timer whenever events remain; undo that so "stop" holds.
        flushTask?.cancel()
        flushTask = nil
    }

    /// Arms the delayed flush unless the timer is disabled or one is already pending.
    private func scheduleFlush() {
        guard flushIntervalNanoseconds > 0, flushTask == nil else { return }
        flushTask = Task { [flushIntervalNanoseconds, sleeper] in
            await sleeper(flushIntervalNanoseconds)
            // The default sleeper swallows the cancellation error and returns early;
            // without this check a cancelled timer would flush immediately.
            guard !Task.isCancelled else { return }
            await self.flushAfterDelay()
        }
    }

    /// Timer callback: clears the pending-task slot, then flushes.
    private func flushAfterDelay() async {
        flushTask = nil
        await flush()
    }

    /// Returns `value` without surrounding whitespace, or `nil` when it is `nil` or blank.
    private func trimmed(_ value: String?) -> String? {
        guard let value = value?.trimmingCharacters(in: .whitespacesAndNewlines),
              !value.isEmpty
        else {
            return nil
        }
        return value
    }

    /// "<short version> <build>" from the main bundle's Info.plist, or `nil` when
    /// neither entry is present (previously an empty string was returned).
    private static var defaultAppBuild: String? {
        let info = Bundle.main.infoDictionary
        let version = info?["CFBundleShortVersionString"] as? String
        let build = info?["CFBundleVersion"] as? String
        let parts = [version, build].compactMap { $0 }
        return parts.isEmpty ? nil : parts.joined(separator: " ")
    }
}
/// Server reply to a live heartbeat.
///
/// Only `sessionId` is required when decoding; every other field falls back to
/// the defaults of the memberwise initializer.
struct WorkerLiveHeartbeatResponse: Decodable, Equatable {
    let sessionID: String
    let updatedAt: String?
    let isFreshLiveSession: Bool
    let webrtcRoomCode: String?
    let supportMode: String
    let aiSessionStatus: String
    let humanSupportStatus: String
    let supportUpdatedAt: String?
    let shouldOpenLiveRoom: Bool

    private enum CodingKeys: String, CodingKey {
        case sessionID = "sessionId"
        case updatedAt
        case isFreshLiveSession
        case webrtcRoomCode
        case supportMode
        case aiSessionStatus
        case humanSupportStatus
        case supportUpdatedAt
        case shouldOpenLiveRoom
    }

    init(
        sessionID: String,
        updatedAt: String? = nil,
        isFreshLiveSession: Bool = false,
        webrtcRoomCode: String? = nil,
        supportMode: String = "ai",
        aiSessionStatus: String = "active",
        humanSupportStatus: String = "none",
        supportUpdatedAt: String? = nil,
        shouldOpenLiveRoom: Bool = false
    ) {
        self.sessionID = sessionID
        self.updatedAt = updatedAt
        self.isFreshLiveSession = isFreshLiveSession
        self.webrtcRoomCode = webrtcRoomCode
        self.supportMode = supportMode
        self.aiSessionStatus = aiSessionStatus
        self.humanSupportStatus = humanSupportStatus
        self.supportUpdatedAt = supportUpdatedAt
        self.shouldOpenLiveRoom = shouldOpenLiveRoom
    }

    /// Decodes the camelCase wire format, substituting defaults for absent keys.
    init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)

        // Small readers keep the field list below free of repetition.
        func text(_ key: CodingKeys) throws -> String? {
            try values.decodeIfPresent(String.self, forKey: key)
        }
        func flag(_ key: CodingKeys) throws -> Bool {
            try values.decodeIfPresent(Bool.self, forKey: key) ?? false
        }

        // Arguments are evaluated left to right, i.e. in declaration order.
        self.init(
            sessionID: try values.decode(String.self, forKey: .sessionID),
            updatedAt: try text(.updatedAt),
            isFreshLiveSession: try flag(.isFreshLiveSession),
            webrtcRoomCode: try text(.webrtcRoomCode),
            supportMode: try text(.supportMode) ?? "ai",
            aiSessionStatus: try text(.aiSessionStatus) ?? "active",
            humanSupportStatus: try text(.humanSupportStatus) ?? "none",
            supportUpdatedAt: try text(.supportUpdatedAt),
            shouldOpenLiveRoom: try flag(.shouldOpenLiveRoom)
        )
    }
}
+ + var payload: [String: Any] { + var payload: [String: Any] = [ + "assetId": assetID, + "sessionId": sessionID, + "bucket": bucket, + "path": path, + "status": status, + "byteSize": byteSize + ] + if let error { + payload["error"] = error + } + return payload + } +} + +struct GeminiLiveTokenResponse: Decodable, Equatable { + let token: String + let expiresAt: String + let newSessionExpiresAt: String + let model: String + let websocketBaseURL: String + let queryParameterName: String + let systemInstruction: String? + let runtimeContext: GeminiRuntimeContextEnvelope? + let diagnosticsID: String? + let provider: String? + + private enum CodingKeys: String, CodingKey { + case token + case expiresAt + case newSessionExpiresAt + case model + case websocketBaseURL + case queryParameterName + case systemInstruction + case runtimeContext + case diagnosticsID = "diagnosticsId" + case provider + } + + var credential: GeminiLiveCredential { + GeminiLiveCredential( + token: token, + queryParameterName: queryParameterName.isEmpty ? "access_token" : queryParameterName, + websocketBaseURL: websocketBaseURL.isEmpty ? GeminiConfig.ephemeralTokenWebsocketBaseURL : websocketBaseURL, + model: model.isEmpty ? GeminiConfig.model : model + ) + } +} + +struct GeminiRuntimeContextEnvelope: Decodable, Equatable { + let rawSummary: String + + init(from decoder: Decoder) throws { + if let container = try? decoder.singleValueContainer(), + container.decodeNil() { + rawSummary = "" + return + } + rawSummary = "runtime-context" + } +} + +struct GeminiSpotterRequest: Equatable { + let sessionID: String + let stepID: String + let stepTitle: String + let aiPrompt: String + let expectedObjects: [String] + let preconditions: [String] + let postconditions: [String] + let skipRisk: String + let evidenceRequired: Bool + let imageBase64: String + let imageMimeType: String + let capturedAt: String + let critical: Bool + let allowAIComplete: Bool + let elapsedActiveMs: Int? 
+ + var payload: [String: Any] { + var payload: [String: Any] = [ + "sessionId": sessionID, + "stepId": stepID, + "stepTitle": stepTitle, + "aiPrompt": aiPrompt, + "expectedObjects": expectedObjects, + "preconditions": preconditions, + "postconditions": postconditions, + "skipRisk": skipRisk, + "evidenceRequired": evidenceRequired, + "imageBase64": imageBase64, + "imageMimeType": imageMimeType, + "capturedAt": capturedAt, + "critical": critical, + "allowAIComplete": allowAIComplete + ] + if let elapsedActiveMs { + payload["elapsedActiveMs"] = elapsedActiveMs + } + return payload + } +} + +struct GeminiSpotterResponse: Decodable, Equatable { + let matched: Bool + let confidence: Double + let reason: String + let evidenceTimestamp: String + let threshold: Double? + let model: String? + let autoComplete: Bool + let modelAutoComplete: Bool? + let evidenceWindowSatisfied: Bool? + let activeDurationSatisfied: Bool? + let elapsedActiveMs: Int? + let minActiveSeconds: Double? + let stableObservations: Int? + let stableObservationsRequired: Int? + let advancedToStepIndex: Int? + let completedSop: Bool? + let packageProgressWarning: String? + + private enum CodingKeys: String, CodingKey { + case matched + case confidence + case reason + case evidenceTimestamp + case threshold + case model + case autoComplete + case modelAutoComplete + case evidenceWindowSatisfied + case activeDurationSatisfied + case elapsedActiveMs + case minActiveSeconds + case stableObservations + case stableObservationsRequired + case advancedToStepIndex + case completedSop + case packageProgressWarning + } +} + +struct BackendMemoryLink: Identifiable, Decodable, Equatable { + let id: String +} + +struct BackendPackageExecutionRun: Identifiable, Decodable, Equatable { + let id: String + let packageID: String? + let status: String? + let completedAt: String? 
+ + private enum CodingKeys: String, CodingKey { + case id + case packageID = "package_id" + case status + case completedAt = "completed_at" + } +} + +private struct HealthResponse: Decodable { + let status: String + let service: String +} + +struct ExecutionSessionPatch { + var status: String? + var currentSopID: String? + var currentStepIndex: Int? + var helpRequested: Bool? + var webrtcRoomCode: String? + var lastFrameBucket: String? + var lastFramePath: String? + var endedAt: String? + + var payload: [String: Any] { + var payload: [String: Any] = [:] + if let status { payload["status"] = status } + if let currentSopID { payload["current_sop_id"] = currentSopID } + if let currentStepIndex { payload["current_step_index"] = currentStepIndex } + if let helpRequested { payload["help_requested"] = helpRequested } + if let webrtcRoomCode = webrtcRoomCode?.trimmingCharacters(in: .whitespacesAndNewlines), + !webrtcRoomCode.isEmpty { + payload["webrtc_room_code"] = webrtcRoomCode + } + if let lastFrameBucket = lastFrameBucket?.trimmingCharacters(in: .whitespacesAndNewlines), + !lastFrameBucket.isEmpty { + payload["last_frame_bucket"] = lastFrameBucket + } + if let lastFramePath = lastFramePath?.trimmingCharacters(in: .whitespacesAndNewlines), + !lastFramePath.isEmpty { + payload["last_frame_path"] = lastFramePath + } + if let endedAt { payload["ended_at"] = endedAt } + return payload + } +} + +enum OpsAPIError: LocalizedError { + case notConfigured + case invalidURL(String) + case invalidResponse + case missingWorkerSession + case missingWorkerBearerToken + case server(statusCode: Int, message: String) + + var errorDescription: String? { + switch self { + case .notConfigured: + return "Ops API base URL is not configured." + case .invalidURL(let path): + return "Invalid URL for path \(path)." + case .invalidResponse: + return "The ops-api returned an invalid response." + case .missingWorkerSession: + return "Worker session is missing. 
Re-bootstrap before writing execution state." + case .missingWorkerBearerToken: + return "Worker bearer token is missing. Re-bootstrap or configure a worker bearer token in Settings." + case .server(let statusCode, let message): + return "ops-api returned HTTP \(statusCode): \(message)" + } + } +} + +enum AdminIngestError: LocalizedError { + case notConfigured + case invalidURL(String) + case invalidResponse + case missingWorkerBearerToken + case server(statusCode: Int, url: String, message: String) + + var errorDescription: String? { + switch self { + case .notConfigured: + return "Admin ingest base URL is not configured." + case .invalidURL(let path): + return "Invalid admin ingest URL for path \(path)." + case .invalidResponse: + return "The admin ingest service returned an invalid response." + case .missingWorkerBearerToken: + return "Worker bearer token is missing. Re-bootstrap or configure a worker bearer token in Settings." + case .server(let statusCode, let url, let message): + return "Admin ingest returned HTTP \(statusCode) from \(url): \(message)" + } + } +} + +final class OpsAPIClient: WorkerAdminAPI { + private let session: URLSession + private let decoder: JSONDecoder + private var workerSessionToken: String? + + init(session: URLSession = { + let config = URLSessionConfiguration.default + config.timeoutIntervalForRequest = 15 + return URLSession(configuration: config) + }()) { + self.session = session + self.decoder = JSONDecoder() + } + + var isConfigured: Bool { + GeminiConfig.isOpsConfigured + } + + var currentWorkerBearerToken: String? { + let liveToken = workerSessionToken?.trimmingCharacters(in: .whitespacesAndNewlines) + if let liveToken, !liveToken.isEmpty { + return liveToken + } + + let configuredToken = GeminiConfig.workerAPIBearerToken.trimmingCharacters(in: .whitespacesAndNewlines) + return configuredToken.isEmpty ? 
nil : configuredToken + } + + func health() async throws -> String { + let response: HealthResponse = try await performRequest(path: "/health", method: "GET") + return "\(response.status):\(response.service)" + } + + func bootstrap( + loginCode: String?, + email: String?, + platform: String, + label: String + ) async throws -> BootstrapPayload { + var payload: [String: Any] = [ + "platform": platform, + "label": label, + "device_label": label + ] + if let loginCode, !loginCode.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + payload["login_code"] = loginCode + } + if let email, !email.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + payload["email"] = email + } + let response: BootstrapPayload = try await performRequest( + path: "/v1/bootstrap", + method: "POST", + requiresWorkerAuth: false, + payload: payload + ) + workerSessionToken = response.workerSessionToken?.trimmingCharacters(in: .whitespacesAndNewlines) + return response + } + + func createExecutionSession( + workerID: String, + deviceID: String?, + shiftID: String?, + packageID: String?, + packageRunID: String?, + currentSopID: String?, + sopVersion: Int?, + packageVersion: Int?, + status: String = "active" + ) async throws -> BackendExecutionSession { + var payload: [String: Any] = [ + "worker_id": workerID, + "status": status + ] + if let deviceID { payload["device_id"] = deviceID } + if let shiftID { payload["shift_id"] = shiftID } + if let packageID { payload["package_id"] = packageID } + if let packageRunID { payload["package_run_id"] = packageRunID } + if let currentSopID { payload["current_sop_id"] = currentSopID } + if let sopVersion { payload["sop_version"] = sopVersion } + if let packageVersion { payload["package_version"] = packageVersion } + return try await performRequest( + path: "/v1/execution-sessions", + method: "POST", + requiresWorkerAuth: true, + payload: payload + ) + } + + func updateExecutionSession( + id: String, + patch: ExecutionSessionPatch + ) async throws 
-> BackendExecutionSession { + try await performRequest( + path: "/v1/execution-sessions/\(id)", + method: "PATCH", + requiresWorkerAuth: true, + payload: patch.payload + ) + } + + func postExecutionEvent( + sessionID: String, + eventType: String, + payload: [String: Any] + ) async throws -> BackendExecutionEvent { + try await performRequest( + path: "/v1/execution-sessions/\(sessionID)/events", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "event_type": eventType, + "payload": payload + ] + ) + } + + func createIntervention( + sessionID: String, + type: String, + notes: String? + ) async throws -> BackendIntervention { + var payload: [String: Any] = [ + "session_id": sessionID, + "type": type + ] + if let notes, !notes.isEmpty { + payload["notes"] = notes + } + return try await performRequest( + path: "/v1/interventions", + method: "POST", + requiresWorkerAuth: true, + payload: payload + ) + } + + func registerMediaAsset( + sessionID: String, + bucket: String, + path: String, + assetType: String, + metadata: [String: Any] + ) async throws -> BackendMediaAsset { + try await performRequest( + path: "/v1/media-assets", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "session_id": sessionID, + "bucket": bucket, + "path": path, + "asset_type": assetType, + "metadata": metadata + ] + ) + } + + func createMemoryLink( + sourceID: String, + sourceType: String, + targetID: String, + targetType: String, + linkType: String, + metadata: [String: Any] + ) async throws -> BackendMemoryLink { + try await performRequest( + path: "/v1/memory-links", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "source_id": sourceID, + "source_type": sourceType, + "target_id": targetID, + "target_type": targetType, + "link_type": linkType, + "metadata": metadata + ] + ) + } + + func requestMediaUploadTarget( + assetID: String, + contentType: String, + byteCount: Int + ) async throws -> BackendMediaUploadTarget { + try await performRequest( + path: 
"/v1/media-assets/\(assetID)/upload-target", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "content_type": contentType, + "byte_count": byteCount + ] + ) + } + + func finalizeMediaAssetUpload( + assetID: String, + uploadState: String = "uploaded", + byteCount: Int, + contentType: String + ) async throws -> BackendMediaAsset { + try await performRequest( + path: "/v1/media-assets/\(assetID)/finalize", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "upload_state": uploadState, + "byte_count": byteCount, + "content_type": contentType + ] + ) + } + + func uploadBinary( + to target: BackendMediaUploadTarget, + data: Data, + contentType: String + ) async throws { + guard let url = URL(string: target.uploadURL), !target.uploadURL.isEmpty else { + throw OpsAPIError.invalidURL(target.uploadURL) + } + + var request = URLRequest(url: url) + request.httpMethod = target.method.isEmpty ? "PUT" : target.method + request.setValue(contentType, forHTTPHeaderField: "Content-Type") + for (name, value) in target.headers { + request.setValue(value, forHTTPHeaderField: name) + } + + let (_, response) = try await session.upload(for: request, from: data) + guard let httpResponse = response as? HTTPURLResponse else { + throw OpsAPIError.invalidResponse + } + guard (200...299).contains(httpResponse.statusCode) else { + throw OpsAPIError.server(statusCode: httpResponse.statusCode, message: "Media upload failed") + } + } + + func sendWorkerLiveHeartbeat(_ heartbeat: WorkerLiveHeartbeatRequest) async throws -> WorkerLiveHeartbeatResponse { + let data = try await performWorkerRequest( + path: "/api/worker/live/heartbeat", + method: "POST", + payload: heartbeat.payload + ) + + do { + return try decoder.decode(WorkerLiveHeartbeatResponse.self, from: data) + } catch { + let body = String(data: data, encoding: .utf8) ?? 
"" + NSLog("[admin-ingest] Failed decoding /api/worker/live/heartbeat -> %@", body) + throw error + } + } + + func requestWorkerMediaUploadTarget( + sessionID: String, + assetType: String, + filename: String, + contentType: String, + byteSize: Int, + source: String? = nil + ) async throws -> WorkerMediaUploadTarget { + var payload: [String: Any] = [ + "sessionId": sessionID, + "assetType": assetType, + "filename": filename, + "contentType": contentType, + "byteSize": byteSize + ] + if let source, !source.isEmpty { + payload["source"] = source + } + let data = try await performWorkerRequest( + path: "/api/worker/media/upload-target", + method: "POST", + payload: payload + ) + + do { + return try decoder.decode(WorkerMediaUploadTarget.self, from: data) + } catch { + let body = String(data: data, encoding: .utf8) ?? "" + NSLog("[admin-ingest] Failed decoding /api/worker/media/upload-target -> %@", body) + throw error + } + } + + func finalizeWorkerMediaUpload(_ finalize: WorkerMediaFinalizeRequest) async throws { + _ = try await performWorkerRequest( + path: "/api/worker/media/finalize", + method: "POST", + payload: finalize.payload + ) + } + + func sendWorkerTelemetryBatch(_ batch: WorkerTelemetryBatch) async throws { + _ = try await performWorkerRequest( + path: "/api/worker/telemetry", + method: "POST", + payload: batch.payload + ) + } + + func requestGeminiLiveToken( + model: String? = nil, + sessionID: String? 
= nil + ) async throws -> GeminiLiveTokenResponse { + var payload: [String: Any] = [ + "responseModalities": ["AUDIO"] + ] + if let model = model?.trimmingCharacters(in: .whitespacesAndNewlines), + !model.isEmpty { + payload["model"] = model + } + if let sessionID, !sessionID.isEmpty { + payload["sessionId"] = sessionID + } + + let data = try await performWorkerRequest( + path: "/api/worker/gemini/live-token", + method: "POST", + payload: payload + ) + + do { + return try decoder.decode(GeminiLiveTokenResponse.self, from: data) + } catch { + let body = String(data: data, encoding: .utf8) ?? "" + NSLog("[admin-ingest] Failed decoding /api/worker/gemini/live-token -> %@", body) + throw error + } + } + + func requestGeminiSpotter(_ request: GeminiSpotterRequest) async throws -> GeminiSpotterResponse { + let data = try await performWorkerRequest( + path: "/api/worker/gemini/spotter", + method: "POST", + payload: request.payload + ) + + do { + return try decoder.decode(GeminiSpotterResponse.self, from: data) + } catch { + let body = String(data: data, encoding: .utf8) ?? "" + NSLog("[admin-ingest] Failed decoding /api/worker/gemini/spotter -> %@", body) + throw error + } + } + + func uploadBinary( + to target: WorkerMediaUploadTarget, + data: Data, + contentType: String + ) async throws { + guard let url = URL(string: target.uploadURL), !target.uploadURL.isEmpty else { + throw OpsAPIError.invalidURL(target.uploadURL) + } + + var request = URLRequest(url: url) + request.httpMethod = target.method.isEmpty ? "PUT" : target.method + request.setValue(contentType, forHTTPHeaderField: "Content-Type") + for (name, value) in target.headers { + request.setValue(value, forHTTPHeaderField: name) + } + + let (_, response) = try await session.upload(for: request, from: data) + guard let httpResponse = response as? 
HTTPURLResponse else { + throw OpsAPIError.invalidResponse + } + guard (200...299).contains(httpResponse.statusCode) else { + throw OpsAPIError.server(statusCode: httpResponse.statusCode, message: "Media upload failed") + } + } + + func closePackageRun( + packageRunID: String, + workerID: String + ) async throws -> BackendPackageExecutionRun { + try await performRequest( + path: "/v1/package-runs/\(packageRunID)/close", + method: "POST", + requiresWorkerAuth: true, + payload: [ + "worker_id": workerID + ] + ) + } + + private func performRequest( + path: String, + method: String, + requiresWorkerAuth: Bool = false, + payload: [String: Any]? = nil + ) async throws -> Response { + guard isConfigured else { + throw OpsAPIError.notConfigured + } + + guard let url = makeURL(path: path, baseURLString: GeminiConfig.opsBaseURL) else { + throw OpsAPIError.invalidURL(path) + } + + var request = URLRequest(url: url) + request.httpMethod = method + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.setValue("application/json", forHTTPHeaderField: "Accept") + + if requiresWorkerAuth { + guard let token = workerSessionToken?.trimmingCharacters(in: .whitespacesAndNewlines), + !token.isEmpty + else { + throw OpsAPIError.missingWorkerSession + } + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + + if let payload { + request.httpBody = try JSONSerialization.data(withJSONObject: payload, options: []) + } + + let (data, response) = try await session.data(for: request) + guard let httpResponse = response as? HTTPURLResponse else { + throw OpsAPIError.invalidResponse + } + + guard (200...299).contains(httpResponse.statusCode) else { + let message = String(data: data, encoding: .utf8) ?? "Unknown error" + throw OpsAPIError.server(statusCode: httpResponse.statusCode, message: message) + } + + do { + return try decoder.decode(Response.self, from: data) + } catch { + let body = String(data: data, encoding: .utf8) ?? 
"" + NSLog("[ops-api] Failed decoding %@ -> %@", path, body) + throw error + } + } + + private func makeURL(path: String, baseURLString: String) -> URL? { + let trimmed = baseURLString.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + + let normalizedBase: String + if trimmed.hasPrefix("http://") || trimmed.hasPrefix("https://") { + normalizedBase = trimmed + } else { + normalizedBase = "https://\(trimmed)" + } + + guard var components = URLComponents(string: normalizedBase) else { + return nil + } + let cleanedPath = components.path.trimmingCharacters(in: CharacterSet(charactersIn: "/")) + components.path = "/" + [cleanedPath, path.trimmingCharacters(in: CharacterSet(charactersIn: "/"))] + .filter { !$0.isEmpty } + .joined(separator: "/") + return components.url + } + + private func performWorkerRequest( + path: String, + method: String, + payload: [String: Any]? = nil + ) async throws -> Data { + guard GeminiConfig.isAdminConfigured else { + throw AdminIngestError.notConfigured + } + + guard let url = makeURL(path: path, baseURLString: GeminiConfig.adminBaseURL) else { + throw AdminIngestError.invalidURL(path) + } + + guard let token = currentWorkerBearerToken else { + throw AdminIngestError.missingWorkerBearerToken + } + + var request = URLRequest(url: url) + request.httpMethod = method + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.setValue("application/json", forHTTPHeaderField: "Accept") + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + if let payload { + request.httpBody = try JSONSerialization.data(withJSONObject: payload, options: []) + } + + let (data, response) = try await session.data(for: request) + guard let httpResponse = response as? 
HTTPURLResponse else { + throw AdminIngestError.invalidResponse + } + + guard (200...299).contains(httpResponse.statusCode) else { + let message = summarizeResponseBody(data) + NSLog( + "[admin-ingest] %@ %@ -> %d %@", + method, + url.absoluteString, + httpResponse.statusCode, + message + ) + throw AdminIngestError.server( + statusCode: httpResponse.statusCode, + url: url.absoluteString, + message: message + ) + } + + return data + } + + private func summarizeResponseBody(_ data: Data) -> String { + let raw = String(data: data, encoding: .utf8) ?? "Unknown error" + let compact = raw.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression) + .trimmingCharacters(in: .whitespacesAndNewlines) + guard compact.count > 240 else { return compact } + let endIndex = compact.index(compact.startIndex, offsetBy: 240) + return "\(compact[..CFBundleDevelopmentRegion $(DEVELOPMENT_LANGUAGE) CFBundleDisplayName - VisionClaw + Embarcadero CFBundleExecutable $(EXECUTABLE_NAME) CFBundleIdentifier @@ -13,7 +13,7 @@ CFBundleInfoDictionaryVersion 6.0 CFBundleName - VisionClaw + Embarcadero CFBundlePackageType $(PRODUCT_BUNDLE_PACKAGE_TYPE) CFBundleShortVersionString @@ -33,49 +33,44 @@ CFBundleVersion $(CURRENT_PROJECT_VERSION) - - MWDAT AppLinkURLScheme - cameraaccess:// - MetaAppID - - $(META_APP_ID) - ClientToken $(CLIENT_TOKEN) - + MetaAppID + $(META_APP_ID) TeamID $(DEVELOPMENT_TEAM) - UIBackgroundModes - - audio - bluetooth-peripheral - external-accessory - + NSAppTransportSecurity + + NSAllowsLocalNetworking + + NSExceptionDomains + + 100.64.30.99 + + NSExceptionAllowsInsecureHTTPLoads + + NSIncludesSubdomains + + + + NSBluetoothAlwaysUsageDescription Needed to connect to Meta AI Glasses NSBluetoothPeripheralUsageDescription To listen to audio from Meta AI Glasses - UISupportedExternalAccessoryProtocols - - com.meta.ar.wearable - - NSCameraUsageDescription This app uses the camera for iPhone testing mode, allowing you to test the AI assistant pipeline without 
glasses. NSMicrophoneUsageDescription This app uses the microphone to have voice conversations with the AI assistant while streaming from your glasses. NSPhotoLibraryAddUsageDescription This app needs access to save photos captured from your glasses. - NSAppTransportSecurity - - NSAllowsLocalNetworking - - + NSSpeechRecognitionUsageDescription + This app uses speech recognition so operators can check SOP items hands-free while capturing. UIApplicationSceneManifest UIApplicationSupportsMultipleScenes @@ -83,14 +78,22 @@ UIApplicationSupportsIndirectInputEvents - + UIBackgroundModes + + audio + bluetooth-peripheral + external-accessory + UILaunchScreen UIRequiredDeviceCapabilities armv7 - + UISupportedExternalAccessoryProtocols + + com.meta.ar.wearable + UISupportedInterfaceOrientations UIInterfaceOrientationPortrait diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift deleted file mode 100644 index 1f48ac6f..00000000 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ /dev/null @@ -1,140 +0,0 @@ -import Foundation - -enum OpenClawConnectionState: Equatable { - case notConfigured - case checking - case connected - case unreachable(String) -} - -@MainActor -class OpenClawBridge: ObservableObject { - @Published var lastToolCallStatus: ToolCallStatus = .idle - @Published var connectionState: OpenClawConnectionState = .notConfigured - - private let session: URLSession - private let pingSession: URLSession - private var sessionKey: String - private var conversationHistory: [[String: String]] = [] - private let maxHistoryTurns = 10 - - private static let stableSessionKey = "agent:main:glass" - - init() { - let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 120 - self.session = URLSession(configuration: config) - - let pingConfig = URLSessionConfiguration.default - pingConfig.timeoutIntervalForRequest = 5 - self.pingSession = 
URLSession(configuration: pingConfig) - - self.sessionKey = OpenClawBridge.stableSessionKey - } - - func checkConnection() async { - guard GeminiConfig.isOpenClawConfigured else { - connectionState = .notConfigured - return - } - connectionState = .checking - guard let url = URL(string: "\(GeminiConfig.openClawHost):\(GeminiConfig.openClawPort)/v1/chat/completions") else { - connectionState = .unreachable("Invalid URL") - return - } - var request = URLRequest(url: url) - request.httpMethod = "GET" - request.setValue("Bearer \(GeminiConfig.openClawGatewayToken)", forHTTPHeaderField: "Authorization") - request.setValue("glass", forHTTPHeaderField: "x-openclaw-message-channel") - do { - let (_, response) = try await pingSession.data(for: request) - if let http = response as? HTTPURLResponse, (200...499).contains(http.statusCode) { - connectionState = .connected - NSLog("[OpenClaw] Gateway reachable (HTTP %d)", http.statusCode) - } else { - connectionState = .unreachable("Unexpected response") - } - } catch { - connectionState = .unreachable(error.localizedDescription) - NSLog("[OpenClaw] Gateway unreachable: %@", error.localizedDescription) - } - } - - func resetSession() { - conversationHistory = [] - NSLog("[OpenClaw] Session reset (key retained: %@)", sessionKey) - } - - // MARK: - Agent Chat (session continuity via x-openclaw-session-key header) - - func delegateTask( - task: String, - toolName: String = "execute" - ) async -> ToolResult { - lastToolCallStatus = .executing(toolName) - - guard let url = URL(string: "\(GeminiConfig.openClawHost):\(GeminiConfig.openClawPort)/v1/chat/completions") else { - lastToolCallStatus = .failed(toolName, "Invalid URL") - return .failure("Invalid gateway URL") - } - - // Append the new user message to conversation history - conversationHistory.append(["role": "user", "content": task]) - - // Trim history to keep only the most recent turns (user+assistant pairs) - if conversationHistory.count > maxHistoryTurns * 2 { - 
conversationHistory = Array(conversationHistory.suffix(maxHistoryTurns * 2)) - } - - var request = URLRequest(url: url) - request.httpMethod = "POST" - request.setValue("Bearer \(GeminiConfig.openClawGatewayToken)", forHTTPHeaderField: "Authorization") - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - request.setValue(sessionKey, forHTTPHeaderField: "x-openclaw-session-key") - request.setValue("glass", forHTTPHeaderField: "x-openclaw-message-channel") - - let body: [String: Any] = [ - "model": "openclaw", - "messages": conversationHistory, - "stream": false - ] - - NSLog("[OpenClaw] Sending %d messages in conversation", conversationHistory.count) - - do { - request.httpBody = try JSONSerialization.data(withJSONObject: body) - let (data, response) = try await session.data(for: request) - let httpResponse = response as? HTTPURLResponse - - guard let statusCode = httpResponse?.statusCode, (200...299).contains(statusCode) else { - let code = httpResponse?.statusCode ?? 0 - let bodyStr = String(data: data, encoding: .utf8) ?? "no body" - NSLog("[OpenClaw] Chat failed: HTTP %d - %@", code, String(bodyStr.prefix(200))) - lastToolCallStatus = .failed(toolName, "HTTP \(code)") - return .failure("Agent returned HTTP \(code)") - } - - if let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], - let choices = json["choices"] as? [[String: Any]], - let first = choices.first, - let message = first["message"] as? [String: Any], - let content = message["content"] as? String { - // Append assistant response to history for continuity - conversationHistory.append(["role": "assistant", "content": content]) - NSLog("[OpenClaw] Agent result: %@", String(content.prefix(200))) - lastToolCallStatus = .completed(toolName) - return .success(content) - } - - let raw = String(data: data, encoding: .utf8) ?? 
"OK" - conversationHistory.append(["role": "assistant", "content": raw]) - NSLog("[OpenClaw] Agent raw: %@", String(raw.prefix(200))) - lastToolCallStatus = .completed(toolName) - return .success(raw) - } catch { - NSLog("[OpenClaw] Agent error: %@", error.localizedDescription) - lastToolCallStatus = .failed(toolName, error.localizedDescription) - return .failure("Agent error: \(error.localizedDescription)") - } - } -} diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift deleted file mode 100644 index 8ceeef59..00000000 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift +++ /dev/null @@ -1,191 +0,0 @@ -import Foundation - -class OpenClawEventClient { - var onNotification: ((String) -> Void)? - - private var webSocketTask: URLSessionWebSocketTask? - private var session: URLSession? - private var isConnected = false - private var shouldReconnect = false - private var reconnectDelay: TimeInterval = 2 - private let maxReconnectDelay: TimeInterval = 30 - - func connect() { - guard GeminiConfig.isOpenClawConfigured else { - NSLog("[OpenClawWS] Not configured, skipping") - return - } - - shouldReconnect = true - reconnectDelay = 2 - establishConnection() - } - - func disconnect() { - shouldReconnect = false - isConnected = false - webSocketTask?.cancel(with: .normalClosure, reason: nil) - webSocketTask = nil - session?.invalidateAndCancel() - session = nil - NSLog("[OpenClawWS] Disconnected") - } - - // MARK: - Private - - private func establishConnection() { - let host = GeminiConfig.openClawHost - .replacingOccurrences(of: "http://", with: "") - .replacingOccurrences(of: "https://", with: "") - let port = GeminiConfig.openClawPort - guard let url = URL(string: "ws://\(host):\(port)") else { - NSLog("[OpenClawWS] Invalid URL") - return - } - - let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 30 - session = 
URLSession(configuration: config) - webSocketTask = session?.webSocketTask(with: url) - webSocketTask?.resume() - - NSLog("[OpenClawWS] Connecting to %@", url.absoluteString) - startReceiving() - } - - private func startReceiving() { - webSocketTask?.receive { [weak self] result in - guard let self else { return } - switch result { - case .success(let message): - switch message { - case .string(let text): - self.handleMessage(text) - case .data(let data): - if let text = String(data: data, encoding: .utf8) { - self.handleMessage(text) - } - @unknown default: - break - } - self.startReceiving() - case .failure(let error): - NSLog("[OpenClawWS] Receive error: %@", error.localizedDescription) - self.isConnected = false - self.scheduleReconnect() - } - } - } - - private func handleMessage(_ text: String) { - guard let data = text.data(using: .utf8), - let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], - let type = json["type"] as? String else { return } - - if type == "event" { - handleEvent(json) - } else if type == "res" { - let ok = json["ok"] as? Bool ?? false - if ok { - NSLog("[OpenClawWS] Connected and authenticated") - isConnected = true - reconnectDelay = 2 - } else { - let error = json["error"] as? [String: Any] - let msg = error?["message"] as? String ?? "unknown" - NSLog("[OpenClawWS] Connect failed: %@", msg) - } - } - } - - private func handleEvent(_ json: [String: Any]) { - guard let event = json["event"] as? String else { return } - let payload = json["payload"] as? [String: Any] ?? 
[:] - - switch event { - case "connect.challenge": - sendConnectHandshake() - - case "heartbeat": - handleHeartbeatEvent(payload) - - case "cron": - handleCronEvent(payload) - - default: - break - } - } - - private func sendConnectHandshake() { - let connectMsg: [String: Any] = [ - "type": "req", - "id": UUID().uuidString, - "method": "connect", - "params": [ - "minProtocol": 3, - "maxProtocol": 3, - "client": [ - "id": "ios-node", - "displayName": "VisionClaw Glass", - "version": "1.0", - "platform": "ios", - "mode": "node" - ], - "role": "node", - "scopes": [] as [String], - "caps": ["camera", "voice"], - "commands": [] as [String], - "permissions": [:] as [String: Any], - "auth": [ - "token": GeminiConfig.openClawGatewayToken - ] - ] as [String: Any] - ] - - guard let data = try? JSONSerialization.data(withJSONObject: connectMsg), - let string = String(data: data, encoding: .utf8) else { return } - webSocketTask?.send(.string(string)) { error in - if let error { - NSLog("[OpenClawWS] Handshake send error: %@", error.localizedDescription) - } - } - } - - private func handleHeartbeatEvent(_ payload: [String: Any]) { - let status = payload["status"] as? String ?? "" - // Only notify if there's actual content (not empty/silent heartbeats) - guard status == "sent", let preview = payload["preview"] as? String, !preview.isEmpty else { - return - } - - let silent = payload["silent"] as? Bool ?? false - guard !silent else { return } - - NSLog("[OpenClawWS] Heartbeat notification: %@", String(preview.prefix(100))) - onNotification?("[Notification from your assistant] \(preview)") - } - - private func handleCronEvent(_ payload: [String: Any]) { - let action = payload["action"] as? String ?? "" - guard action == "finished" else { return } - - let summary = payload["summary"] as? String - ?? payload["result"] as? String - ?? 
"" - guard !summary.isEmpty else { return } - - NSLog("[OpenClawWS] Cron notification: %@", String(summary.prefix(100))) - onNotification?("[Scheduled update] \(summary)") - } - - private func scheduleReconnect() { - guard shouldReconnect else { return } - NSLog("[OpenClawWS] Reconnecting in %.0fs", reconnectDelay) - DispatchQueue.main.asyncAfter(deadline: .now() + reconnectDelay) { [weak self] in - guard let self, self.shouldReconnect else { return } - self.reconnectDelay = min(self.reconnectDelay * 2, self.maxReconnectDelay) - self.establishConnection() - } - } -} diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift deleted file mode 100644 index c7222a28..00000000 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ /dev/null @@ -1,106 +0,0 @@ -import Foundation - -// MARK: - Gemini Tool Call (parsed from server JSON) - -struct GeminiFunctionCall { - let id: String - let name: String - let args: [String: Any] -} - -struct GeminiToolCall { - let functionCalls: [GeminiFunctionCall] - - init?(json: [String: Any]) { - guard let toolCall = json["toolCall"] as? [String: Any], - let calls = toolCall["functionCalls"] as? [[String: Any]] else { - return nil - } - self.functionCalls = calls.compactMap { call in - guard let id = call["id"] as? String, - let name = call["name"] as? String else { return nil } - let args = call["args"] as? [String: Any] ?? [:] - return GeminiFunctionCall(id: id, name: name, args: args) - } - } -} - -// MARK: - Gemini Tool Call Cancellation - -struct GeminiToolCallCancellation { - let ids: [String] - - init?(json: [String: Any]) { - guard let cancellation = json["toolCallCancellation"] as? [String: Any], - let ids = cancellation["ids"] as? 
[String] else { - return nil - } - self.ids = ids - } -} - -// MARK: - Tool Result - -enum ToolResult { - case success(String) - case failure(String) - - var responseValue: [String: Any] { - switch self { - case .success(let result): - return ["result": result] - case .failure(let error): - return ["error": error] - } - } -} - -// MARK: - Tool Call Status (for UI) - -enum ToolCallStatus: Equatable { - case idle - case executing(String) - case completed(String) - case failed(String, String) - case cancelled(String) - - var displayText: String { - switch self { - case .idle: return "" - case .executing(let name): return "Running: \(name)..." - case .completed(let name): return "Done: \(name)" - case .failed(let name, let err): return "Failed: \(name) - \(err)" - case .cancelled(let name): return "Cancelled: \(name)" - } - } - - var isActive: Bool { - if case .executing = self { return true } - return false - } -} - -// MARK: - Tool Declarations (for Gemini setup message) - -enum ToolDeclarations { - - static func allDeclarations() -> [[String: Any]] { - return [execute] - } - - static let execute: [String: Any] = [ - "name": "execute", - "description": "Your only way to take action. You have no memory, storage, or ability to do anything on your own -- use this tool for everything: sending messages, searching the web, adding to lists, setting reminders, creating notes, research, drafts, scheduling, smart home control, app interactions, or any request that goes beyond answering a question. When in doubt, use this tool.", - "parameters": [ - "type": "object", - "properties": [ - "task": [ - "type": "string", - "description": "Clear, detailed description of what to do. Include all relevant context: names, content, platforms, quantities, etc." 
- ] - ], - "required": ["task"] - ] as [String: Any], - "behavior": "BLOCKING" - ] -} diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift deleted file mode 100644 index a20babf4..00000000 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ /dev/null @@ -1,108 +0,0 @@ -import Foundation - -@MainActor -class ToolCallRouter { - private let bridge: OpenClawBridge - private var inFlightTasks: [String: Task] = [:] - private var consecutiveFailures = 0 - private let maxConsecutiveFailures = 3 - - init(bridge: OpenClawBridge) { - self.bridge = bridge - } - - /// Route a tool call from Gemini to OpenClaw. Calls sendResponse with the - /// JSON dictionary to send back as a toolResponse message. - func handleToolCall( - _ call: GeminiFunctionCall, - sendResponse: @escaping ([String: Any]) -> Void - ) { - let callId = call.id - let callName = call.name - - NSLog("[ToolCall] Received: %@ (id: %@) args: %@", - callName, callId, String(describing: call.args)) - - // Circuit breaker: stop sending tool calls after repeated failures - if consecutiveFailures >= maxConsecutiveFailures { - NSLog("[ToolCall] Circuit breaker open (%d consecutive failures), rejecting %@", - consecutiveFailures, callId) - let errorResult: ToolResult = .failure( - "Tool execution is temporarily unavailable after \(consecutiveFailures) consecutive failures. " + - "Please tell the user you cannot complete this action right now and suggest they check their OpenClaw gateway connection." - ) - let response = buildToolResponse(callId: callId, name: callName, result: errorResult) - sendResponse(response) - return - } - - let task = Task { @MainActor in - let taskDesc = call.args["task"] as? String ?? 
String(describing: call.args) - let result = await bridge.delegateTask(task: taskDesc, toolName: callName) - - guard !Task.isCancelled else { - NSLog("[ToolCall] Task %@ was cancelled, skipping response", callId) - return - } - - switch result { - case .success: - self.consecutiveFailures = 0 - case .failure: - self.consecutiveFailures += 1 - } - - NSLog("[ToolCall] Result for %@ (id: %@): %@", - callName, callId, String(describing: result)) - - let response = self.buildToolResponse(callId: callId, name: callName, result: result) - sendResponse(response) - - self.inFlightTasks.removeValue(forKey: callId) - } - - inFlightTasks[callId] = task - } - - /// Cancel specific in-flight tool calls (from toolCallCancellation) - func cancelToolCalls(ids: [String]) { - for id in ids { - if let task = inFlightTasks[id] { - NSLog("[ToolCall] Cancelling in-flight call: %@", id) - task.cancel() - inFlightTasks.removeValue(forKey: id) - } - } - bridge.lastToolCallStatus = .cancelled(ids.first ?? "unknown") - } - - /// Cancel all in-flight tool calls (on session stop) - func cancelAll() { - for (id, task) in inFlightTasks { - NSLog("[ToolCall] Cancelling in-flight call: %@", id) - task.cancel() - } - inFlightTasks.removeAll() - consecutiveFailures = 0 - } - - // MARK: - Private - - private func buildToolResponse( - callId: String, - name: String, - result: ToolResult - ) -> [String: Any] { - return [ - "toolResponse": [ - "functionResponses": [ - [ - "id": callId, - "name": name, - "response": result.responseValue - ] - ] - ] - ] - } -} diff --git a/samples/CameraAccess/CameraAccess/Secrets.swift.example b/samples/CameraAccess/CameraAccess/Secrets.swift.example index af66099a..e2a47705 100644 --- a/samples/CameraAccess/CameraAccess/Secrets.swift.example +++ b/samples/CameraAccess/CameraAccess/Secrets.swift.example @@ -4,17 +4,26 @@ import Foundation enum Secrets { - // REQUIRED: Get your key at https://aistudio.google.com/apikey - static let geminiAPIKey = "YOUR_GEMINI_API_KEY" - - 
// OPTIONAL: OpenClaw gateway config (for agentic tool-calling) - // Use your Mac's Bonjour hostname (run: scutil --get LocalHostName) - static let openClawHost = "http://YOUR_MAC_HOSTNAME.local" - static let openClawPort = 18789 - static let openClawHookToken = "YOUR_OPENCLAW_HOOK_TOKEN" - static let openClawGatewayToken = "YOUR_OPENCLAW_GATEWAY_TOKEN" - - // OPTIONAL: WebRTC signaling server URL (for live POV streaming) - // Run: cd samples/CameraAccess/server && npm install && npm start - static let webrtcSignalingURL = "ws://YOUR_MAC_IP:8080" + // OPTIONAL: Stable device ID for SOP/heartbeat telemetry + static let deviceID = "YOUR_DEVICE_UUID" + + // Prototype worker identity used during ops-api bootstrap + static let workerLoginCode = "PD-0101" + static let workerEmail = "" + + // Optional fallback bearer for /api/worker/* before bootstrap returns a session token. + // Gemini credentials and prompts are owned by Admin AI Settings on the server. + static let workerAPIBearerToken = "" + + // Operations API URL for worker bootstrap, sessions, events, and media registration. + static let opsBaseURL = "https://admin.embarcaderolabs.cloud" + + // Admin API URL for worker live ingest (/api/worker/*). + static let adminBaseURL = "https://admin.embarcaderolabs.cloud" + + // Signaling service URL for live supervisor jump-in. + static let signalBaseURL = "https://signal.embarcaderolabs.cloud" + + // Backward-compatible legacy signaling URL used by older code paths. 
+ static let webrtcSignalingURL = "wss://signal.embarcaderolabs.cloud" } diff --git a/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift b/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift index 8d63a557..fa1ee1af 100644 --- a/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift +++ b/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift @@ -1,67 +1,162 @@ import Foundation +import Security final class SettingsManager { static let shared = SettingsManager() + private enum RuntimeURL { + static let adminBaseURL = "https://admin.embarcaderolabs.cloud" + static let opsBaseURL = "https://admin.embarcaderolabs.cloud" + static let signalBaseURL = "https://signal.embarcaderolabs.cloud" + } + private let defaults = UserDefaults.standard + private let keychain = KeychainStore( + service: Bundle.main.bundleIdentifier ?? "com.embarcaderolabs.visionclaw" + ) private enum Key: String { - case geminiAPIKey - case openClawHost - case openClawPort - case openClawHookToken - case openClawGatewayToken - case geminiSystemPrompt + case deviceID + case workerLoginCode + case workerLoginCodeMigratedFromFastFoodDefault + case workerEmail + case workerAPIBearerToken + case opsBaseURL + case adminBaseURL + case signalBaseURL case webrtcSignalingURL case speakerOutputEnabled case videoStreamingEnabled + case videoStreamingDefaultMigratedToOnDemand case proactiveNotificationsEnabled } - private init() {} + private init() { + migrateOnDemandVideoDefaultIfNeeded() + } + + private func migrateOnDemandVideoDefaultIfNeeded() { + guard !defaults.bool(forKey: Key.videoStreamingDefaultMigratedToOnDemand.rawValue) else { return } + if defaults.object(forKey: Key.videoStreamingEnabled.rawValue) != nil { + defaults.set(false, forKey: Key.videoStreamingEnabled.rawValue) + } + defaults.set(true, forKey: Key.videoStreamingDefaultMigratedToOnDemand.rawValue) + } + + // MARK: - Worker - // MARK: - Gemini + var deviceID: String { + get { + if let stored = 
defaults.string(forKey: Key.deviceID.rawValue), !stored.isEmpty { + return stored + } - var geminiAPIKey: String { - get { defaults.string(forKey: Key.geminiAPIKey.rawValue) ?? Secrets.geminiAPIKey } - set { defaults.set(newValue, forKey: Key.geminiAPIKey.rawValue) } + if !Secrets.deviceID.isEmpty, Secrets.deviceID != "YOUR_DEVICE_UUID" { + defaults.set(Secrets.deviceID, forKey: Key.deviceID.rawValue) + return Secrets.deviceID + } + + let generated = UUID().uuidString + defaults.set(generated, forKey: Key.deviceID.rawValue) + return generated + } + set { defaults.set(newValue, forKey: Key.deviceID.rawValue) } } - var geminiSystemPrompt: String { - get { defaults.string(forKey: Key.geminiSystemPrompt.rawValue) ?? GeminiConfig.defaultSystemInstruction } - set { defaults.set(newValue, forKey: Key.geminiSystemPrompt.rawValue) } + var workerLoginCode: String { + get { + if let stored = defaults.string(forKey: Key.workerLoginCode.rawValue) { + let trimmed = stored.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.caseInsensitiveCompare("EMBC-0001") == .orderedSame, + Secrets.workerLoginCode.caseInsensitiveCompare("EMBC-0001") != .orderedSame, + defaults.bool(forKey: Key.workerLoginCodeMigratedFromFastFoodDefault.rawValue) == false { + defaults.set(Secrets.workerLoginCode, forKey: Key.workerLoginCode.rawValue) + defaults.set(true, forKey: Key.workerLoginCodeMigratedFromFastFoodDefault.rawValue) + return Secrets.workerLoginCode + } + + return stored + } + + return Secrets.workerLoginCode + } + set { defaults.set(newValue, forKey: Key.workerLoginCode.rawValue) } } - // MARK: - OpenClaw + var workerEmail: String { + get { defaults.string(forKey: Key.workerEmail.rawValue) ?? Secrets.workerEmail } + set { defaults.set(newValue, forKey: Key.workerEmail.rawValue) } + } - var openClawHost: String { - get { defaults.string(forKey: Key.openClawHost.rawValue) ?? 
Secrets.openClawHost } - set { defaults.set(newValue, forKey: Key.openClawHost.rawValue) } + var workerAPIBearerToken: String { + get { + let current = secureString(for: .workerAPIBearerToken, fallback: Secrets.workerAPIBearerToken) + if !current.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return current + } + if let legacy = keychain.string(for: "openClawBearerToken"), + !legacy.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + keychain.set(legacy, for: Key.workerAPIBearerToken.rawValue) + keychain.removeValue(for: "openClawBearerToken") + return legacy + } + if let legacy = defaults.string(forKey: "openClawBearerToken"), + !legacy.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + keychain.set(legacy, for: Key.workerAPIBearerToken.rawValue) + defaults.removeObject(forKey: "openClawBearerToken") + return legacy + } + return current + } + set { setSecureString(newValue, for: .workerAPIBearerToken) } } - var openClawPort: Int { + var opsBaseURL: String { get { - let stored = defaults.integer(forKey: Key.openClawPort.rawValue) - return stored != 0 ? stored : Secrets.openClawPort + if let stored = defaults.string(forKey: Key.opsBaseURL.rawValue), + Self.isUsableRuntimeURL(stored) { + return migratedRuntimeURL(stored, for: .opsBaseURL) + } + return migratedRuntimeURL(Secrets.opsBaseURL, for: .opsBaseURL) } - set { defaults.set(newValue, forKey: Key.openClawPort.rawValue) } + set { defaults.set(newValue, forKey: Key.opsBaseURL.rawValue) } } - var openClawHookToken: String { - get { defaults.string(forKey: Key.openClawHookToken.rawValue) ?? 
Secrets.openClawHookToken } - set { defaults.set(newValue, forKey: Key.openClawHookToken.rawValue) } + var adminBaseURL: String { + get { + if let stored = defaults.string(forKey: Key.adminBaseURL.rawValue), + Self.isUsableRuntimeURL(stored) { + return migratedRuntimeURL(stored, for: .adminBaseURL) + } + return migratedRuntimeURL(Secrets.adminBaseURL, for: .adminBaseURL) + } + set { defaults.set(newValue, forKey: Key.adminBaseURL.rawValue) } } - var openClawGatewayToken: String { - get { defaults.string(forKey: Key.openClawGatewayToken.rawValue) ?? Secrets.openClawGatewayToken } - set { defaults.set(newValue, forKey: Key.openClawGatewayToken.rawValue) } + var signalBaseURL: String { + get { + if let stored = defaults.string(forKey: Key.signalBaseURL.rawValue), + Self.isUsableRuntimeURL(stored) { + return migratedRuntimeURL(stored, for: .signalBaseURL) + } + if let legacy = defaults.string(forKey: Key.webrtcSignalingURL.rawValue), + Self.isUsableRuntimeURL(legacy) { + return migratedRuntimeURL(Self.normalizeSignalBaseURL(legacy), for: .signalBaseURL) + } + return migratedRuntimeURL(Secrets.signalBaseURL, for: .signalBaseURL) + } + set { defaults.set(newValue, forKey: Key.signalBaseURL.rawValue) } } // MARK: - WebRTC var webrtcSignalingURL: String { - get { defaults.string(forKey: Key.webrtcSignalingURL.rawValue) ?? Secrets.webrtcSignalingURL } - set { defaults.set(newValue, forKey: Key.webrtcSignalingURL.rawValue) } + get { Self.normalizeWebSocketURL(signalBaseURL) } + set { + let normalized = Self.normalizeSignalBaseURL(newValue) + defaults.set(normalized, forKey: Key.signalBaseURL.rawValue) + defaults.set(normalized, forKey: Key.webrtcSignalingURL.rawValue) + } } // MARK: - Audio @@ -74,7 +169,7 @@ final class SettingsManager { // MARK: - Video var videoStreamingEnabled: Bool { - get { defaults.object(forKey: Key.videoStreamingEnabled.rawValue) as? Bool ?? true } + get { defaults.object(forKey: Key.videoStreamingEnabled.rawValue) as? Bool ?? 
false } set { defaults.set(newValue, forKey: Key.videoStreamingEnabled.rawValue) } } @@ -88,11 +183,178 @@ final class SettingsManager { // MARK: - Reset func resetAll() { - for key in [Key.geminiAPIKey, .geminiSystemPrompt, .openClawHost, .openClawPort, - .openClawHookToken, .openClawGatewayToken, .webrtcSignalingURL, - .speakerOutputEnabled, .videoStreamingEnabled, + for key in [Key.workerLoginCode, .workerLoginCodeMigratedFromFastFoodDefault, .workerEmail, .opsBaseURL, .adminBaseURL, .signalBaseURL, + .webrtcSignalingURL, + .deviceID, .speakerOutputEnabled, .videoStreamingEnabled, .proactiveNotificationsEnabled] { defaults.removeObject(forKey: key.rawValue) } + for key in [Key.workerAPIBearerToken] { + defaults.removeObject(forKey: key.rawValue) + keychain.removeValue(for: key.rawValue) + } + for legacy in [ + "geminiAPIKey", + "openClawBearerToken", + "openClawHookToken", + "openClawGatewayToken", + "openClawHost", + "openClawPort", + "openClawTailscaleIP", + "geminiSystemPrompt" + ] { + defaults.removeObject(forKey: legacy) + keychain.removeValue(for: legacy) + } + } + + private func secureString(for key: Key, fallback: String) -> String { + if let stored = keychain.string(for: key.rawValue), !stored.isEmpty { + return stored + } + + if let legacy = defaults.string(forKey: key.rawValue), !legacy.isEmpty { + keychain.set(legacy, for: key.rawValue) + defaults.removeObject(forKey: key.rawValue) + return legacy + } + + if !fallback.isEmpty, !fallback.contains("YOUR_") { + keychain.set(fallback, for: key.rawValue) + return fallback + } + + return fallback + } + + private func setSecureString(_ value: String, for key: Key) { + let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) + defaults.removeObject(forKey: key.rawValue) + if trimmed.isEmpty { + keychain.removeValue(for: key.rawValue) + } else { + keychain.set(trimmed, for: key.rawValue) + } + } + + private func migratedRuntimeURL(_ raw: String, for key: Key) -> String { + let migrated = 
Self.migrateRuntimeURL(raw, for: key) + if migrated != raw { + defaults.set(migrated, forKey: key.rawValue) + if key == .signalBaseURL { + defaults.set(migrated, forKey: Key.webrtcSignalingURL.rawValue) + } + } + return migrated + } + + private static func migrateRuntimeURL(_ raw: String, for key: Key) -> String { + let normalized = normalizeSignalBaseURL(raw) + let lowercased = normalized.lowercased() + + if lowercased.contains("embarcadero-admin-705096377819.us-central1.run.app") { + if key == .signalBaseURL { + return RuntimeURL.signalBaseURL + } + if key == .opsBaseURL { + return RuntimeURL.opsBaseURL + } + return RuntimeURL.adminBaseURL + } + + if lowercased.contains("embarcadero-signal-705096377819.us-central1.run.app") { + return RuntimeURL.signalBaseURL + } + + return normalized + } + + private static func normalizeSignalBaseURL(_ raw: String) -> String { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + if trimmed.hasPrefix("wss://") { + return "https://" + String(trimmed.dropFirst("wss://".count)) + } + if trimmed.hasPrefix("ws://") { + return "http://" + String(trimmed.dropFirst("ws://".count)) + } + if trimmed.hasPrefix("https://") || trimmed.hasPrefix("http://") { + return trimmed + } + return "https://\(trimmed)" + } + + private static func isUsableRuntimeURL(_ raw: String) -> Bool { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return false } + + let blockedMarkers = [ + "YOUR_", + "YOUR_MAC_IP", + "example.com", + ] + + return !blockedMarkers.contains { trimmed.localizedCaseInsensitiveContains($0) } + } + + private static func normalizeWebSocketURL(_ raw: String) -> String { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + if trimmed.hasPrefix("wss://") || trimmed.hasPrefix("ws://") { + return trimmed + } + if trimmed.hasPrefix("https://") { + return "wss://" 
+ String(trimmed.dropFirst("https://".count)) + } + if trimmed.hasPrefix("http://") { + return "ws://" + String(trimmed.dropFirst("http://".count)) + } + return "wss://\(trimmed)" + } +} + +private struct KeychainStore { + let service: String + + func string(for key: String) -> String? { + var query = baseQuery(for: key) + query[kSecReturnData as String] = kCFBooleanTrue + query[kSecMatchLimit as String] = kSecMatchLimitOne + + var result: AnyObject? + let status = SecItemCopyMatching(query as CFDictionary, &result) + guard status == errSecSuccess, + let data = result as? Data, + let value = String(data: data, encoding: .utf8) + else { + return nil + } + return value + } + + func set(_ value: String, for key: String) { + let data = Data(value.utf8) + let query = baseQuery(for: key) + let attributes = [kSecValueData as String: data] + let status = SecItemUpdate(query as CFDictionary, attributes as CFDictionary) + + if status == errSecItemNotFound { + var item = query + item[kSecValueData as String] = data + item[kSecAttrAccessible as String] = kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly + SecItemAdd(item as CFDictionary, nil) + } + } + + func removeValue(for key: String) { + SecItemDelete(baseQuery(for: key) as CFDictionary) + } + + private func baseQuery(for key: String) -> [String: Any] { + [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: key, + ] } } diff --git a/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift b/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift index 8e22fe33..976e610c 100644 --- a/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift +++ b/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift @@ -4,104 +4,71 @@ struct SettingsView: View { @Environment(\.dismiss) private var dismiss private let settings = SettingsManager.shared - @State private var geminiAPIKey: String = "" - @State private var openClawHost: String = "" - @State 
private var openClawPort: String = "" - @State private var openClawHookToken: String = "" - @State private var openClawGatewayToken: String = "" - @State private var geminiSystemPrompt: String = "" + @State private var workerLoginCode: String = "" + @State private var workerEmail: String = "" + @State private var workerAPIBearerToken: String = "" + @State private var opsBaseURL: String = "" + @State private var adminBaseURL: String = "" + @State private var signalBaseURL: String = "" @State private var webrtcSignalingURL: String = "" @State private var speakerOutputEnabled: Bool = false - @State private var videoStreamingEnabled: Bool = true + @State private var videoStreamingEnabled: Bool = false @State private var proactiveNotificationsEnabled: Bool = true + @State private var showAdvancedBackend = false @State private var showResetConfirmation = false var body: some View { NavigationView { Form { - Section(header: Text("Gemini API")) { + Section(header: Text("Worker")) { VStack(alignment: .leading, spacing: 4) { - Text("API Key") + Text("Worker Email") .font(.caption) .foregroundColor(.secondary) - TextField("Enter Gemini API key", text: $geminiAPIKey) - .autocapitalization(.none) - .disableAutocorrection(true) - .font(.system(.body, design: .monospaced)) - } - } - - Section(header: Text("System Prompt"), footer: Text("Customize the AI assistant's behavior and personality. 
Changes take effect on the next Gemini session.")) { - TextEditor(text: $geminiSystemPrompt) - .font(.system(.body, design: .monospaced)) - .frame(minHeight: 200) - } - - Section(header: Text("OpenClaw"), footer: Text("Connect to an OpenClaw gateway running on your Mac for agentic tool-calling.")) { - VStack(alignment: .leading, spacing: 4) { - Text("Host") - .font(.caption) - .foregroundColor(.secondary) - TextField("http://your-mac.local", text: $openClawHost) - .autocapitalization(.none) - .disableAutocorrection(true) - .keyboardType(.URL) - .font(.system(.body, design: .monospaced)) - } - - VStack(alignment: .leading, spacing: 4) { - Text("Port") - .font(.caption) - .foregroundColor(.secondary) - TextField("18789", text: $openClawPort) - .keyboardType(.numberPad) - .font(.system(.body, design: .monospaced)) - } - - VStack(alignment: .leading, spacing: 4) { - Text("Hook Token") - .font(.caption) - .foregroundColor(.secondary) - TextField("Hook token", text: $openClawHookToken) + TextField("worker@company.com", text: $workerEmail) .autocapitalization(.none) .disableAutocorrection(true) + .keyboardType(.emailAddress) + .textInputAutocapitalization(.never) .font(.system(.body, design: .monospaced)) } VStack(alignment: .leading, spacing: 4) { - Text("Gateway Token") + Text("Login Code") .font(.caption) .foregroundColor(.secondary) - TextField("Gateway auth token", text: $openClawGatewayToken) + TextField("PD-0101", text: $workerLoginCode) .autocapitalization(.none) .disableAutocorrection(true) .font(.system(.body, design: .monospaced)) } } - Section(header: Text("WebRTC")) { - VStack(alignment: .leading, spacing: 4) { - Text("Signaling URL") - .font(.caption) - .foregroundColor(.secondary) - TextField("wss://your-server.example.com", text: $webrtcSignalingURL) - .autocapitalization(.none) - .disableAutocorrection(true) - .keyboardType(.URL) - .font(.system(.body, design: .monospaced)) - } + Section( + header: Text("AI Guide"), + footer: Text("Gemini key, model, 
prompt, checklist brain, and manual context come from Admin AI Settings and the server-minted Live token.") + ) { + Label("Server-managed Gemini Live", systemImage: "sparkles") + Label("Checklist context loads from the assigned patient checklist", systemImage: "checklist") } Section(header: Text("Audio"), footer: Text("Route audio output to the iPhone speaker instead of glasses. Useful for demos where others need to hear.")) { Toggle("Speaker Output", isOn: $speakerOutputEnabled) } - Section(header: Text("Video"), footer: Text("Disable video streaming to save battery. Audio remains active for voice-only interaction.")) { - Toggle("Video Streaming", isOn: $videoStreamingEnabled) + Section(header: Text("Video"), footer: Text("Continuous Gemini video frames are optional. Step checks still use the camera on demand when you say \"I'm done\" or tap Check Step.")) { + Toggle("Continuous AI Video Frames", isOn: $videoStreamingEnabled) + } + + Section(header: Text("Notifications"), footer: Text("Receive AI guide status updates spoken through the glasses.")) { + Toggle("Proactive Guide Updates", isOn: $proactiveNotificationsEnabled) } - Section(header: Text("Notifications"), footer: Text("Receive proactive updates from OpenClaw (heartbeat, scheduled tasks) spoken through the glasses.")) { - Toggle("Proactive Notifications", isOn: $proactiveNotificationsEnabled) + Section(header: Text("Advanced")) { + Toggle("Developer Backend Settings", isOn: $showAdvancedBackend) + if showAdvancedBackend { + backendFields + } } Section { @@ -142,13 +109,72 @@ struct SettingsView: View { } } + private var backendFields: some View { + Group { + VStack(alignment: .leading, spacing: 4) { + Text("Ops Base URL") + .font(.caption) + .foregroundColor(.secondary) + TextField("https://admin.embarcaderolabs.cloud", text: $opsBaseURL) + .autocapitalization(.none) + .disableAutocorrection(true) + .keyboardType(.URL) + .font(.system(.body, design: .monospaced)) + } + + VStack(alignment: .leading, 
spacing: 4) { + Text("Admin Base URL") + .font(.caption) + .foregroundColor(.secondary) + TextField("https://admin.embarcaderolabs.cloud", text: $adminBaseURL) + .autocapitalization(.none) + .disableAutocorrection(true) + .keyboardType(.URL) + .font(.system(.body, design: .monospaced)) + } + + VStack(alignment: .leading, spacing: 4) { + Text("Signal Base URL") + .font(.caption) + .foregroundColor(.secondary) + TextField("https://signal.embarcaderolabs.cloud", text: $signalBaseURL) + .autocapitalization(.none) + .disableAutocorrection(true) + .keyboardType(.URL) + .font(.system(.body, design: .monospaced)) + } + + VStack(alignment: .leading, spacing: 4) { + Text("WebRTC Signaling URL") + .font(.caption) + .foregroundColor(.secondary) + TextField("wss://signal.embarcaderolabs.cloud", text: $webrtcSignalingURL) + .autocapitalization(.none) + .disableAutocorrection(true) + .keyboardType(.URL) + .font(.system(.body, design: .monospaced)) + } + + VStack(alignment: .leading, spacing: 4) { + Text("Worker API Bearer Token") + .font(.caption) + .foregroundColor(.secondary) + SecureField("Optional fallback token", text: $workerAPIBearerToken) + .autocapitalization(.none) + .disableAutocorrection(true) + .textInputAutocapitalization(.never) + .font(.system(.body, design: .monospaced)) + } + } + } + private func loadCurrentValues() { - geminiAPIKey = settings.geminiAPIKey - geminiSystemPrompt = settings.geminiSystemPrompt - openClawHost = settings.openClawHost - openClawPort = String(settings.openClawPort) - openClawHookToken = settings.openClawHookToken - openClawGatewayToken = settings.openClawGatewayToken + workerLoginCode = settings.workerLoginCode + workerEmail = settings.workerEmail + workerAPIBearerToken = settings.workerAPIBearerToken + opsBaseURL = settings.opsBaseURL + adminBaseURL = settings.adminBaseURL + signalBaseURL = settings.signalBaseURL webrtcSignalingURL = settings.webrtcSignalingURL speakerOutputEnabled = settings.speakerOutputEnabled videoStreamingEnabled 
= settings.videoStreamingEnabled @@ -156,14 +182,12 @@ struct SettingsView: View { } private func save() { - settings.geminiAPIKey = geminiAPIKey.trimmingCharacters(in: .whitespacesAndNewlines) - settings.geminiSystemPrompt = geminiSystemPrompt.trimmingCharacters(in: .whitespacesAndNewlines) - settings.openClawHost = openClawHost.trimmingCharacters(in: .whitespacesAndNewlines) - if let port = Int(openClawPort.trimmingCharacters(in: .whitespacesAndNewlines)) { - settings.openClawPort = port - } - settings.openClawHookToken = openClawHookToken.trimmingCharacters(in: .whitespacesAndNewlines) - settings.openClawGatewayToken = openClawGatewayToken.trimmingCharacters(in: .whitespacesAndNewlines) + settings.workerLoginCode = workerLoginCode.trimmingCharacters(in: .whitespacesAndNewlines) + settings.workerEmail = workerEmail.trimmingCharacters(in: .whitespacesAndNewlines) + settings.workerAPIBearerToken = workerAPIBearerToken.trimmingCharacters(in: .whitespacesAndNewlines) + settings.opsBaseURL = opsBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + settings.adminBaseURL = adminBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + settings.signalBaseURL = signalBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) settings.webrtcSignalingURL = webrtcSignalingURL.trimmingCharacters(in: .whitespacesAndNewlines) settings.speakerOutputEnabled = speakerOutputEnabled settings.videoStreamingEnabled = videoStreamingEnabled diff --git a/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift b/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift index 29203cd8..fab84e6f 100644 --- a/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift @@ -19,7 +19,11 @@ import CoreMedia import CoreVideo import MWDATCamera import MWDATCore +import AVFoundation +import Combine +import Speech import SwiftUI +import UIKit import VideoToolbox enum 
StreamingStatus { @@ -33,6 +37,2829 @@ enum StreamingMode { case iPhone } +enum ChecklistCompletionSource: String, Codable { + case pending + case manual + case voice + case vision +} + +enum SopTerminationStatus: String, Codable { + case timedOut = "timed_out" + case allItemsChecked = "all_items_checked" + case userEnded = "user_ended" +} + +enum GuidancePolicy: String { + case silent + case confirm + case nextInstruction = "next_instruction" + case warning + case helpPrompt = "help_prompt" + + var instruction: String { + switch self { + case .silent: + return "Stay silent unless the worker asks for help, the step changes, or a safety/compliance issue appears." + case .confirm: + return "Briefly confirm the observed evidence, then stay quiet." + case .nextInstruction: + return "Give the next step instruction once, then stay quiet while the worker acts." + case .warning: + return "Warn the worker only about the specific skipped, out-of-order, or unsafe condition." + case .helpPrompt: + return "Ask one short clarifying question or offer help because the evidence is unclear." + } + } +} + +enum DossierPipelineStatusKind { + case info + case active + case success + case error +} + +struct SOPTemplate: Identifiable, Hashable { + let id: UUID + let remoteID: String? + let name: String + let steps: [SOPStepTemplate] + let estimatedDuration: Double + let shiftID: String? + let shiftName: String? + let packageID: String? + let packageRunID: String? + let packageTitle: String? + let packageVersion: Int? + let sopVersion: Int? + let sourceType: String + let sortOrder: Int + let required: Bool + + var items: [String] { + steps.map(\.title) + } + + var validationSummary: String { + let labels = Array(Set(steps.map { $0.validation.uppercased() })).sorted() + return labels.isEmpty ? "NO VALIDATION" : labels.joined(separator: " + ") + } + + init( + id: UUID = UUID(), + remoteID: String? = nil, + name: String, + steps: [SOPStepTemplate]? 
= nil, + items: [String] = [], + estimatedDuration: Double = 15.0, + shiftID: String? = nil, + shiftName: String? = nil, + packageID: String? = nil, + packageRunID: String? = nil, + packageTitle: String? = nil, + packageVersion: Int? = nil, + sopVersion: Int? = nil, + sourceType: String = "standalone", + sortOrder: Int = 0, + required: Bool = true + ) { + self.id = id + self.remoteID = remoteID + self.name = name + let resolvedSteps = steps ?? items.enumerated().map { index, item in + SOPStepTemplate( + id: ChecklistItemState.normalizedItemID(from: item), + order: index + 1, + title: item, + aiPrompt: "Look at the image and confirm whether \"\(item)\" has been completed." + ) + } + self.steps = resolvedSteps.sorted { $0.order < $1.order } + self.estimatedDuration = estimatedDuration + self.shiftID = shiftID + self.shiftName = shiftName + self.packageID = packageID + self.packageRunID = packageRunID + self.packageTitle = packageTitle + self.packageVersion = packageVersion + self.sopVersion = sopVersion + self.sourceType = sourceType + self.sortOrder = sortOrder + self.required = required + } +} + +private func validRemoteUUID(_ value: String?) -> String? 
{ + guard let value, + UUID(uuidString: value) != nil + else { return nil } + return value +} + +struct SOPStepTemplate: Identifiable, Hashable { + let id: String + let order: Int + let title: String + let description: String + let duration: String + let validation: String + let critical: Bool + let aiPrompt: String + let expectedObjects: [String] + let preconditions: [String] + let postconditions: [String] + let skipRisk: String + let evidenceRequired: Bool + let allowManualComplete: Bool + + init( + id: String, + order: Int, + title: String, + description: String = "", + duration: String = "30s", + validation: String = "visual", + critical: Bool = false, + aiPrompt: String, + expectedObjects: [String] = [], + preconditions: [String] = [], + postconditions: [String] = [], + skipRisk: String = "medium", + evidenceRequired: Bool = true, + allowManualComplete: Bool = true + ) { + self.id = id + self.order = order + self.title = title + self.description = description + self.duration = duration + self.validation = validation + self.critical = critical + self.aiPrompt = aiPrompt + self.expectedObjects = expectedObjects + self.preconditions = preconditions + self.postconditions = postconditions + self.skipRisk = skipRisk + self.evidenceRequired = evidenceRequired + self.allowManualComplete = allowManualComplete + } +} + +private struct RemoteSOPListResponse: Decodable { + let version: String? + + private let sops: [RemoteSOP]? + private let data: [RemoteSOP]? + private let templates: [RemoteSOP]? + private let sopTemplates: [RemoteSOP]? + + var allSOPs: [RemoteSOP] { + sops ?? data ?? templates ?? sopTemplates ?? [] + } + + private enum CodingKeys: String, CodingKey { + case version + case sops + case data + case templates + case sopTemplates = "sop_templates" + } +} + +private struct RemoteSOP: Decodable { + let id: String + let name: String + let items: [RemoteSOPItem] + let estimatedDuration: Double? + let updatedAt: Date? + let createdAt: Date? 
+ + private enum CodingKeys: String, CodingKey { + case id + case uuid + case sopID = "sop_id" + case name + case title + case items + case estimatedDuration = "estimatedDuration" + case estimatedDurationSnake = "estimated_duration" + case duration + case updatedAt = "updatedAt" + case updatedAtSnake = "updated_at" + case modifiedAtSnake = "modified_at" + case createdAt = "createdAt" + case createdAtSnake = "created_at" + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + + let decodedID = try container.decodeIfPresent(String.self, forKey: .id) + let decodedUUID = try container.decodeIfPresent(String.self, forKey: .uuid) + let decodedSopID = try container.decodeIfPresent(String.self, forKey: .sopID) + id = decodedID ?? decodedUUID ?? decodedSopID ?? UUID().uuidString + + let decodedName = try container.decodeIfPresent(String.self, forKey: .name) + let decodedTitle = try container.decodeIfPresent(String.self, forKey: .title) + name = decodedName ?? decodedTitle ?? 
"Untitled SOP" + + if let decodedItems = try container.decodeIfPresent([RemoteSOPItem].self, forKey: .items) { + items = decodedItems + } else if let stringItems = try container.decodeIfPresent([String].self, forKey: .items) { + items = stringItems.map { RemoteSOPItem(name: $0) } + } else { + items = [] + } + + if let estimate = try container.decodeIfPresent(Double.self, forKey: .estimatedDuration) { + estimatedDuration = estimate + } else if let estimate = try container.decodeIfPresent(Double.self, forKey: .estimatedDurationSnake) { + estimatedDuration = estimate + } else if let estimate = try container.decodeIfPresent(Double.self, forKey: .duration) { + estimatedDuration = estimate + } else if let estimateInt = try container.decodeIfPresent(Int.self, forKey: .estimatedDuration) { + estimatedDuration = Double(estimateInt) + } else if let estimateInt = try container.decodeIfPresent(Int.self, forKey: .estimatedDurationSnake) { + estimatedDuration = Double(estimateInt) + } else if let estimateInt = try container.decodeIfPresent(Int.self, forKey: .duration) { + estimatedDuration = Double(estimateInt) + } else { + estimatedDuration = nil + } + + let decodedUpdatedAt = try container.decodeIfPresent(String.self, forKey: .updatedAt) + let decodedUpdatedAtSnake = try container.decodeIfPresent(String.self, forKey: .updatedAtSnake) + let decodedModifiedAtSnake = try container.decodeIfPresent(String.self, forKey: .modifiedAtSnake) + let updatedRaw = decodedUpdatedAt ?? decodedUpdatedAtSnake ?? decodedModifiedAtSnake + + let decodedCreatedAt = try container.decodeIfPresent(String.self, forKey: .createdAt) + let decodedCreatedAtSnake = try container.decodeIfPresent(String.self, forKey: .createdAtSnake) + let createdRaw = decodedCreatedAt ?? decodedCreatedAtSnake + + updatedAt = Self.parseDate(updatedRaw) + createdAt = Self.parseDate(createdRaw) + } + + private static func parseDate(_ raw: String?) -> Date? 
{ + guard let raw, !raw.isEmpty else { return nil } + + let isoFormatter = ISO8601DateFormatter() + isoFormatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + if let date = isoFormatter.date(from: raw) { return date } + + let fallbackISO = ISO8601DateFormatter() + if let date = fallbackISO.date(from: raw) { return date } + + let formatter = DateFormatter() + formatter.locale = Locale(identifier: "en_US_POSIX") + formatter.timeZone = TimeZone(secondsFromGMT: 0) + formatter.dateFormat = "yyyy-MM-dd HH:mm:ss" + return formatter.date(from: raw) + } +} + +private struct RemoteSOPItem: Decodable { + let name: String + + init(name: String) { + self.name = name + } + + private enum CodingKeys: String, CodingKey { + case name + case title + case label + case item + } + + init(from decoder: Decoder) throws { + if let singleValueContainer = try? decoder.singleValueContainer(), + let raw = try? singleValueContainer.decode(String.self) + { + name = raw + return + } + + let container = try decoder.container(keyedBy: CodingKeys.self) + let decodedName = try container.decodeIfPresent(String.self, forKey: .name) + let decodedTitle = try container.decodeIfPresent(String.self, forKey: .title) + let decodedLabel = try container.decodeIfPresent(String.self, forKey: .label) + let decodedItem = try container.decodeIfPresent(String.self, forKey: .item) + name = decodedName ?? decodedTitle ?? decodedLabel ?? decodedItem ?? "Unknown Item" + } +} + +private final class ConversationAudioRecorder: @unchecked Sendable { + private enum Source: String { + case input = "worker_input" + case output = "gemini_output" + } + + private struct AudioChunk { + let data: Data + let sampleRate: Double + let source: Source + let hostTime: CFTimeInterval + } + + private let queue = DispatchQueue(label: "sop.conversation.audio.recorder", qos: .userInitiated) + private let sessionID: String? 
+ private let recordingStartHostTime: CFTimeInterval + private let outputURL: URL + private var chunks: [AudioChunk] = [] + private var isFinishing = false + private var inputAudioChunkCount = 0 + private var outputAudioChunkCount = 0 + + init(sessionID: String?, recordingStartHostTime: CFTimeInterval = CACurrentMediaTime()) { + self.sessionID = sessionID + self.recordingStartHostTime = recordingStartHostTime + self.outputURL = FileManager.default.temporaryDirectory + .appendingPathComponent("sop_\(sessionID ?? UUID().uuidString)_conversation") + .appendingPathExtension("m4a") + try? FileManager.default.removeItem(at: outputURL) + } + + func appendInputAudio(_ data: Data) { + append(data, sampleRate: GeminiConfig.inputAudioSampleRate, source: .input) + } + + func appendOutputAudio(_ data: Data) { + append(data, sampleRate: GeminiConfig.outputAudioSampleRate, source: .output) + } + + func finishAudioFile() async -> URL? { + await withCheckedContinuation { continuation in + queue.async { [weak self] in + guard let self else { + continuation.resume(returning: nil) + return + } + + self.isFinishing = true + let chunks = self.chunks + let inputCount = self.inputAudioChunkCount + let outputCount = self.outputAudioChunkCount + guard !chunks.isEmpty else { + continuation.resume(returning: nil) + return + } + + let mixedPCM = Self.renderMixedPCM( + chunks: chunks, + recordingStartHostTime: self.recordingStartHostTime + ) + guard !mixedPCM.isEmpty else { + continuation.resume(returning: nil) + return + } + + Self.writeAACAudio(data: mixedPCM, outputURL: self.outputURL) { url in + let byteCount = url.flatMap { url -> Int? in + guard let attributes = try? FileManager.default.attributesOfItem(atPath: url.path) else { + return nil + } + return attributes[.size] as? Int + } ?? 0 + Task { + await WorkerTelemetry.shared.record( + "conversation_audio_finish", + source: "media_upload", + stage: url == nil ? 
"failed" : "completed", + sessionID: self.sessionID, + metricValue: Double(byteCount), + metricUnit: "bytes", + payload: [ + "bytes": byteCount, + "input_audio_chunks": inputCount, + "output_audio_chunks": outputCount + ] + ) + } + continuation.resume(returning: url) + } + } + } + } + + private func append(_ data: Data, sampleRate: Double, source: Source) { + guard !data.isEmpty else { return } + let hostTime = CACurrentMediaTime() + queue.async { [weak self] in + guard let self, !self.isFinishing else { return } + switch source { + case .input: + self.inputAudioChunkCount += 1 + case .output: + self.outputAudioChunkCount += 1 + } + self.chunks.append( + AudioChunk( + data: data, + sampleRate: sampleRate, + source: source, + hostTime: hostTime + ) + ) + } + } + + private static func renderMixedPCM( + chunks: [AudioChunk], + recordingStartHostTime: CFTimeInterval + ) -> Data { + let targetSampleRate = GeminiConfig.outputAudioSampleRate + var renderedChunks: [(startFrame: Int, samples: [Float])] = [] + var totalFrameCount = 0 + + for chunk in chunks { + let samples = resampledFloatSamples( + from: chunk.data, + sourceSampleRate: chunk.sampleRate, + targetSampleRate: targetSampleRate + ) + guard !samples.isEmpty else { continue } + let startFrame = max(0, Int((chunk.hostTime - recordingStartHostTime) * targetSampleRate)) + totalFrameCount = max(totalFrameCount, startFrame + samples.count) + renderedChunks.append((startFrame, samples)) + } + + guard totalFrameCount > 0 else { return Data() } + + var mixed = [Float](repeating: 0, count: totalFrameCount) + var contributors = [UInt8](repeating: 0, count: totalFrameCount) + for rendered in renderedChunks { + for (offset, sample) in rendered.samples.enumerated() { + let index = rendered.startFrame + offset + guard index < mixed.count else { continue } + mixed[index] += sample + if contributors[index] < UInt8.max { + contributors[index] += 1 + } + } + } + + var int16Samples = [Int16](repeating: 0, count: totalFrameCount) + 
for index in mixed.indices { + let count = contributors[index] + let normalized = count > 1 ? mixed[index] / Float(count) : mixed[index] + let clamped = max(-1.0, min(1.0, normalized)) + int16Samples[index] = Int16(clamped * Float(Int16.max)) + } + + return int16Samples.withUnsafeBufferPointer { Data(buffer: $0) } + } + + private static func resampledFloatSamples( + from data: Data, + sourceSampleRate: Double, + targetSampleRate: Double + ) -> [Float] { + let sourceFrameCount = data.count / MemoryLayout.size + guard sourceFrameCount > 0 else { return [] } + + let sourceSamples: [Float] = data.withUnsafeBytes { rawBuffer in + guard let baseAddress = rawBuffer.bindMemory(to: Int16.self).baseAddress else { return [] } + return (0.. Void + ) { + try? FileManager.default.removeItem(at: outputURL) + guard let writer = try? AVAssetWriter(outputURL: outputURL, fileType: .m4a) else { + completion(nil) + return + } + + let audioInput = AVAssetWriterInput( + mediaType: .audio, + outputSettings: [ + AVFormatIDKey: kAudioFormatMPEG4AAC, + AVSampleRateKey: GeminiConfig.outputAudioSampleRate, + AVNumberOfChannelsKey: Int(GeminiConfig.audioChannels), + AVEncoderBitRateKey: 64_000 + ] + ) + audioInput.expectsMediaDataInRealTime = false + guard writer.canAdd(audioInput) else { + completion(nil) + return + } + writer.add(audioInput) + guard writer.startWriting() else { + completion(nil) + return + } + writer.startSession(atSourceTime: .zero) + + guard let formatDescription = makePCMFormatDescription() else { + completion(nil) + return + } + + let bytesPerFrame = MemoryLayout.size * Int(GeminiConfig.audioChannels) + let framesPerChunk = Int(GeminiConfig.outputAudioSampleRate) + let bytesPerChunk = framesPerChunk * bytesPerFrame + var byteOffset = 0 + var accumulatedSampleCount = 0 + var appendFailed = false + + while byteOffset < data.count, !appendFailed { + while !audioInput.isReadyForMoreMediaData { + Thread.sleep(forTimeInterval: 0.005) + } + let byteCount = min(bytesPerChunk, 
data.count - byteOffset) + let alignedByteCount = byteCount - (byteCount % bytesPerFrame) + guard alignedByteCount > 0 else { break } + let chunkData = data.subdata(in: byteOffset..<(byteOffset + alignedByteCount)) + let presentationTime = CMTime( + value: CMTimeValue(accumulatedSampleCount), + timescale: CMTimeScale(GeminiConfig.outputAudioSampleRate) + ) + guard let sampleBuffer = makeAudioSampleBuffer( + data: chunkData, + sampleRate: GeminiConfig.outputAudioSampleRate, + channels: GeminiConfig.audioChannels, + formatDescription: formatDescription, + presentationTime: presentationTime + ) else { + appendFailed = true + break + } + appendFailed = !audioInput.append(sampleBuffer) + byteOffset += alignedByteCount + accumulatedSampleCount += alignedByteCount / bytesPerFrame + } + + audioInput.markAsFinished() + writer.finishWriting { + completion(!appendFailed && writer.status == .completed ? outputURL : nil) + } + } + + private static func makePCMFormatDescription() -> CMAudioFormatDescription? { + var streamDescription = AudioStreamBasicDescription( + mSampleRate: GeminiConfig.outputAudioSampleRate, + mFormatID: kAudioFormatLinearPCM, + mFormatFlags: AudioFormatFlags(kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked), + mBytesPerPacket: UInt32(MemoryLayout.size) * GeminiConfig.audioChannels, + mFramesPerPacket: 1, + mBytesPerFrame: UInt32(MemoryLayout.size) * GeminiConfig.audioChannels, + mChannelsPerFrame: GeminiConfig.audioChannels, + mBitsPerChannel: UInt32(MemoryLayout.size * 8), + mReserved: 0 + ) + var formatDescription: CMAudioFormatDescription? 
+ let status = CMAudioFormatDescriptionCreate( + allocator: kCFAllocatorDefault, + asbd: &streamDescription, + layoutSize: 0, + layout: nil, + magicCookieSize: 0, + magicCookie: nil, + extensions: nil, + formatDescriptionOut: &formatDescription + ) + guard status == noErr else { return nil } + return formatDescription + } + + private static func makeAudioSampleBuffer( + data: Data, + sampleRate: Double, + channels: UInt32, + formatDescription: CMAudioFormatDescription, + presentationTime: CMTime + ) -> CMSampleBuffer? { + let bytesPerFrame = Int(MemoryLayout.size * Int(channels)) + guard bytesPerFrame > 0 else { return nil } + let sampleCount = data.count / bytesPerFrame + guard sampleCount > 0 else { return nil } + + var blockBuffer: CMBlockBuffer? + var status = CMBlockBufferCreateWithMemoryBlock( + allocator: kCFAllocatorDefault, + memoryBlock: nil, + blockLength: data.count, + blockAllocator: nil, + customBlockSource: nil, + offsetToData: 0, + dataLength: data.count, + flags: 0, + blockBufferOut: &blockBuffer + ) + guard status == noErr, let blockBuffer else { return nil } + + status = data.withUnsafeBytes { rawBuffer in + guard let baseAddress = rawBuffer.baseAddress else { return OSStatus(-1) } + return CMBlockBufferReplaceDataBytes( + with: baseAddress, + blockBuffer: blockBuffer, + offsetIntoDestination: 0, + dataLength: data.count + ) + } + guard status == noErr else { return nil } + + let sampleDuration = CMTime(value: 1, timescale: CMTimeScale(sampleRate)) + var timing = CMSampleTimingInfo( + duration: sampleDuration, + presentationTimeStamp: presentationTime, + decodeTimeStamp: .invalid + ) + var sampleBuffer: CMSampleBuffer? 
+ status = CMSampleBufferCreate( + allocator: kCFAllocatorDefault, + dataBuffer: blockBuffer, + dataReady: true, + makeDataReadyCallback: nil, + refcon: nil, + formatDescription: formatDescription, + sampleCount: sampleCount, + sampleTimingEntryCount: 1, + sampleTimingArray: &timing, + sampleSizeEntryCount: 0, + sampleSizeArray: nil, + sampleBufferOut: &sampleBuffer + ) + guard status == noErr else { return nil } + return sampleBuffer + } +} + +private enum ConversationAudioMuxer { + static func mux(videoURL: URL, audioURL: URL, outputURL: URL) async -> URL? { + await withCheckedContinuation { continuation in + let videoAsset = AVURLAsset(url: videoURL) + let audioAsset = AVURLAsset(url: audioURL) + let composition = AVMutableComposition() + + guard + let videoTrack = videoAsset.tracks(withMediaType: .video).first, + let compositionVideoTrack = composition.addMutableTrack( + withMediaType: .video, + preferredTrackID: kCMPersistentTrackID_Invalid + ) + else { + continuation.resume(returning: nil) + return + } + + do { + try compositionVideoTrack.insertTimeRange( + CMTimeRange(start: .zero, duration: videoAsset.duration), + of: videoTrack, + at: .zero + ) + compositionVideoTrack.preferredTransform = videoTrack.preferredTransform + + if let audioTrack = audioAsset.tracks(withMediaType: .audio).first, + let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) { + let audioDuration = CMTimeCompare(audioAsset.duration, videoAsset.duration) > 0 + ? videoAsset.duration + : audioAsset.duration + if CMTimeCompare(audioDuration, .zero) > 0 { + try compositionAudioTrack.insertTimeRange( + CMTimeRange(start: .zero, duration: audioDuration), + of: audioTrack, + at: .zero + ) + } + } + } catch { + NSLog("[SOPRecorder] Failed to build mixed replay composition: %@", error.localizedDescription) + continuation.resume(returning: nil) + return + } + + try? 
FileManager.default.removeItem(at: outputURL) + guard let exporter = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + continuation.resume(returning: nil) + return + } + exporter.outputURL = outputURL + exporter.outputFileType = .mp4 + exporter.shouldOptimizeForNetworkUse = true + exporter.exportAsynchronously { + continuation.resume(returning: exporter.status == .completed ? outputURL : nil) + } + } + } +} + +private enum SopMediaBackgroundTask { + static func begin(name: String) async -> UIBackgroundTaskIdentifier { + await MainActor.run { + UIApplication.shared.beginBackgroundTask(withName: name) { + NSLog("[SOPRecorder] Background media finalize task expired") + } + } + } + + static func end(_ identifier: UIBackgroundTaskIdentifier) async { + guard identifier != .invalid else { return } + await MainActor.run { + UIApplication.shared.endBackgroundTask(identifier) + } + } +} + +private enum SopSessionMediaFinalizer { + static func finishRecording( + wasIPhoneRecording: Bool, + iPhoneCameraManager: IPhoneCameraManager?, + conversationAudioRecorder: ConversationAudioRecorder?, + sopVideoRecorder: SopVideoRecorder? + ) async -> URL? { + if wasIPhoneRecording { + let phoneVideoURL = await iPhoneCameraManager?.stopRecording() + let conversationAudioURL = await conversationAudioRecorder?.finishAudioFile() + iPhoneCameraManager?.stop() + + guard let phoneVideoURL else { + return nil + } + + guard let conversationAudioURL else { + return phoneVideoURL + } + + let mixedURL = phoneVideoURL + .deletingLastPathComponent() + .appendingPathComponent(phoneVideoURL.deletingPathExtension().lastPathComponent + "_mixed") + .appendingPathExtension("mp4") + if let muxedURL = await ConversationAudioMuxer.mux( + videoURL: phoneVideoURL, + audioURL: conversationAudioURL, + outputURL: mixedURL + ) { + try? FileManager.default.removeItem(at: phoneVideoURL) + try? 
FileManager.default.removeItem(at: conversationAudioURL) + return muxedURL + } + + NSLog("[SOPRecorder] iPhone mixed audio mux failed; returning phone recording") + return phoneVideoURL + } + + return await sopVideoRecorder?.finishRecording() + } +} + +private final class SopVideoRecorder: @unchecked Sendable { + private enum AudioTrackKind: String { + case input = "worker_input" + case output = "gemini_output" + } + + private struct PendingAudioChunk { + let data: Data + let sampleRate: Double + let source: AudioTrackKind + let hostTime: CFTimeInterval + } + + private let queue = DispatchQueue(label: "sop.video.recorder", qos: .userInitiated) + private let sessionID: String? + private var writer: AVAssetWriter? + private var writerInput: AVAssetWriterInput? + private var inputAudioInput: AVAssetWriterInput? + private var outputAudioInput: AVAssetWriterInput? + private var inputAudioFormatDescription: CMAudioFormatDescription? + private var outputAudioFormatDescription: CMAudioFormatDescription? + private var pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor? + private let recordingStartHostTime: CFTimeInterval + private let conversationRecorder: ConversationAudioRecorder + private(set) var outputURL: URL? + private var isFinishing = false + private var appendedFrameCount = 0 + private var inputAudioChunkCount = 0 + private var outputAudioChunkCount = 0 + private var droppedAudioChunkCount = 0 + private var pendingAudioChunks: [PendingAudioChunk] = [] + private let sourcePixelFormat = VideoFrameBufferFactory.pixelFormat + private let maxPendingAudioChunks = 160 + + init(sessionID: String? = nil) { + self.sessionID = sessionID + let startHostTime = CACurrentMediaTime() + self.recordingStartHostTime = startHostTime + self.conversationRecorder = ConversationAudioRecorder( + sessionID: sessionID, + recordingStartHostTime: startHostTime + ) + let fileURL = FileManager.default.temporaryDirectory + .appendingPathComponent("sop_\(sessionID ?? 
UUID().uuidString)") + .appendingPathExtension("mp4") + try? FileManager.default.removeItem(at: fileURL) + self.outputURL = fileURL + NSLog("[SOPRecorder] Prepared output path at %@", fileURL.path) + Task { + await WorkerTelemetry.shared.record( + "sop_recorder_start", + source: "media_upload", + stage: "prepared", + sessionID: sessionID, + payload: ["path_ready": true] + ) + } + } + + func appendFrame(_ image: UIImage) { + queue.async { [weak self] in + guard let self, !self.isFinishing else { return } + guard let cgImage = image.cgImage else { return } + self.configureWriterIfNeeded(width: cgImage.width, height: cgImage.height) + + guard + let pixelBuffer = VideoFrameBufferFactory.makePixelBuffer( + from: image, + using: self.pixelBufferAdaptor?.pixelBufferPool + ) + else { + return + } + + self.appendPixelBufferInternal(pixelBuffer) + } + } + + func appendPixelBuffer(_ pixelBuffer: CVPixelBuffer) { + queue.async { [weak self] in + guard let self, !self.isFinishing else { return } + self.configureWriterIfNeeded( + width: CVPixelBufferGetWidth(pixelBuffer), + height: CVPixelBufferGetHeight(pixelBuffer) + ) + self.appendPixelBufferInternal(pixelBuffer) + } + } + + func appendInputAudio(_ data: Data) { + guard !data.isEmpty else { return } + conversationRecorder.appendInputAudio(data) + queue.async { [weak self] in + self?.inputAudioChunkCount += 1 + } + } + + func appendOutputAudio(_ data: Data) { + guard !data.isEmpty else { return } + conversationRecorder.appendOutputAudio(data) + queue.async { [weak self] in + self?.outputAudioChunkCount += 1 + } + } + + func finishRecording() async -> URL? 
{ + await withCheckedContinuation { continuation in + queue.async { [weak self] in + guard let self else { + continuation.resume(returning: nil) + return + } + + NSLog( + "[SOPRecorder] finishRecording called (frames=%d, inputAudio=%d, outputAudio=%d, droppedAudio=%d, hasWriter=%@, outputURL=%@)", + self.appendedFrameCount, + self.inputAudioChunkCount, + self.outputAudioChunkCount, + self.droppedAudioChunkCount, + self.writer == nil ? "no" : "yes", + self.outputURL?.path ?? "nil") + + guard let writer = self.writer, + let writerInput = self.writerInput, + writer.status == .writing else { + if let writer = self.writer { + NSLog("[SOPRecorder] finishRecording returning nil because writer status=%d", writer.status.rawValue) + } else { + NSLog("[SOPRecorder] finishRecording returning nil because no video frames were recorded") + } + Task { + await WorkerTelemetry.shared.record( + "sop_recorder_finish", + source: "media_upload", + stage: "missing_video", + sessionID: self.sessionID, + payload: [ + "frame_count": self.appendedFrameCount, + "audio_input_chunks": self.inputAudioChunkCount, + "audio_output_chunks": self.outputAudioChunkCount, + "dropped_audio_chunks": self.droppedAudioChunkCount, + "reason": "no_video_frames_recorded" + ] + ) + } + continuation.resume(returning: nil) + return + } + + self.isFinishing = true + writerInput.markAsFinished() + writer.finishWriting { + Task { + let videoURL = self.outputURL + let audioURL = await self.conversationRecorder.finishAudioFile() + var finalURL = videoURL + if writer.status == .completed, let videoURL, let audioURL { + let mixedURL = videoURL + .deletingLastPathComponent() + .appendingPathComponent(videoURL.deletingPathExtension().lastPathComponent + "_mixed") + .appendingPathExtension("mp4") + if let muxedURL = await ConversationAudioMuxer.mux( + videoURL: videoURL, + audioURL: audioURL, + outputURL: mixedURL + ) { + finalURL = muxedURL + try? FileManager.default.removeItem(at: videoURL) + try? 
FileManager.default.removeItem(at: audioURL) + } else { + NSLog("[SOPRecorder] Mixed audio mux failed; returning video-only recording") + } + } + + let fileSize = finalURL.flatMap { url -> Int? in + guard let attributes = try? FileManager.default.attributesOfItem(atPath: url.path) else { + return nil + } + return attributes[.size] as? Int + } + NSLog( + "[SOPRecorder] finishWriting completed (status=%d, outputURL=%@, bytes=%d)", + writer.status.rawValue, + finalURL?.path ?? "nil", + fileSize ?? 0) + Task { + await WorkerTelemetry.shared.record( + "sop_recorder_finish", + source: "media_upload", + stage: writer.status == .completed ? "completed" : "failed", + sessionID: self.sessionID, + metricValue: Double(fileSize ?? 0), + metricUnit: "bytes", + payload: [ + "frame_count": self.appendedFrameCount, + "file_size": fileSize ?? 0, + "audio_input_chunks": self.inputAudioChunkCount, + "audio_output_chunks": self.outputAudioChunkCount, + "dropped_audio_chunks": self.droppedAudioChunkCount, + "writer_status": writer.status.rawValue, + "writer_error": writer.error?.localizedDescription ?? NSNull() + ] + ) + } + continuation.resume(returning: writer.status == .completed ? finalURL : nil) + } + } + } + } + } + + private func appendPixelBufferInternal(_ pixelBuffer: CVPixelBuffer) { + guard let writer = writer, + writer.status == .writing, + let writerInput = writerInput, + let adaptor = pixelBufferAdaptor, + writerInput.isReadyForMoreMediaData else { + if writer == nil { + NSLog("[SOPRecorder] Dropping frame because writer was never configured") + } else if let writer { + NSLog("[SOPRecorder] Dropping frame because writer is not writable (status=%d)", writer.status.rawValue) + } + return + } + + let elapsed = CACurrentMediaTime() - recordingStartHostTime + let presentationTime = CMTime(seconds: max(0, elapsed), preferredTimescale: 600) + let bufferForWriter = + VideoFrameBufferFactory.copyPixelBuffer(pixelBuffer, using: adaptor.pixelBufferPool) + ?? 
pixelBuffer + + let appended = adaptor.append(bufferForWriter, withPresentationTime: presentationTime) + if appended { + appendedFrameCount += 1 + if appendedFrameCount == 1 { + NSLog("[SOPRecorder] First frame appended successfully") + Task { + await WorkerTelemetry.shared.record( + "sop_recorder_first_frame", + source: "media_upload", + stage: "recording", + sessionID: sessionID + ) + } + } else if appendedFrameCount % 60 == 0 { + NSLog("[SOPRecorder] Appended %d frames", appendedFrameCount) + } + } else { + NSLog( + "[SOPRecorder] Failed appending frame at %.3fs (writer status=%d)", + elapsed, + writer.status.rawValue + ) + } + } + + private func configureWriterIfNeeded(width: Int, height: Int) { + guard writer == nil else { return } + + let size = Self.normalizedSize(width: width, height: height) + guard size.width > 0, size.height > 0 else { + NSLog("[SOPRecorder] Invalid normalized size: %@", NSCoder.string(for: size)) + return + } + + let fileURL = outputURL ?? FileManager.default.temporaryDirectory + .appendingPathComponent("sop_\(sessionID ?? UUID().uuidString)") + .appendingPathExtension("mp4") + + try? FileManager.default.removeItem(at: fileURL) + + NSLog("[SOPRecorder] Creating writer at %@ with size %@", fileURL.path, NSCoder.string(for: size)) + + guard let writer = try? 
AVAssetWriter(outputURL: fileURL, fileType: .mp4) else { + NSLog("[SOPRecorder] Failed to create AVAssetWriter") + return + } + + let outputSettings: [String: Any] = [ + AVVideoCodecKey: AVVideoCodecType.h264, + AVVideoWidthKey: Int(size.width), + AVVideoHeightKey: Int(size.height), + AVVideoCompressionPropertiesKey: [ + AVVideoAverageBitRateKey: 2_500_000, + AVVideoProfileLevelKey: AVVideoProfileLevelH264MainAutoLevel + ] + ] + + let input = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings) + input.expectsMediaDataInRealTime = true + + let sourceAttributes: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: Int(sourcePixelFormat), + kCVPixelBufferWidthKey as String: Int(size.width), + kCVPixelBufferHeightKey as String: Int(size.height), + kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any], + ] + + let adaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: input, sourcePixelBufferAttributes: sourceAttributes) + + guard writer.canAdd(input) else { + NSLog("[SOPRecorder] Writer cannot add AVAssetWriterInput") + return + } + writer.add(input) + // Conversation audio is mixed into one AAC track during finishRecording(), + // so the live writer stays focused on video frames. + + guard writer.startWriting() else { + NSLog("[SOPRecorder] startWriting failed: %@", writer.error?.localizedDescription ?? 
"unknown") + return + } + writer.startSession(atSourceTime: .zero) + NSLog("[SOPRecorder] Writer started successfully") + + self.writer = writer + self.writerInput = input + self.pixelBufferAdaptor = adaptor + self.outputURL = fileURL + drainPendingAudioChunks() + } + + private func appendAudio( + _ data: Data, + sampleRate: Double, + source: AudioTrackKind + ) { + guard !data.isEmpty else { return } + let hostTime = CACurrentMediaTime() + queue.async { [weak self] in + guard let self, !self.isFinishing else { return } + switch source { + case .input: + self.inputAudioChunkCount += 1 + case .output: + self.outputAudioChunkCount += 1 + } + let chunk = PendingAudioChunk( + data: data, + sampleRate: sampleRate, + source: source, + hostTime: hostTime + ) + + guard self.writer?.status == .writing else { + self.pendingAudioChunks.append(chunk) + if self.pendingAudioChunks.count > self.maxPendingAudioChunks { + self.pendingAudioChunks.removeFirst(self.pendingAudioChunks.count - self.maxPendingAudioChunks) + self.droppedAudioChunkCount += 1 + } + return + } + + self.appendAudioChunkInternal(chunk) + } + } + + private func drainPendingAudioChunks() { + guard !pendingAudioChunks.isEmpty else { return } + let chunks = pendingAudioChunks + pendingAudioChunks.removeAll() + for chunk in chunks { + appendAudioChunkInternal(chunk) + } + } + + private func appendAudioChunkInternal(_ chunk: PendingAudioChunk) { + let audioInput: AVAssetWriterInput? + let formatDescription: CMAudioFormatDescription? 
+ switch chunk.source { + case .input: + audioInput = inputAudioInput + formatDescription = inputAudioFormatDescription + case .output: + audioInput = outputAudioInput + formatDescription = outputAudioFormatDescription + } + + guard let audioInput, let formatDescription else { + droppedAudioChunkCount += 1 + return + } + guard audioInput.isReadyForMoreMediaData else { + droppedAudioChunkCount += 1 + return + } + guard let sampleBuffer = Self.makeAudioSampleBuffer( + data: chunk.data, + sampleRate: chunk.sampleRate, + channels: GeminiConfig.audioChannels, + formatDescription: formatDescription, + presentationTime: CMTime( + seconds: max(0, chunk.hostTime - recordingStartHostTime), + preferredTimescale: 600 + ) + ) else { + droppedAudioChunkCount += 1 + return + } + + if !audioInput.append(sampleBuffer) { + droppedAudioChunkCount += 1 + NSLog("[SOPRecorder] Failed appending %@ audio chunk", chunk.source.rawValue) + } + } + + private func makeAudioInput(sampleRate: Double, channels: UInt32) -> AVAssetWriterInput { + let outputSettings: [String: Any] = [ + AVFormatIDKey: kAudioFormatMPEG4AAC, + AVSampleRateKey: sampleRate, + AVNumberOfChannelsKey: Int(channels), + AVEncoderBitRateKey: 64_000 + ] + let input = AVAssetWriterInput(mediaType: .audio, outputSettings: outputSettings) + input.expectsMediaDataInRealTime = true + return input + } + + private static func makePCMFormatDescription( + sampleRate: Double, + channels: UInt32 + ) -> CMAudioFormatDescription? { + var streamDescription = AudioStreamBasicDescription( + mSampleRate: sampleRate, + mFormatID: kAudioFormatLinearPCM, + mFormatFlags: AudioFormatFlags(kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked), + mBytesPerPacket: UInt32(MemoryLayout.size) * channels, + mFramesPerPacket: 1, + mBytesPerFrame: UInt32(MemoryLayout.size) * channels, + mChannelsPerFrame: channels, + mBitsPerChannel: UInt32(MemoryLayout.size * 8), + mReserved: 0 + ) + var formatDescription: CMAudioFormatDescription? 
+ let status = CMAudioFormatDescriptionCreate( + allocator: kCFAllocatorDefault, + asbd: &streamDescription, + layoutSize: 0, + layout: nil, + magicCookieSize: 0, + magicCookie: nil, + extensions: nil, + formatDescriptionOut: &formatDescription + ) + guard status == noErr else { return nil } + return formatDescription + } + + private static func makeAudioSampleBuffer( + data: Data, + sampleRate: Double, + channels: UInt32, + formatDescription: CMAudioFormatDescription, + presentationTime: CMTime + ) -> CMSampleBuffer? { + let bytesPerFrame = Int(MemoryLayout.size * Int(channels)) + guard bytesPerFrame > 0 else { return nil } + let sampleCount = data.count / bytesPerFrame + guard sampleCount > 0 else { return nil } + + var blockBuffer: CMBlockBuffer? + var status = CMBlockBufferCreateWithMemoryBlock( + allocator: kCFAllocatorDefault, + memoryBlock: nil, + blockLength: data.count, + blockAllocator: nil, + customBlockSource: nil, + offsetToData: 0, + dataLength: data.count, + flags: 0, + blockBufferOut: &blockBuffer + ) + guard status == noErr, let blockBuffer else { return nil } + + status = data.withUnsafeBytes { rawBuffer in + guard let baseAddress = rawBuffer.baseAddress else { return OSStatus(-1) } + return CMBlockBufferReplaceDataBytes( + with: baseAddress, + blockBuffer: blockBuffer, + offsetIntoDestination: 0, + dataLength: data.count + ) + } + guard status == noErr else { return nil } + + let sampleDuration = CMTime(value: 1, timescale: CMTimeScale(sampleRate)) + var timing = CMSampleTimingInfo( + duration: sampleDuration, + presentationTimeStamp: presentationTime, + decodeTimeStamp: .invalid + ) + var sampleBuffer: CMSampleBuffer? 
+ status = CMSampleBufferCreate( + allocator: kCFAllocatorDefault, + dataBuffer: blockBuffer, + dataReady: true, + makeDataReadyCallback: nil, + refcon: nil, + formatDescription: formatDescription, + sampleCount: sampleCount, + sampleTimingEntryCount: 1, + sampleTimingArray: &timing, + sampleSizeEntryCount: 0, + sampleSizeArray: nil, + sampleBufferOut: &sampleBuffer + ) + guard status == noErr else { return nil } + return sampleBuffer + } + + private static func normalizedSize(width: Int, height: Int) -> CGSize { + var width = max(2, width) + var height = max(2, height) + if width % 2 != 0 { width += 1 } + if height % 2 != 0 { height += 1 } + return CGSize(width: width, height: height) + } +} + +struct ChecklistItemState: Identifiable, Codable, Hashable { + let id: UUID + let itemID: String + let name: String + let description: String + let duration: String + let validation: String + let critical: Bool + let aiPrompt: String + let expectedObjects: [String] + let preconditions: [String] + let postconditions: [String] + let skipRisk: String + let evidenceRequired: Bool + let allowManualComplete: Bool + var isChecked: Bool + var completionSource: ChecklistCompletionSource + + private enum CodingKeys: String, CodingKey { + case id + case itemID + case name + case description + case duration + case validation + case critical + case aiPrompt + case expectedObjects + case preconditions + case postconditions + case skipRisk + case evidenceRequired + case allowManualComplete + case isChecked + case completionSource + } + + init( + id: UUID = UUID(), + itemID: String? = nil, + name: String, + description: String = "", + duration: String = "30s", + validation: String = "visual", + critical: Bool = false, + aiPrompt: String? 
= nil, + expectedObjects: [String] = [], + preconditions: [String] = [], + postconditions: [String] = [], + skipRisk: String = "medium", + evidenceRequired: Bool = true, + allowManualComplete: Bool = true, + isChecked: Bool = false, + completionSource: ChecklistCompletionSource = .pending + ) { + self.id = id + self.itemID = itemID ?? ChecklistItemState.normalizedItemID(from: name) + self.name = name + self.description = description + self.duration = duration + self.validation = validation + self.critical = critical + self.aiPrompt = aiPrompt ?? "Look at the image and confirm whether \"\(name)\" has been completed." + self.expectedObjects = expectedObjects + self.preconditions = preconditions + self.postconditions = postconditions + self.skipRisk = skipRisk + self.evidenceRequired = evidenceRequired + self.allowManualComplete = allowManualComplete + self.isChecked = isChecked + self.completionSource = completionSource + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let name = try container.decode(String.self, forKey: .name) + self.init( + id: try container.decodeIfPresent(UUID.self, forKey: .id) ?? UUID(), + itemID: try container.decodeIfPresent(String.self, forKey: .itemID), + name: name, + description: try container.decodeIfPresent(String.self, forKey: .description) ?? "", + duration: try container.decodeIfPresent(String.self, forKey: .duration) ?? "30s", + validation: try container.decodeIfPresent(String.self, forKey: .validation) ?? "visual", + critical: try container.decodeIfPresent(Bool.self, forKey: .critical) ?? false, + aiPrompt: try container.decodeIfPresent(String.self, forKey: .aiPrompt), + expectedObjects: try container.decodeIfPresent([String].self, forKey: .expectedObjects) ?? [], + preconditions: try container.decodeIfPresent([String].self, forKey: .preconditions) ?? [], + postconditions: try container.decodeIfPresent([String].self, forKey: .postconditions) ?? 
[], + skipRisk: try container.decodeIfPresent(String.self, forKey: .skipRisk) ?? "medium", + evidenceRequired: try container.decodeIfPresent(Bool.self, forKey: .evidenceRequired) ?? true, + allowManualComplete: try container.decodeIfPresent(Bool.self, forKey: .allowManualComplete) ?? true, + isChecked: try container.decodeIfPresent(Bool.self, forKey: .isChecked) ?? false, + completionSource: try container.decodeIfPresent(ChecklistCompletionSource.self, forKey: .completionSource) ?? .pending + ) + } + + static func normalizedItemID(from name: String) -> String { + let lowered = name.lowercased() + let filtered = lowered.map { ch in + ch.isLetter || ch.isNumber ? ch : "_" + } + let collapsed = String(filtered) + .replacingOccurrences(of: "__+", with: "_", options: .regularExpression) + .trimmingCharacters(in: CharacterSet(charactersIn: "_")) + return collapsed.isEmpty ? UUID().uuidString.lowercased() : collapsed + } +} + +struct SpotterEvidenceDecision: Equatable { + let shouldAutoComplete: Bool + let sampleCount: Int + let positiveCount: Int + let averagePositiveConfidence: Double + let threshold: Double +} + +struct SpotterEvidenceWindow { + private struct Sample { + let matched: Bool + let confidence: Double + } + + var maxSamples = 5 + var minPositiveSamples = 3 + private var samplesByStepID: [String: [Sample]] = [:] + + mutating func record( + stepID: String, + matched: Bool, + autoComplete: Bool, + confidence: Double, + threshold: Double + ) -> SpotterEvidenceDecision { + let positive = matched && autoComplete && confidence >= threshold + var samples = samplesByStepID[stepID] ?? [] + samples.append(Sample(matched: positive, confidence: confidence)) + if samples.count > maxSamples { + samples = Array(samples.suffix(maxSamples)) + } + samplesByStepID[stepID] = samples + + let positives = samples.filter(\.matched) + let average = positives.isEmpty + ? 
0 + : positives.reduce(0) { $0 + $1.confidence } / Double(positives.count) + let shouldAutoComplete = positives.count >= minPositiveSamples && average >= threshold + + return SpotterEvidenceDecision( + shouldAutoComplete: shouldAutoComplete, + sampleCount: samples.count, + positiveCount: positives.count, + averagePositiveConfidence: average, + threshold: threshold + ) + } + + mutating func reset(stepID: String) { + samplesByStepID[stepID] = nil + } + + mutating func resetAll() { + samplesByStepID.removeAll() + } +} + +private enum WorkerLiveLogger { + static func log( + _ event: String, + sessionID: String? = nil, + roomCode: String? = nil, + assetID: String? = nil, + assetType: String? = nil, + bucket: String? = nil, + path: String? = nil, + byteSize: Int? = nil, + retryCount: Int? = nil, + uploadState: String? = nil, + error: String? = nil, + durationMs: Double? = nil, + metricValue: Double? = nil, + metricUnit: String? = nil, + telemetry: WorkerTelemetry? = nil + ) { + let payload: [String: Any] = [ + "event": event, + "sessionId": sessionID ?? NSNull(), + "roomCode": roomCode ?? NSNull(), + "assetId": assetID ?? NSNull(), + "assetType": assetType ?? NSNull(), + "bucket": bucket ?? NSNull(), + "path": path ?? NSNull(), + "byteSize": byteSize ?? NSNull(), + "retryCount": retryCount ?? NSNull(), + "uploadState": uploadState ?? NSNull(), + "error": error ?? NSNull(), + "durationMs": durationMs ?? NSNull() + ] + + guard JSONSerialization.isValidJSONObject(payload), + let data = try? 
JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys]), + let encoded = String(data: data, encoding: .utf8) + else { + NSLog("[worker-live] %@", event) + return + } + + NSLog("[worker-live] %@", encoded) + + guard let telemetry, let sessionID else { return } + Task { + await telemetry.record( + event, + source: telemetrySource(for: event), + stage: telemetryStage(for: event, uploadState: uploadState), + sessionID: sessionID, + durationMs: durationMs, + metricValue: metricValue, + metricUnit: metricUnit, + payload: payload + ) + } + } + + private static func telemetrySource(for event: String) -> String { + if event.contains("upload") || event.contains("finalize") || event == "retry_scheduled" { + return "media_upload" + } + if event.contains("heartbeat") { + return "ios_app" + } + return "ios_app" + } + + private static func telemetryStage(for event: String, uploadState: String?) -> String { + if event.contains("failure") || uploadState == "failed" { + return "failed" + } + if event.contains("success") || uploadState == "uploaded" { + return "uploaded" + } + if event.contains("target") { + return "target" + } + if event.contains("heartbeat") { + return "heartbeat" + } + if event == "retry_scheduled" { + return "retry" + } + return "point" + } +} + +struct WorkerMediaUploadResult: Equatable, Sendable { + let assetType: String + let assetID: String? + let bucket: String? + let path: String? + let byteSize: Int + let uploadState: String + let errorMessage: String? + + var succeeded: Bool { + uploadState == "uploaded" + } + + var isPending: Bool { + uploadState == "pending" + } +} + +struct WorkerPreparedMediaUpload: Equatable, Sendable { + let target: WorkerMediaUploadTarget + let result: WorkerMediaUploadResult +} + +actor WorkerAdminLiveSessionCoordinator { + typealias Sleeper = @Sendable (UInt64) async -> Void + typealias FileLoader = @Sendable (URL) async -> Data? 
+ typealias HeartbeatResponseHandler = @Sendable (WorkerLiveHeartbeatResponse) async -> Void + typealias HeartbeatFailureHandler = @Sendable (_ sessionID: String, _ message: String) async -> Void + + private let api: WorkerAdminAPI + private let telemetry: WorkerTelemetry? + private let heartbeatIntervalNanoseconds: UInt64 + private let sleeper: Sleeper + private let fileLoader: FileLoader + private let onHeartbeatResponse: HeartbeatResponseHandler? + private let onHeartbeatFailure: HeartbeatFailureHandler? + + private var sessionID: String? + private var roomCode: String? + private var currentStepIndex: Int = 0 + private var helpRequested: Bool = false + private var lastFrameBucket: String? + private var lastFramePath: String? + private var heartbeatTask: Task? + private var queuedFrameData: Data? + private var frameUploadTask: Task? + + init( + api: WorkerAdminAPI, + sessionID: String? = nil, + heartbeatIntervalNanoseconds: UInt64 = 7_000_000_000, + telemetry: WorkerTelemetry? = nil, + onHeartbeatResponse: HeartbeatResponseHandler? = nil, + onHeartbeatFailure: HeartbeatFailureHandler? = nil, + sleeper: @escaping Sleeper = { nanoseconds in + guard nanoseconds > 0 else { return } + try? await Task.sleep(nanoseconds: nanoseconds) + }, + fileLoader: @escaping FileLoader = { url in + await Task.detached(priority: .utility) { + try? Data(contentsOf: url) + }.value + } + ) { + self.api = api + self.telemetry = telemetry + self.sessionID = sessionID + self.heartbeatIntervalNanoseconds = heartbeatIntervalNanoseconds + self.sleeper = sleeper + self.fileLoader = fileLoader + self.onHeartbeatResponse = onHeartbeatResponse + self.onHeartbeatFailure = onHeartbeatFailure + } + + func start( + sessionID: String, + currentStepIndex: Int, + helpRequested: Bool, + roomCode: String? 
= nil + ) async { + self.sessionID = sessionID + self.currentStepIndex = currentStepIndex + self.helpRequested = helpRequested + await telemetry?.configure( + api: api, + sessionID: sessionID, + deviceID: GeminiConfig.deviceID + ) + await telemetry?.record( + "session_start", + source: "ios_app", + stage: "started", + sessionID: sessionID, + payload: [ + "current_step_index": currentStepIndex, + "help_requested": helpRequested, + "room_code_present": Self.trimmed(roomCode) != nil + ] + ) + if let roomCode = Self.trimmed(roomCode) { + self.roomCode = roomCode + } + + if heartbeatTask == nil, heartbeatIntervalNanoseconds > 0 { + heartbeatTask = Task { [heartbeatIntervalNanoseconds] in + while !Task.isCancelled { + await self.sleeper(heartbeatIntervalNanoseconds) + if Task.isCancelled { break } + await self.sendHeartbeat() + } + } + } + + await sendHeartbeat() + } + + func updateRoomCode(_ roomCode: String?, sendImmediateHeartbeat: Bool = true) async { + guard let roomCode = Self.trimmed(roomCode) else { return } + self.roomCode = roomCode + if sendImmediateHeartbeat { + await sendHeartbeat() + } + } + + func updateCurrentStepIndex(_ currentStepIndex: Int, sendImmediateHeartbeat: Bool = false) async { + self.currentStepIndex = currentStepIndex + if sendImmediateHeartbeat { + await sendHeartbeat() + } + } + + func updateHelpRequested(_ helpRequested: Bool, sendImmediateHeartbeat: Bool = true) async { + self.helpRequested = helpRequested + if sendImmediateHeartbeat { + await sendHeartbeat() + } + } + + func enqueueFrameUpload(data: Data) async { + queuedFrameData = data + if let sessionID { + await telemetry?.record( + "frame_enqueued", + source: "media_upload", + stage: "queued", + sessionID: sessionID, + metricValue: Double(data.count), + metricUnit: "bytes", + payload: [ + "bytes": data.count, + "latest_frame_only": true + ] + ) + } + guard frameUploadTask == nil else { return } + frameUploadTask = Task { + await self.drainQueuedFrames() + } + } + + func 
uploadVideoRecording( + from fileURL: URL?, + source: String = "session-recording" + ) async -> WorkerMediaUploadResult { + guard let preparedUpload = await prepareVideoRecordingUpload(source: source) else { + return videoUploadFailureResult(errorMessage: "Recording upload target could not be prepared.") + } + + return await uploadPreparedVideoRecording(from: fileURL, preparedUpload: preparedUpload) + } + + func prepareVideoRecordingUpload( + source: String = "session-recording" + ) async -> WorkerPreparedMediaUpload? { + guard let sessionID else { + return nil + } + + let targetStartedAt = CACurrentMediaTime() + + do { + let target = try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetType: "video", + byteSize: 0, + uploadState: "pending" + ) { + try await api.requestWorkerMediaUploadTarget( + sessionID: sessionID, + assetType: "video", + filename: "recording.mp4", + contentType: "video/mp4", + byteSize: 0, + source: source + ) + } + + WorkerLiveLogger.log( + "video_upload_target", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: "video", + bucket: target.bucket, + path: target.path, + byteSize: 0, + uploadState: "pending", + durationMs: (CACurrentMediaTime() - targetStartedAt) * 1000, + telemetry: telemetry + ) + + return WorkerPreparedMediaUpload( + target: target, + result: WorkerMediaUploadResult( + assetType: "video", + assetID: target.assetID, + bucket: target.bucket, + path: target.path, + byteSize: 0, + uploadState: "pending", + errorMessage: nil + ) + ) + } catch { + WorkerLiveLogger.log( + "video_upload_target_failure", + sessionID: sessionID, + roomCode: roomCode, + assetType: "video", + byteSize: 0, + uploadState: "failed", + error: error.localizedDescription, + durationMs: (CACurrentMediaTime() - targetStartedAt) * 1000, + telemetry: telemetry + ) + return nil + } + } + + func uploadPreparedVideoRecording( + from fileURL: URL?, + preparedUpload: WorkerPreparedMediaUpload + ) async -> 
WorkerMediaUploadResult { + guard let sessionID else { + return videoUploadFailureResult(errorMessage: "Recording upload session is missing.") + } + + let byteSize: Int + let data: Data? + let missingDataError: String + + if let fileURL { + data = await fileLoader(fileURL) + if let data { + byteSize = data.count + missingDataError = data.isEmpty ? "Recording file is empty." : "Recording file could not be loaded." + } else { + byteSize = 0 + missingDataError = "Recording file could not be loaded." + } + } else { + data = nil + byteSize = 0 + missingDataError = "Recording file was not created because no video frames were recorded." + } + + guard let data, !data.isEmpty else { + WorkerLiveLogger.log( + "recording_missing", + sessionID: sessionID, + roomCode: roomCode, + assetID: preparedUpload.target.assetID, + assetType: "video", + bucket: preparedUpload.target.bucket, + path: preparedUpload.target.path, + byteSize: byteSize, + uploadState: "failed", + error: missingDataError, + telemetry: telemetry + ) + + return await finalizeFailure( + logPrefix: "video", + sessionID: sessionID, + assetType: "video", + target: preparedUpload.target, + byteSize: byteSize, + errorMessage: missingDataError + ) + } + + return await uploadPreparedAsset( + assetType: "video", + contentType: "video/mp4", + target: preparedUpload.target, + data: data, + byteSize: byteSize, + missingDataError: missingDataError + ) + } + + func completeSession( + pendingVideoUpload: WorkerPreparedMediaUpload?, + onBeforeMarkEnded: () async -> Void + ) async -> WorkerMediaUploadResult { + let result = pendingVideoUpload?.result + ?? videoUploadFailureResult(errorMessage: "Recording upload target could not be prepared.") + + queuedFrameData = nil + frameUploadTask?.cancel() + frameUploadTask = nil + heartbeatTask?.cancel() + heartbeatTask = nil + + await onBeforeMarkEnded() + await telemetry?.record( + "session_end_requested", + source: "ios_app", + stage: result.isPending ? "pending" : result.succeeded ? 
"uploaded" : "failed", + sessionID: sessionID, + payload: [ + "video_upload_state": result.uploadState, + "video_bytes": result.byteSize, + "error": result.errorMessage ?? NSNull() + ] + ) + await telemetry?.flush() + return result + } + + func flushTelemetryAndStop() async { + await telemetry?.flushAndStop() + } + + func stop() async { + queuedFrameData = nil + frameUploadTask?.cancel() + frameUploadTask = nil + heartbeatTask?.cancel() + heartbeatTask = nil + if let sessionID { + await telemetry?.record( + "session_stop", + source: "ios_app", + stage: "stopped", + sessionID: sessionID + ) + await telemetry?.flushAndStop() + } + } + + private func sendHeartbeat() async { + guard let sessionID else { return } + + let heartbeat = WorkerLiveHeartbeatRequest( + sessionID: sessionID, + webrtcRoomCode: roomCode, + currentStepIndex: currentStepIndex, + helpRequested: helpRequested, + status: "active", + lastFrameBucket: lastFrameBucket, + lastFramePath: lastFramePath + ) + + WorkerLiveLogger.log( + "heartbeat_sent", + sessionID: sessionID, + roomCode: roomCode, + bucket: lastFrameBucket, + path: lastFramePath, + uploadState: "active", + telemetry: telemetry + ) + + do { + let heartbeatStartedAt = CACurrentMediaTime() + try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetType: nil, + bucket: lastFrameBucket, + path: lastFramePath, + uploadState: "active" + ) { + let response = try await api.sendWorkerLiveHeartbeat(heartbeat) + await onHeartbeatResponse?(response) + } + + WorkerLiveLogger.log( + "heartbeat_result", + sessionID: sessionID, + roomCode: roomCode, + bucket: lastFrameBucket, + path: lastFramePath, + uploadState: "active", + durationMs: (CACurrentMediaTime() - heartbeatStartedAt) * 1000, + telemetry: telemetry + ) + } catch { + let message = error.localizedDescription + WorkerLiveLogger.log( + "heartbeat_result", + sessionID: sessionID, + roomCode: roomCode, + bucket: lastFrameBucket, + path: lastFramePath, + uploadState: "active", + error: 
message, + telemetry: telemetry + ) + await onHeartbeatFailure?(sessionID, message) + } + } + + private func drainQueuedFrames() async { + while !Task.isCancelled { + guard let frameData = queuedFrameData else { break } + queuedFrameData = nil + + let result = await uploadAsset( + assetType: "frame", + filename: "last-frame.jpg", + contentType: "image/jpeg", + data: frameData, + byteSize: frameData.count, + missingDataError: "Frame JPEG data was empty.", + source: "live-preview" + ) + + if result.succeeded { + lastFrameBucket = result.bucket + lastFramePath = result.path + if !Task.isCancelled { + await sendHeartbeat() + } + } + } + + frameUploadTask = nil + if queuedFrameData != nil, !Task.isCancelled { + frameUploadTask = Task { + await self.drainQueuedFrames() + } + } + } + + private func uploadAsset( + assetType: String, + filename: String, + contentType: String, + data: Data?, + byteSize: Int, + missingDataError: String, + source: String? = nil + ) async -> WorkerMediaUploadResult { + guard let sessionID else { + return WorkerMediaUploadResult( + assetType: assetType, + assetID: nil, + bucket: nil, + path: nil, + byteSize: byteSize, + uploadState: "failed", + errorMessage: "Session ID missing." + ) + } + + let logPrefix = assetType == "frame" ? 
"frame" : "video" + let assetUploadStartedAt = CACurrentMediaTime() + + do { + let targetStartedAt = CACurrentMediaTime() + let target = try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetType: assetType, + byteSize: byteSize, + uploadState: "pending" + ) { + try await api.requestWorkerMediaUploadTarget( + sessionID: sessionID, + assetType: assetType, + filename: filename, + contentType: contentType, + byteSize: byteSize, + source: source + ) + } + + WorkerLiveLogger.log( + "\(logPrefix)_upload_target", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "pending", + durationMs: (CACurrentMediaTime() - targetStartedAt) * 1000, + telemetry: telemetry + ) + + guard let data, !data.isEmpty else { + WorkerLiveLogger.log( + "\(logPrefix)_upload_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "failed", + error: missingDataError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: missingDataError + ) + } + + do { + let binaryUploadStartedAt = CACurrentMediaTime() + try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "pending" + ) { + try await api.uploadBinary(to: target, data: data, contentType: contentType) + } + + WorkerLiveLogger.log( + "\(logPrefix)_upload_success", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + 
uploadState: "pending", + durationMs: (CACurrentMediaTime() - binaryUploadStartedAt) * 1000, + metricValue: Double(byteSize), + metricUnit: "bytes", + telemetry: telemetry + ) + + do { + let finalizeStartedAt = CACurrentMediaTime() + try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded" + ) { + try await api.finalizeWorkerMediaUpload( + WorkerMediaFinalizeRequest( + assetID: target.assetID, + sessionID: sessionID, + bucket: target.bucket, + path: target.path, + status: "uploaded", + byteSize: byteSize, + error: nil + ) + ) + } + + WorkerLiveLogger.log( + "\(logPrefix)_finalize_success", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + durationMs: (CACurrentMediaTime() - finalizeStartedAt) * 1000, + metricValue: Double(byteSize), + metricUnit: "bytes", + telemetry: telemetry + ) + + return WorkerMediaUploadResult( + assetType: assetType, + assetID: target.assetID, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + errorMessage: nil + ) + } catch { + let finalizeError = "Finalize uploaded failed: \(error.localizedDescription)" + WorkerLiveLogger.log( + "\(logPrefix)_finalize_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + error: finalizeError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: finalizeError + ) + } + } catch { + let uploadError = error.localizedDescription + 
WorkerLiveLogger.log( + "\(logPrefix)_upload_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "failed", + error: uploadError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: uploadError + ) + } + } catch { + WorkerLiveLogger.log( + "\(logPrefix)_upload_failure", + sessionID: sessionID, + roomCode: roomCode, + assetType: assetType, + byteSize: byteSize, + uploadState: "failed", + error: error.localizedDescription, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + + return WorkerMediaUploadResult( + assetType: assetType, + assetID: nil, + bucket: nil, + path: nil, + byteSize: byteSize, + uploadState: "failed", + errorMessage: error.localizedDescription + ) + } + } + + private func uploadPreparedAsset( + assetType: String, + contentType: String, + target: WorkerMediaUploadTarget, + data: Data?, + byteSize: Int, + missingDataError: String + ) async -> WorkerMediaUploadResult { + guard let sessionID else { + return WorkerMediaUploadResult( + assetType: assetType, + assetID: target.assetID, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "failed", + errorMessage: "Session ID missing." + ) + } + + let logPrefix = assetType == "frame" ? 
"frame" : "video" + let assetUploadStartedAt = CACurrentMediaTime() + + guard let data, !data.isEmpty else { + WorkerLiveLogger.log( + "\(logPrefix)_upload_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "failed", + error: missingDataError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: missingDataError + ) + } + + do { + let binaryUploadStartedAt = CACurrentMediaTime() + try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "pending" + ) { + try await api.uploadBinary(to: target, data: data, contentType: contentType) + } + + WorkerLiveLogger.log( + "\(logPrefix)_upload_success", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "pending", + durationMs: (CACurrentMediaTime() - binaryUploadStartedAt) * 1000, + metricValue: Double(byteSize), + metricUnit: "bytes", + telemetry: telemetry + ) + + do { + let finalizeStartedAt = CACurrentMediaTime() + try await retry( + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded" + ) { + try await api.finalizeWorkerMediaUpload( + WorkerMediaFinalizeRequest( + assetID: target.assetID, + sessionID: sessionID, + bucket: target.bucket, + path: target.path, + status: "uploaded", + byteSize: byteSize, + error: nil + ) + ) + } + + WorkerLiveLogger.log( + "\(logPrefix)_finalize_success", + 
sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + durationMs: (CACurrentMediaTime() - finalizeStartedAt) * 1000, + metricValue: Double(byteSize), + metricUnit: "bytes", + telemetry: telemetry + ) + + return WorkerMediaUploadResult( + assetType: assetType, + assetID: target.assetID, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + errorMessage: nil + ) + } catch { + let finalizeError = "Finalize uploaded failed: \(error.localizedDescription)" + WorkerLiveLogger.log( + "\(logPrefix)_finalize_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "uploaded", + error: finalizeError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: finalizeError + ) + } + } catch { + let uploadError = error.localizedDescription + WorkerLiveLogger.log( + "\(logPrefix)_upload_failure", + sessionID: sessionID, + roomCode: roomCode, + assetID: target.assetID, + assetType: assetType, + bucket: target.bucket, + path: target.path, + byteSize: byteSize, + uploadState: "failed", + error: uploadError, + durationMs: (CACurrentMediaTime() - assetUploadStartedAt) * 1000, + telemetry: telemetry + ) + return await finalizeFailure( + logPrefix: logPrefix, + sessionID: sessionID, + assetType: assetType, + target: target, + byteSize: byteSize, + errorMessage: uploadError + ) + } + } + + private func videoUploadFailureResult(errorMessage: String) -> WorkerMediaUploadResult { + WorkerMediaUploadResult( + assetType: "video", + assetID: nil, + bucket: nil, + path: nil, + byteSize: 0, + 
uploadState: "failed",
      errorMessage: errorMessage
    )
  }

  /// Marks an asset as failed on the backend and returns the failed upload result.
  ///
  /// Sends a finalize request with status `"failed"` (through `retry`) and logs whether
  /// that bookkeeping call itself succeeded. An error thrown by the finalize call is only
  /// logged: the returned result always carries the caller's original `errorMessage`.
  ///
  /// - Parameters:
  ///   - logPrefix: Prefix for the `_finalize_success` / `_finalize_failure` log event names.
  ///   - sessionID: Session the asset belongs to.
  ///   - assetType: Asset type string forwarded to logs and to the result.
  ///   - target: Upload target whose asset ID, bucket and path identify the asset.
  ///   - byteSize: Size reported to the backend and in the result.
  ///   - errorMessage: The failure that triggered this call.
  /// - Returns: A result with `uploadState` set to `"failed"`.
  private func finalizeFailure(
    logPrefix: String,
    sessionID: String,
    assetType: String,
    target: WorkerMediaUploadTarget,
    byteSize: Int,
    errorMessage: String
  ) async -> WorkerMediaUploadResult {
    do {
      try await retry(
        sessionID: sessionID,
        roomCode: roomCode,
        assetID: target.assetID,
        assetType: assetType,
        bucket: target.bucket,
        path: target.path,
        byteSize: byteSize,
        uploadState: "failed"
      ) {
        try await api.finalizeWorkerMediaUpload(
          WorkerMediaFinalizeRequest(
            assetID: target.assetID,
            sessionID: sessionID,
            bucket: target.bucket,
            path: target.path,
            status: "failed",
            byteSize: byteSize,
            error: errorMessage
          )
        )
      }

      WorkerLiveLogger.log(
        "\(logPrefix)_finalize_success",
        sessionID: sessionID,
        roomCode: roomCode,
        assetID: target.assetID,
        assetType: assetType,
        bucket: target.bucket,
        path: target.path,
        byteSize: byteSize,
        uploadState: "failed",
        error: errorMessage,
        telemetry: telemetry
      )
    } catch {
      // Best effort: the asset is already failed, so a finalize error is logged, not propagated.
      WorkerLiveLogger.log(
        "\(logPrefix)_finalize_failure",
        sessionID: sessionID,
        roomCode: roomCode,
        assetID: target.assetID,
        assetType: assetType,
        bucket: target.bucket,
        path: target.path,
        byteSize: byteSize,
        uploadState: "failed",
        error: error.localizedDescription,
        telemetry: telemetry
      )
    }

    return WorkerMediaUploadResult(
      assetType: assetType,
      assetID: target.assetID,
      bucket: target.bucket,
      path: target.path,
      byteSize: byteSize,
      uploadState: "failed",
      errorMessage: errorMessage
    )
  }

  /// Runs `operation`, retrying it when it fails with a transient error.
  ///
  /// The operation is attempted once and then retried at most three more times, pausing
  /// via `sleeper` with the values in `backoffSchedule` between attempts (presumably
  /// nanoseconds, i.e. 0.75 s / 1.5 s / 3 s; confirm against the `sleeper` definition).
  /// Each scheduled retry is logged as `retry_scheduled`. The identifying parameters are
  /// used only for that log entry.
  ///
  /// - Parameter operation: The async throwing work to perform.
  /// - Returns: The value produced by the first successful attempt.
  /// - Throws: The last error when the task is cancelled, the schedule is exhausted, or
  ///   `isTransient` rejects the error.
  private func retry<T>(
    sessionID: String?,
    roomCode: String?,
    assetID: String? = nil,
    assetType: String?,
    bucket: String? = nil,
    path: String? = nil,
    byteSize: Int? = nil,
    uploadState: String? = nil,
    operation: () async throws -> T
  ) async throws -> T {
    let backoffSchedule: [UInt64] = [750_000_000, 1_500_000_000, 3_000_000_000]
    var attempt = 0

    while true {
      do {
        return try await operation()
      } catch {
        // Give up immediately on cancellation, exhausted schedule, or a permanent error.
        guard !Task.isCancelled, attempt < backoffSchedule.count, Self.isTransient(error) else {
          throw error
        }

        let retryCount = attempt + 1
        WorkerLiveLogger.log(
          "retry_scheduled",
          sessionID: sessionID,
          roomCode: roomCode,
          assetID: assetID,
          assetType: assetType,
          bucket: bucket,
          path: path,
          byteSize: byteSize,
          retryCount: retryCount,
          uploadState: uploadState,
          error: error.localizedDescription,
          telemetry: telemetry
        )

        await sleeper(backoffSchedule[attempt])
        attempt += 1
      }
    }
  }

  /// Returns `value` with surrounding whitespace and newlines removed, or `nil` when the
  /// input is `nil` or empty after trimming.
  private static func trimmed(_ value: String?) -> String? {
    guard let value = value?.trimmingCharacters(in: .whitespacesAndNewlines),
          !value.isEmpty
    else {
      return nil
    }
    return value
  }

  /// Decides whether `error` is worth retrying.
  ///
  /// Cancellation is never transient. `URLError` is transient only for the connectivity
  /// codes listed below. `OpsAPIError` is classified per case. Anything else is permanent.
  private static func isTransient(_ error: Error) -> Bool {
    if error is CancellationError {
      return false
    }

    if let urlError = error as? URLError {
      switch urlError.code {
      case .timedOut,
           .cannotFindHost,
           .cannotConnectToHost,
           .networkConnectionLost,
           .dnsLookupFailed,
           .notConnectedToInternet,
           .resourceUnavailable,
           .dataNotAllowed,
           .callIsActive,
           .internationalRoamingOff:
        return true
      default:
        return false
      }
    }

    if let opsError = error as?
OpsAPIError {
      switch opsError {
      case .invalidResponse:
        return true
      case .server(let statusCode, _):
        // HTTP status codes that are retried; every other status is treated as permanent.
        return [408, 409, 425, 429, 500, 502, 503, 504].contains(statusCode)
      case .notConfigured, .invalidURL, .missingWorkerSession, .missingWorkerBearerToken:
        return false
      }
    }

    return false
  }
}

/// One entry of the shipped-session history.
///
/// `Codable` so it can be serialized; `Identifiable` via `id`.
struct ShippedSessionRecord: Identifiable, Codable {
  let id: UUID
  let timestamp: Date
  let sopName: String
  let status: String

  /// Shared formatter for `timestampText`.
  ///
  /// Building a `DateFormatter` is expensive, so it is created once instead of on every
  /// property access. The auto-updating locale and time zone keep the output in line with
  /// the user's current settings, as a freshly created formatter would. Static properties
  /// are not part of the synthesized `Codable` representation.
  private static let timestampFormatter: DateFormatter = {
    let formatter = DateFormatter()
    formatter.locale = .autoupdatingCurrent
    formatter.timeZone = .autoupdatingCurrent
    formatter.dateStyle = .medium
    formatter.timeStyle = .medium
    return formatter
  }()

  /// Creates a record; `id` defaults to a fresh UUID.
  init(id: UUID = UUID(), timestamp: Date, sopName: String, status: String) {
    self.id = id
    self.timestamp = timestamp
    self.sopName = sopName
    self.status = status
  }

  /// `timestamp` rendered with medium date and medium time styles.
  var timestampText: String {
    Self.timestampFormatter.string(from: timestamp)
  }
}

/// Session ID and recording file path of a worker recording, stored by
/// `PendingWorkerRecordingStore`.
private struct PendingWorkerRecording: Codable, Equatable {
  let sessionID: String
  let filePath: String
}

/// Stores a single `PendingWorkerRecording` in `UserDefaults.standard`.
private enum PendingWorkerRecordingStore {
  /// JSON-encodes the session/file pair and stores it under `defaultsKey`.
  /// Does nothing if encoding fails.
  static func remember(defaultsKey: String, sessionID: String, fileURL: URL) {
    let pending = PendingWorkerRecording(sessionID: sessionID, filePath: fileURL.path)
    guard let encoded = try?
JSONEncoder().encode(pending) else { return }
    UserDefaults.standard.set(encoded, forKey: defaultsKey)
  }

  /// Removes the value stored under `defaultsKey` from standard user defaults.
  static func clear(defaultsKey: String) {
    UserDefaults.standard.removeObject(forKey: defaultsKey)
  }
}

/// A frame queued for analysis, with the audit-recording flag and the time it was enqueued.
/// `@unchecked Sendable`: the compiler does not verify thread-safety of the wrapped `UIImage`.
private struct IPhoneAnalysisFrameEnvelope: @unchecked Sendable {
  let image: UIImage
  let shouldRecordAudit: Bool
  let enqueuedAt: CFTimeInterval
}

/// Wrapper that lets a `CVPixelBuffer` be captured by `@Sendable` closures (unchecked).
private struct SendableStreamPixelBuffer: @unchecked Sendable {
  let pixelBuffer: CVPixelBuffer
}

/// Wrapper that lets a `CMSampleBuffer` be captured by `@Sendable` closures (unchecked).
private struct SendableSampleBuffer: @unchecked Sendable {
  let sampleBuffer: CMSampleBuffer
}

/// Sendable (unchecked) copy of the fields of a `VideoDecoder.DecodedFrame`.
private struct SendableDecodedVideoFrame: @unchecked Sendable {
  let pixelBuffer: CVPixelBuffer
  let presentationTimeStamp: CMTime

  init(_ frame: VideoDecoder.DecodedFrame) {
    pixelBuffer = frame.pixelBuffer
    presentationTimeStamp = frame.presentationTimeStamp
  }
}

/// Converts pixel buffers to `UIImage` using a `CIContext` configured for software rendering.
private final class StreamPixelBufferImageRenderer: @unchecked Sendable {
  private let context = CIContext(options: [.useSoftwareRenderer: true])

  /// Renders the full extent of `pixelBuffer` into a `UIImage`.
  /// - Returns: `nil` when the buffer has a zero dimension or Core Image cannot create the image.
  func makeUIImage(from pixelBuffer: CVPixelBuffer) -> UIImage? {
    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)
    guard width > 0, height > 0 else { return nil }

    let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
    let rect = CGRect(x: 0, y: 0, width: width, height: height)
    guard let cgImage = context.createCGImage(ciImage, from: rect) else { return nil }
    return UIImage(cgImage: cgImage)
  }
}

/// Runs `VideoDecoder` work on a private serial queue and forwards decoded frames to a callback.
private final class GlassesVideoDecodeLane: @unchecked Sendable {
  private let queue = DispatchQueue(
    label: "visionclaw.glasses-video-decode-lane",
    qos: .userInitiated
  )
  private let decoder = VideoDecoder()
  private var onFrameDecoded: (@Sendable (SendableDecodedVideoFrame) -> Void)?

  init() {
    decoder.setFrameCallback { [weak self] frame in
      self?.onFrameDecoded?(SendableDecodedVideoFrame(frame))
    }
  }

  /// Installs the decoded-frame callback. Blocks the caller until the lane's queue has
  /// applied the change (`queue.sync`).
  func setFrameCallback(_ callback: @escaping @Sendable (SendableDecodedVideoFrame) -> Void) {
    queue.sync {
      self.onFrameDecoded = callback
    }
  }

  /// Decodes `sampleBuffer` asynchronously on the lane's queue.
  /// - Parameter onError: Called with a description of the error if decoding throws.
  func decode(_ sampleBuffer: CMSampleBuffer, onError: @escaping @Sendable (String) -> Void) {
    let sendableSampleBuffer = SendableSampleBuffer(sampleBuffer: sampleBuffer)
    queue.async {
      do {
        try self.decoder.decode(sendableSampleBuffer.sampleBuffer)
      } catch {
        onError(String(describing: error))
      }
    }
  }

  /// Asynchronously invalidates the decoder's session on the lane's queue.
  func invalidateSession() {
    queue.async {
      self.decoder.invalidateSession()
    }
  }
}

/// Single-slot, latest-wins hand-off of iPhone frames to an analysis consumer.
///
/// At most one frame is pending at a time: submitting while a frame is still pending
/// replaces it and counts a drop. Frames are handed to `onFrameReady` one at a time; the
/// next frame is dispatched only after the consumer invokes the completion closure. All
/// mutable state is accessed on `queue`.
private final class IPhoneAnalysisLane: @unchecked Sendable {
  private let queue = DispatchQueue(
    label: "visionclaw.iphone.analysis-lane",
    qos: .userInitiated
  )
  private var pendingFrame: IPhoneAnalysisFrameEnvelope?
  private var isProcessing = false
  private var submittedCount: Int64 = 0
  private var processedCount: Int64 = 0
  private var droppedCount: Int64 = 0
  /// Bumped by `reset()`. A completion that belongs to an older generation is ignored so a
  /// frame dispatched before the reset cannot disturb the state established after it.
  private var generation: UInt64 = 0

  /// Consumer invoked with each frame and a completion it must call when done.
  /// When `nil`, frames are completed immediately.
  var onFrameReady: (@Sendable (IPhoneAnalysisFrameEnvelope, @escaping @Sendable () -> Void) -> Void)?

  /// Queues `image` for analysis, replacing any frame that is still waiting.
  func submit(_ image: UIImage, shouldRecordAudit: Bool) {
    queue.async {
      self.submittedCount += 1

      if self.pendingFrame != nil {
        self.droppedCount += 1
      }

      self.pendingFrame = IPhoneAnalysisFrameEnvelope(
        image: image,
        shouldRecordAudit: shouldRecordAudit,
        enqueuedAt: CACurrentMediaTime()
      )

      guard !self.isProcessing else { return }
      self.isProcessing = true
      self.drain()
    }
  }

  /// Drops the pending frame and zeroes all counters.
  ///
  /// A frame that is already with the consumer keeps running, but its completion is
  /// ignored once it arrives (see `generation`).
  func reset() {
    queue.async {
      self.generation &+= 1
      self.pendingFrame = nil
      self.isProcessing = false
      self.submittedCount = 0
      self.processedCount = 0
      self.droppedCount = 0
    }
  }

  /// Hands the pending frame (if any) to the consumer. Must run on `queue`.
  private func drain() {
    guard let frame = pendingFrame else {
      isProcessing = false
      return
    }

    pendingFrame = nil
    let dispatchedGeneration = generation
    let completion: @Sendable () -> Void = { [weak self] in
      guard let self else { return }
      self.queue.async {
        // Stale completion from before a reset(): the post-reset state (counters,
        // isProcessing, any newly dispatched frame) must not be touched.
        guard self.generation == dispatchedGeneration else { return }
        self.processedCount += 1
        self.logIfNeeded(lastLatencyMs: (CACurrentMediaTime() - frame.enqueuedAt) * 1000)
        self.drain()
      }
    }

    if let onFrameReady {
      onFrameReady(frame, completion)
    } else {
      completion()
    }
  }

  /// Logs lane statistics for the first processed frame and every 20th one after that.
  private func logIfNeeded(lastLatencyMs: Double) {
    guard processedCount == 1 || processedCount % 20 == 0 else { return }
    let queueDepth = pendingFrame == nil ? 0 : 1
    NSLog(
      "[Stream] iPhone analysis lane processed=%lld dropped=%lld queue-depth=%d last-latency=%.1fms",
      processedCount,
      droppedCount,
      queueDepth,
      lastLatencyMs
    )
  }
}

/// Produces JPEG data for live-preview frames on a private utility-QoS queue.
private final class LivePreviewFrameEncoder: @unchecked Sendable {
  private let queue = DispatchQueue(
    label: "visionclaw.live-preview-frame-encoder",
    qos: .utility
  )

  /// Resizes `image` for live preview and JPEG-encodes it off the caller's executor.
  func encode(
    image: UIImage,
    maxDimension: CGFloat,
    compressionQuality: CGFloat
  ) async -> Data?
{ + await withCheckedContinuation { continuation in + queue.async { + let previewImage = image.resizedForLivePreview(maxDimension: maxDimension) + continuation.resume(returning: previewImage.jpegData(compressionQuality: compressionQuality)) + } + } + } +} + @MainActor class StreamSessionViewModel: ObservableObject { @Published var currentVideoFrame: UIImage? @@ -43,323 +2870,3906 @@ class StreamSessionViewModel: ObservableObject { @Published var hasActiveDevice: Bool = false @Published var streamingMode: StreamingMode = .glasses @Published var selectedResolution: StreamingResolution = .low + @Published var preferredCaptureMode: StreamingMode = .iPhone + @Published var isSopAuditRunning: Bool = false + @Published var sopAuditSecondsRemaining: Double = 15.0 + @Published var sopAuditStatusMessage: String = "" + @Published var selectedSOP: SOPTemplate? + @Published var checklistItems: [ChecklistItemState] = [] + @Published var shouldDismissCapture: Bool = false + @Published var showShipSuccessToast: Bool = false + @Published var isListeningForVoice: Bool = false + @Published var isAiGuideStarting: Bool = false + @Published var isStepValidationRunning: Bool = false + @Published var aiGuideStatusMessage: String = "" + @Published var isDossierUploading: Bool = false + @Published var isSwitchingCaptureMode: Bool = false + @Published var dossierPipelineStatusMessage: String = "" + @Published var dossierPipelineStatusKind: DossierPipelineStatusKind = .info + @Published var dossierPipelineStatusTimestamp: String = "" + @Published var dossierSpotterHitCount: Int = 0 + @Published var shippedHistory: [ShippedSessionRecord] = [] + @Published var isSyncingOperations: Bool = false + @Published var operationsSyncError: String? + @Published var operationsSyncWarning: String? + @Published var workerProfile: BackendWorker? + @Published var registeredDevice: BackendDevice? + @Published var activeShift: BackendShift? 
+ @Published var assignedPackages: [BackendAssignedPackage] = [] + @Published var activeExecutionSession: BackendExecutionSession? + @Published var helpRequestNotes: String = "" + @Published var helpStatusMessage: String = "" + @Published var isRequestingHelp: Bool = false + @Published var packageClosureStatusMessage: String = "" + @Published var isClosingPackage: Bool = false + @Published var activeCaptureSOP: SOPTemplate? + @Published var geminiInstructionSyncStatus: String = "" + @Published private(set) var guidancePolicy: GuidancePolicy = .nextInstruction + @Published private(set) var guidancePolicyReason: String = "Start the assigned SOP." + @Published var iPhonePreviewSession: AVCaptureSession? + + @Published var availableSOPs: [SOPTemplate] = [] + @Published private(set) var locallyCompletedPendingTaskKeys: Set = [] + + var isStreaming: Bool { + streamingStatus != .stopped + } + + var resolutionLabel: String { + switch selectedResolution { + case .low: return "360x640" + case .medium: return "504x896" + case .high: return "720x1280" + @unknown default: return "Unknown" + } + } + + var progressText: String { + "\(checklistItems.filter { $0.isChecked }.count)/\(checklistItems.count)" + } + + var currentAssignedSOP: SOPTemplate? { + if let selectedSOP, pendingTaskSOPs.contains(selectedSOP) { + return selectedSOP + } + return pendingTaskSOPs.first + } + + var workerDisplayName: String { + if isDemoWorkerMode { + return "Lucas Pereira" + } + return workerProfile?.displayName ?? "Unassigned Worker" + } + + var workerRoleText: String { + workerProfile?.role?.uppercased() ?? "WORKER" + } + + var activePackageTitle: String { + assignedPackages.first?.title + ?? activeShift?.package?.title + ?? currentAssignedSOP?.packageTitle + ?? 
"No Active Package" + } + + var pendingTaskSOPs: [SOPTemplate] { + availableSOPs + .sorted { $0.sortOrder < $1.sortOrder } + .filter { !locallyCompletedPendingTaskKeys.contains(pendingTaskKey(for: $0)) } + } + + var pendingShiftLabel: String { + activeShift?.shiftName + ?? currentAssignedSOP?.shiftName + ?? "MORNING SHIFT" + } + + var pendingTaskHeaderSummary: String { + let count = pendingTaskSOPs.count + if count == 0 { + return "ALL PENDING TASKS COMPLETE" + } + return "\(count) PENDING TASK\(count == 1 ? "" : "S") · \(selectedCaptureModeLabel)" + } + + var activeAssignedPackageCount: Int { + assignedPackages.count + } + + var currentPackageProgressText: String { + guard let key = currentPackageCompletionKey, !currentPackageRequiredRemoteIDs.isEmpty else { + return availableSOPs.isEmpty ? "NO PACKAGE QUEUE" : "\(availableSOPs.count) SOPS QUEUED" + } + + let completedCount = locallyCompletedSopsByPackageKey[key]?.count ?? 0 + return "\(completedCount)/\(currentPackageRequiredRemoteIDs.count) PACKAGE SOPS COMPLETE" + } + + var currentSessionSyncLabel: String { + if let activeExecutionSession { + return "SESSION \(activeExecutionSession.id.prefix(8))" + } + if currentSopSessionId != nil { + return "LOCAL SESSION" + } + return "NOT STARTED" + } + + var canRequestHelp: Bool { + isSopAuditRunning + } + + var canTapBackOfficeCall: Bool { + canRequestHelp && !isRequestingHelp && !hasActiveHelpEscalation + } + + var backOfficeCallButtonTitle: String { + if webrtcViewModel.isActive { + return "LIVE" + } + if isRequestingHelp { + return "CALLING" + } + if hasActiveHelpEscalation { + return "RINGING" + } + return "CALL" + } + + var canCloseCurrentPackage: Bool { + guard activePackageRunID != nil else { return false } + guard let key = currentPackageCompletionKey, !currentPackageRequiredRemoteIDs.isEmpty else { return false } + let completed = locallyCompletedSopsByPackageKey[key] ?? 
[] + return Set(currentPackageRequiredRemoteIDs).isSubset(of: completed) + } + + var selectedCaptureModeLabel: String { + switch preferredCaptureMode { + case .glasses: return "META CAMERA" + case .iPhone: return "IPHONE CAMERA" + } + } + + var currentAssignmentSubtitle: String { + guard let sop = currentAssignedSOP else { return "No assignment loaded" } + let package = sop.packageTitle ?? activePackageTitle + let stepCount = sop.steps.count + return "\(package) · \(stepCount) STEP\(stepCount == 1 ? "" : "S")" + } + + var assignmentQueueSummary: String { + let count = pendingTaskSOPs.count + if count == 0 { + return "Queue clear" + } + if count == 1 { + return "1 assignment ready" + } + return "\(count) assignments ready" + } + + var cameraReadinessLabel: String { + hasActiveDevice ? "Meta camera ready" : "iPhone camera ready" + } + + var cameraReadinessDetail: String { + hasActiveDevice + ? "Glasses will be used for the next execution." + : "Using iPhone until glasses are available." + } + + var aiGuideButtonTitle: String { + if isAiGuideStarting { + return "STARTING AI" + } + if geminiAssistant.isGeminiActive && geminiAssistant.isAudioReady { + return "AI LISTENING" + } + if geminiAssistant.isGeminiActive { + return "AI CONNECTING" + } + return "RESUME AI" + } + + var canToggleAiGuide: Bool { + isSopAuditRunning && !isAiGuideStarting && !hasActiveHelpEscalation + } + + var canSwitchCaptureMode: Bool { + !isSwitchingCaptureMode && + !isRequestingHelp && + !webrtcViewModel.isActive && + !isDossierUploading && + !isFinalizingAndShipping + } + + var canRequestStepValidation: Bool { + isSopAuditRunning && !isStepValidationRunning && !hasActiveHelpEscalation + } + + // Photo capture properties + @Published var capturedPhoto: UIImage? 
+ @Published var showPhotoPreview: Bool = false + + // Operational backend integration + private let opsAPIClient = OpsAPIClient() + let geminiAssistant = GeminiSessionViewModel() + private let geminiLiveSpotter = GeminiLiveSpotter() + let webrtcViewModel = WebRTCSessionViewModel() + private var workerAdminSync: WorkerAdminLiveSessionCoordinator? + private var currentSopSessionId: String? + private var sopCountdownTask: Task? + private var sopVideoRecorder: SopVideoRecorder? + private let liveFrameProcessingQueue = DispatchQueue( + label: "stream.live.frame-processing", + qos: .userInitiated + ) + private var proofImagesByTargetID: [String: Data] = [:] + private var spotterEvidenceWindow = SpotterEvidenceWindow() + private var lastSpotterInferenceTime: Date = .distantPast + private var currentStepBecameActiveAt: Date = Date() + private var isSpotterInferenceInFlight = false + private var isFinalizingAndShipping = false + private var successToastTask: Task? + private var hasLoadedWorkerContext = false + private var hasEnteredWorkerHome = false + private var isUsingLocalSessionFallback = false + private var roomCodeCancellable: AnyCancellable? + private var connectionStateCancellable: AnyCancellable? + private var livePressureCancellable: AnyCancellable? + private var locallyCompletedSopsByPackageKey: [String: Set] = [:] + private var lastLivePreviewSyncAt: Date = .distantPast + private var hasActiveHelpEscalation = false + private struct LiveRoomSyncSnapshot: Equatable { + let roomCode: String + let helpRequested: Bool + let backOfficeConnected: Bool + } + private var lastLiveRoomSyncSnapshot: LiveRoomSyncSnapshot? 
+ private var lastLiveRoomSyncAt: Date = .distantPast + private let liveRoomRedundantSyncThrottleInterval: TimeInterval = 5 + private var hasReceivedBackOfficeConnectedHandshake = false + private var hasLoggedRoomCreatedForSession = false + private var hasLoggedRoomJoinedForSession = false + private var didAttemptPendingRecordingRecovery = false + private var shouldResumeAiSupportAfterBackOffice = false + private var isLiveRoomHandoffInProgress = false + private var lastWorkerLiveHeartbeatFailureWarningAt: Date = .distantPast + + private var isDemoWorkerMode: Bool { + let configuredCode = GeminiConfig.workerLoginCode.trimmingCharacters(in: .whitespacesAndNewlines) + let workerCode = workerProfile?.loginCode?.trimmingCharacters(in: .whitespacesAndNewlines) + let loginCode = workerCode?.isEmpty == false ? workerCode! : configuredCode + return loginCode.uppercased() == "EMBC-0001" + } + + private func pendingTaskKey(for sop: SOPTemplate) -> String { + [ + sop.shiftID ?? "shift", + sop.packageRunID ?? sop.packageID ?? "standalone", + sop.remoteID ?? sop.id.uuidString, + "\(sop.sortOrder)" + ].joined(separator: "::") + } + + // Hold-to-talk speech recognition + private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) + private let audioEngine = AVAudioEngine() + private var speechRequest: SFSpeechAudioBufferRecognitionRequest? + private var speechTask: SFSpeechRecognitionTask? + private var lastProcessedTranscript: String = "" + private var lastAiCommandKey: String = "" + private var lastAiCommandAt: Date = .distantPast + + private var currentPackageCompletionKey: String? { + if let runID = activePackageRunID { + return runID + } + return currentAssignedSOP?.packageID ?? assignedPackages.first?.id + } + + private var activePackageRunID: String? { + currentAssignedSOP?.packageRunID + ?? assignedPackages.first(where: { $0.id == currentAssignedSOP?.packageID })?.packageRunID + ?? 
assignedPackages.first?.packageRunID + } + + private var currentPackageRequiredRemoteIDs: [String] { + let packageID = currentAssignedSOP?.packageID ?? assignedPackages.first?.id + return availableSOPs + .filter { sop in + sop.required && + sop.packageID == packageID && + sop.sourceType == "package" + } + .compactMap(\.remoteID) + } + + private let historyDefaultsKey = "visionclaw.shipped.history.v2" + private let pendingRecordingDefaultsKey = "visionclaw.pending.worker.recording.v1" + private static let pipelineTimestampFormatter: DateFormatter = { + let formatter = DateFormatter() + formatter.locale = Locale(identifier: "en_US_POSIX") + formatter.dateFormat = "HH:mm:ss" + return formatter + }() + + // The core DAT SDK StreamSession - handles all streaming operations + private var streamSession: StreamSession + // Listener tokens are used to manage DAT SDK event subscriptions + private var stateListenerToken: AnyListenerToken? + private var videoFrameListenerToken: AnyListenerToken? + private var errorListenerToken: AnyListenerToken? + private var photoDataListenerToken: AnyListenerToken? + private let wearables: WearablesInterface + private let deviceSelector: AutoDeviceSelector + private var deviceMonitorTask: Task? + private var iPhoneCameraManager: IPhoneCameraManager? + private var conversationAudioRecorder: ConversationAudioRecorder? + private var holdToTalkAudioLease: WorkerAudioRouteLease? + private var viewerAudioRouteLease: WorkerAudioRouteLease? 
+ private let iPhoneAnalysisLane = IPhoneAnalysisLane() + private let livePreviewFrameEncoder = LivePreviewFrameEncoder() + private let streamImageRenderer = StreamPixelBufferImageRenderer() + private let videoDecodeLane = GlassesVideoDecodeLane() + + private var backgroundFrameCount = 0 + private var bgDiagLogged = false + private var lastGlassesAnalysisFrameQueuedAt: CFTimeInterval = 0 + + init(wearables: WearablesInterface) { + self.wearables = wearables + // Let the SDK auto-select from available devices + self.deviceSelector = AutoDeviceSelector(wearables: wearables) + let config = StreamSessionConfig( + videoCodec: VideoCodec.raw, + resolution: StreamingResolution.low, + frameRate: 24) + streamSession = StreamSession(streamSessionConfig: config, deviceSelector: deviceSelector) + + // Monitor device availability + deviceMonitorTask = Task { @MainActor in + for await device in deviceSelector.activeDeviceStream() { + self.hasActiveDevice = device != nil + self.reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: true) + } + } + + setupVideoDecoder() + attachListeners() + loadHistoryFromDefaults() + requestSpeechPermissionsIfNeeded() + observeWebRTCSession() + geminiAssistant.onInputCommand = { [weak self] transcript in + Task { @MainActor [weak self] in + self?.handleVoiceTranscript(transcript) + } + } + geminiAssistant.onInputAudioChunk = { [weak self] data in + self?.sopVideoRecorder?.appendInputAudio(data) + self?.conversationAudioRecorder?.appendInputAudio(data) + } + geminiAssistant.onOutputAudioChunk = { [weak self] data in + self?.sopVideoRecorder?.appendOutputAudio(data) + self?.conversationAudioRecorder?.appendOutputAudio(data) + } + iPhoneAnalysisLane.onFrameReady = { [weak self] frame, completion in + Task { @MainActor [weak self] in + guard let self else { + completion() + return + } + self.handleIPhoneAnalysisFrame(frame) + completion() + } + } + } + + private func setupVideoDecoder() { + let imageRenderer = streamImageRenderer + 
videoDecodeLane.setFrameCallback { [weak self, imageRenderer] decodedFrame in + Task { @MainActor [weak self] in + guard let self else { return } + let pixelBuffer = decodedFrame.pixelBuffer + let width = CVPixelBufferGetWidth(pixelBuffer) + let height = CVPixelBufferGetHeight(pixelBuffer) + let timeStampNs = decodedFrame.presentationTimeStamp.isValid + ? Int64(CMTimeGetSeconds(decodedFrame.presentationTimeStamp) * 1_000_000_000) + : VideoFrameBufferFactory.currentTimestampNs() + if self.webrtcViewModel.isActive { + self.webrtcViewModel.realtimeVideoForwarder.enqueuePixelBuffer( + pixelBuffer, + timeStampNs: timeStampNs + ) + } + let shouldRecordAudit = self.isSopAuditRunning + if shouldRecordAudit { + self.sopVideoRecorder?.appendPixelBuffer(pixelBuffer) + } + + guard self.shouldQueueGlassesAnalysisFrame(now: CACurrentMediaTime()) else { return } + let sendablePixelBuffer = SendableStreamPixelBuffer(pixelBuffer: pixelBuffer) + self.liveFrameProcessingQueue.async { [weak self] in + guard let image = imageRenderer.makeUIImage(from: sendablePixelBuffer.pixelBuffer) else { return } + Task { @MainActor [weak self] in + guard let self else { return } + self.handleAnalysisImageFrame(image, shouldRecordAudit: shouldRecordAudit) + if self.backgroundFrameCount <= 5 || self.backgroundFrameCount % 120 == 0 { + NSLog("[Stream] Background frame #%d decoded and forwarded (%dx%d)", + self.backgroundFrameCount, width, height) + } + } + } + } + } + } + + /// Recreate the StreamSession with the current selectedResolution. + /// Only call when not actively streaming. 
+ func updateResolution(_ resolution: StreamingResolution) { + guard !isStreaming else { return } + selectedResolution = resolution + let config = StreamSessionConfig( + videoCodec: VideoCodec.raw, + resolution: resolution, + frameRate: 24) + streamSession = StreamSession(streamSessionConfig: config, deviceSelector: deviceSelector) + attachListeners() + NSLog("[Stream] Resolution changed to %@", resolutionLabel) + } + + private func attachListeners() { + let realtimeVideoForwarder = webrtcViewModel.realtimeVideoForwarder + + // Subscribe to session state changes using the DAT SDK listener pattern + stateListenerToken = streamSession.statePublisher.listen { [weak self] state in + Task { @MainActor [weak self] in + self?.updateStatusFromState(state) + } + } + + // Subscribe to video frames from the device camera + // This callback fires whether the app is in the foreground or background, + // enabling continuous streaming even when the screen is locked. + videoFrameListenerToken = streamSession.videoFramePublisher.listen { [weak self] videoFrame in + Task { @MainActor [weak self] in + guard let self else { return } + let shouldForwardToWebRTC = self.webrtcViewModel.isActive + let shouldRecordAudit = self.isSopAuditRunning + + let isInBackground = UIApplication.shared.applicationState == .background + + if !isInBackground { + self.backgroundFrameCount = 0 + self.bgDiagLogged = false + + let sampleBuffer = videoFrame.sampleBuffer + let timeStampNs = Self.rtcTimestampNs(from: sampleBuffer) + if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + if shouldForwardToWebRTC { + realtimeVideoForwarder.enqueuePixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } + if shouldRecordAudit { + self.sopVideoRecorder?.appendPixelBuffer(pixelBuffer) + } + guard self.shouldQueueGlassesAnalysisFrame(now: CACurrentMediaTime()) else { return } + let sendablePixelBuffer = SendableStreamPixelBuffer(pixelBuffer: pixelBuffer) + let imageRenderer = self.streamImageRenderer + 
self.liveFrameProcessingQueue.async { [weak self] in + guard let image = imageRenderer.makeUIImage(from: sendablePixelBuffer.pixelBuffer) else { return } + Task { @MainActor [weak self] in + self?.handleAnalysisImageFrame(image, shouldRecordAudit: shouldRecordAudit) + } + } + } else if CMSampleBufferGetDataBuffer(sampleBuffer) != nil { + let decodeLane = self.videoDecodeLane + decodeLane.decode(sampleBuffer) { errorMessage in + NSLog("[Stream] Foreground decode error: %@", errorMessage) + } + } else { + guard self.shouldQueueGlassesAnalysisFrame(now: CACurrentMediaTime()) else { return } + self.liveFrameProcessingQueue.async { [weak self] in + guard let self else { return } + guard let image = videoFrame.makeUIImage() else { return } + let timeStampNs = VideoFrameBufferFactory.currentTimestampNs() + + if shouldForwardToWebRTC { + realtimeVideoForwarder.enqueueImage(image) + } + + Task { @MainActor [weak self] in + self?.handleProcessedLiveFrame( + image: image, + pixelBuffer: nil, + timeStampNs: timeStampNs, + shouldForwardToWebRTC: false, + shouldRecordAudit: shouldRecordAudit + ) + } + } + } + } else { + // In background: makeUIImage() uses VideoToolbox GPU rendering which iOS suspends. + // Instead, use our VideoDecoder (VTDecompressionSession) to decode compressed + // frames into pixel buffers, then convert via CPU CIContext. 
+ self.backgroundFrameCount += 1 + + let sampleBuffer = videoFrame.sampleBuffer + let hasCompressedData = CMSampleBufferGetDataBuffer(sampleBuffer) != nil + + if hasCompressedData { + // Compressed frame (HEVC/H.264) - decode via VTDecompressionSession + let decodeLane = self.videoDecodeLane + let frameCount = self.backgroundFrameCount + decodeLane.decode(sampleBuffer) { errorMessage in + if frameCount <= 5 || frameCount % 120 == 0 { + NSLog("[Stream] Background frame #%d decode error: %@", + frameCount, errorMessage) + } + } + } else if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + // Raw pixel buffer - convert directly via CPU CIContext + let timeStampNs = Self.rtcTimestampNs(from: sampleBuffer) + if shouldForwardToWebRTC { + realtimeVideoForwarder.enqueuePixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } + if shouldRecordAudit { + self.sopVideoRecorder?.appendPixelBuffer(pixelBuffer) + } + if self.shouldQueueGlassesAnalysisFrame(now: CACurrentMediaTime()) { + let sendablePixelBuffer = SendableStreamPixelBuffer(pixelBuffer: pixelBuffer) + let imageRenderer = self.streamImageRenderer + self.liveFrameProcessingQueue.async { [weak self] in + guard let image = imageRenderer.makeUIImage(from: sendablePixelBuffer.pixelBuffer) else { return } + Task { @MainActor [weak self] in + self?.handleAnalysisImageFrame(image, shouldRecordAudit: shouldRecordAudit) + } + } + } + self.videoDecodeLane.invalidateSession() + } + } + } + } + + // Subscribe to streaming errors + errorListenerToken = streamSession.errorPublisher.listen { [weak self] error in + Task { @MainActor [weak self] in + guard let self else { return } + // Suppress device-not-found errors when user hasn't started streaming yet + if self.streamingStatus == .stopped { + if case .deviceNotConnected = error { return } + if case .deviceNotFound = error { return } + } + let newErrorMessage = formatStreamingError(error) + if newErrorMessage != self.errorMessage { + showError(newErrorMessage) + } + } 
+ } + + updateStatusFromState(streamSession.state) + + // Subscribe to photo capture events + photoDataListenerToken = streamSession.photoDataPublisher.listen { [weak self] photoData in + Task { @MainActor [weak self] in + guard let self else { return } + if let uiImage = UIImage(data: photoData.data) { + self.capturedPhoto = uiImage + self.showPhotoPreview = true + } + } + } + } + + func handleStartStreaming() async { + let permission = Permission.camera + do { + let status = try await wearables.checkPermissionStatus(permission) + if status == .granted { + await startSession() + return + } + let requestStatus = try await wearables.requestPermission(permission) + if requestStatus == .granted { + await startSession() + return + } + showError("Permission denied") + } catch { + showError("Permission error: \(error.description)") + } + } + + func startSession() async { + geminiAssistant.streamingMode = streamingMode + await streamSession.start() + } + + private func showError(_ message: String) { + errorMessage = message + showError = true + } + + private func handleProcessedLiveFrame( + image: UIImage, + pixelBuffer: CVPixelBuffer?, + timeStampNs: Int64, + shouldForwardToWebRTC: Bool, + shouldRecordAudit: Bool + ) { + if shouldForwardToWebRTC { + if let pixelBuffer { + webrtcViewModel.pushVideoPixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } else { + webrtcViewModel.pushVideoFrame(image) + } + } + + handleAnalysisImageFrame(image, shouldRecordAudit: shouldRecordAudit) + + if shouldRecordAudit { + if let pixelBuffer { + sopVideoRecorder?.appendPixelBuffer(pixelBuffer) + } else { + sopVideoRecorder?.appendFrame(image) + } + } + } + + private func handleAnalysisImageFrame(_ image: UIImage, shouldRecordAudit: Bool) { + currentVideoFrame = image + if !hasReceivedFirstFrame { + hasReceivedFirstFrame = true + } + + geminiAssistant.sendVideoFrameIfThrottled(image: image) + + if shouldRecordAudit { + Task { await syncLivePreviewFrameIfNeeded(image: image) } + } + } + + 
private func shouldQueueGlassesAnalysisFrame(now: CFTimeInterval) -> Bool { + let interval: CFTimeInterval = webrtcViewModel.isUnderLiveVideoPressure ? 0.2 : 0.1 + if !hasReceivedFirstFrame || now - lastGlassesAnalysisFrameQueuedAt >= interval { + lastGlassesAnalysisFrameQueuedAt = now + return true + } + return false + } + + private func enqueueIPhoneAnalysisFrame(_ image: UIImage, shouldRecordAudit: Bool) { + iPhoneAnalysisLane.submit(image, shouldRecordAudit: shouldRecordAudit) + } + + private func handleIPhoneAnalysisFrame(_ frame: IPhoneAnalysisFrameEnvelope) { + handleAnalysisImageFrame(frame.image, shouldRecordAudit: frame.shouldRecordAudit) + } + + private func resetIPhoneAnalysisLane() { + iPhoneAnalysisLane.reset() + } + + private static func rtcTimestampNs(from sampleBuffer: CMSampleBuffer) -> Int64 { + let presentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) + if presentationTime.isValid { + return Int64(CMTimeGetSeconds(presentationTime) * 1_000_000_000) + } + return VideoFrameBufferFactory.currentTimestampNs() + } + + func stopSession() async { + await geminiAssistant.stopSession() + isAiGuideStarting = false + isStepValidationRunning = false + aiGuideStatusMessage = "" + if isSopAuditRunning { + await endAndShip(status: .userEnded) + } else { + await workerAdminSync?.stop() + workerAdminSync = nil + resetLiveRoomSyncSnapshot() + } + + if webrtcViewModel.isActive { + webrtcViewModel.stopSession() + } + activeCaptureSOP = nil + + if streamingMode == .iPhone { + stopIPhoneSession() + return + } + await streamSession.stop() + } + + func toggleGeminiAssistant() async { + guard canToggleAiGuide else { return } + if geminiAssistant.isGeminiActive { + await geminiAssistant.stopSession() + aiGuideStatusMessage = "AI guide paused." + sopAuditStatusMessage = aiGuideStatusMessage + return + } + + await startGeminiAssistant( + startingMessage: "Loading checklist guide...", + listeningMessage: "AI guide listening. 
Say \"I'm done\" when you want me to check this step." + ) + } + + @discardableResult + private func startGeminiAssistant( + startingMessage: String, + listeningMessage: String + ) async -> Bool { + guard isSopAuditRunning, !isAiGuideStarting, !hasActiveHelpEscalation else { return false } + geminiAssistant.streamingMode = streamingMode + geminiAssistant.configureWorkerAdminAPI( + opsAPIClient, + sessionID: activeExecutionSession?.id + ) + + isAiGuideStarting = true + aiGuideStatusMessage = startingMessage + sopAuditStatusMessage = aiGuideStatusMessage + await geminiAssistant.startSession(systemInstruction: buildGeminiSessionInstruction()) + isAiGuideStarting = false + if let errorMessage = geminiAssistant.errorMessage, !errorMessage.isEmpty { + sopAuditStatusMessage = errorMessage + aiGuideStatusMessage = errorMessage + await postExecutionEvent( + type: "ai_guide_failed", + payload: [ + "error": errorMessage, + "capture_mode": selectedCaptureModeLabel.lowercased() + ] + ) + return false + } else if geminiAssistant.isGeminiActive { + aiGuideStatusMessage = listeningMessage + sopAuditStatusMessage = aiGuideStatusMessage + await postExecutionEvent( + type: "ai_guide_started", + payload: [ + "capture_mode": selectedCaptureModeLabel.lowercased() + ] + ) + return true + } + return false + } + + private func autoStartAiGuideWhenCaptureIsReady() async { + await ensureAiGuideStarted(reason: "capture_ready") + } + + private func waitForMediaReadyBeforeAiStart(reason: String) async -> Bool { + guard streamingMode == .iPhone else { return true } + guard let camera = iPhoneCameraManager else { + await recordAiGuideMediaReadyTimeout(reason: reason, cameraReady: false) + return false + } + + aiGuideStatusMessage = "Waiting for phone camera and mic..." 
+ sopAuditStatusMessage = aiGuideStatusMessage + + let timeout: CFTimeInterval = 3.0 + let deadline = CACurrentMediaTime() + timeout + var cameraReady = false + + while CACurrentMediaTime() < deadline { + guard isSopAuditRunning, !hasActiveHelpEscalation else { return false } + let remaining = max(0, deadline - CACurrentMediaTime()) + cameraReady = await camera.waitUntilRunningAndAudioConfigured( + timeout: min(0.25, remaining) + ) + if cameraReady && hasReceivedFirstFrame { + try? await Task.sleep(nanoseconds: 150_000_000) + return true + } + try? await Task.sleep(nanoseconds: 100_000_000) + } + + await recordAiGuideMediaReadyTimeout(reason: reason, cameraReady: cameraReady) + return false + } + + private func recordAiGuideMediaReadyTimeout(reason: String, cameraReady: Bool) async { + let message = "Phone camera/mic still warming up. Tap Start AI to retry." + aiGuideStatusMessage = message + sopAuditStatusMessage = message + await WorkerTelemetry.shared.record( + "ai_guide_media_ready_timeout", + source: "gemini_live", + stage: "timeout", + sessionID: currentSopSessionId, + payload: [ + "reason": reason, + "camera_ready": cameraReady, + "has_first_frame": hasReceivedFirstFrame, + "capture_mode": captureModeEventValue(streamingMode) + ] + ) + await postExecutionEvent( + type: "ai_guide_media_ready_timeout", + payload: [ + "reason": reason, + "camera_ready": cameraReady, + "has_first_frame": hasReceivedFirstFrame, + "capture_mode": captureModeEventValue(streamingMode) + ] + ) + } + + @discardableResult + private func ensureAiGuideStarted( + reason: String, + maxAttempts: Int = 3 + ) async -> Bool { + guard isSopAuditRunning, !hasActiveHelpEscalation else { return false } + if geminiAssistant.isGeminiActive { + return true + } + guard !isAiGuideStarting else { return false } + guard await waitForMediaReadyBeforeAiStart(reason: reason) else { return false } + + aiGuideStatusMessage = "Connecting AI voice..." 
+ sopAuditStatusMessage = aiGuideStatusMessage + + for attempt in 1...maxAttempts { + guard isSopAuditRunning, !hasActiveHelpEscalation else { return false } + await WorkerTelemetry.shared.record( + "ai_guide_autostart_attempt", + source: "gemini_live", + stage: "attempt", + sessionID: currentSopSessionId, + payload: [ + "reason": reason, + "attempt": attempt, + "max_attempts": maxAttempts, + "has_first_frame": hasReceivedFirstFrame, + "capture_mode": captureModeEventValue(streamingMode) + ] + ) + await postExecutionEvent( + type: "ai_guide_autostart_attempt", + payload: [ + "reason": reason, + "attempt": attempt, + "has_first_frame": hasReceivedFirstFrame, + "capture_mode": captureModeEventValue(streamingMode) + ] + ) + + let started = await startGeminiAssistant( + startingMessage: attempt == 1 ? "Connecting AI voice..." : "Retrying AI voice...", + listeningMessage: "AI guide listening. Say \"I'm done\" or \"next step\" when you finish a step." + ) + if started { + await WorkerTelemetry.shared.record( + "ai_guide_autostart_ready", + source: "gemini_live", + stage: "ready", + sessionID: currentSopSessionId, + payload: [ + "reason": reason, + "attempt": attempt, + "has_first_frame": hasReceivedFirstFrame + ] + ) + return true + } + + guard attempt < maxAttempts else { break } + try? await Task.sleep(nanoseconds: UInt64(attempt) * 850_000_000) + } + + await WorkerTelemetry.shared.record( + "ai_guide_autostart_failed", + source: "gemini_live", + stage: "failed", + sessionID: currentSopSessionId, + payload: [ + "reason": reason, + "error": geminiAssistant.errorMessage ?? NSNull() + ] + ) + return false + } + + func beginLiveCapture(for sop: SOPTemplate) async { + selectedSOP = sop + activeCaptureSOP = sop + configureChecklist(for: sop) + showShipSuccessToast = false + shouldDismissCapture = false + sopAuditStatusMessage = "Loading checklist guide..." 
+ aiGuideStatusMessage = sopAuditStatusMessage + isAiGuideStarting = false + isStepValidationRunning = false + helpStatusMessage = "" + + if !hasLoadedWorkerContext { + await loadWorkerContextIfNeeded() + } + + if !isStreaming { + await startPreferredCamera() + } + + guard isStreaming else { return } + + await startSopAudit(for: sop) + if isSopAuditRunning && !geminiAssistant.isGeminiActive { + await ensureAiGuideStarted(reason: "begin_live_capture") + } + } + + func selectCaptureMode(_ mode: StreamingMode) { + guard mode != .glasses || hasActiveDevice else { + sopAuditStatusMessage = "Meta camera not connected." + return + } + preferredCaptureMode = mode + if webrtcViewModel.isActive && webrtcViewModel.isSupportMode { + do { + if let routeWarning = try configureWorkerAudioRoute(for: mode, reason: .viewer) { + helpStatusMessage = routeWarning + } + } catch { + helpStatusMessage = "Audio route update failed: \(error.localizedDescription)" + } + } + } + + func selectCaptureModeFromUI(_ mode: StreamingMode) { + selectCaptureMode(mode) + Task { @MainActor [weak self] in + await self?.switchToPreferredCaptureModeIfNeeded() + } + } + + func startCurrentAssignmentFromHome() { + reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: false) + guard let sop = currentAssignedSOP else { + if operationsSyncError == nil { + setCriticalOperationsSyncIssue( + phase: "assignment", + message: "No active SOP assignment is available for this worker." 
+ ) + } + return + } + + selectedSOP = sop + activeCaptureSOP = sop + shouldDismissCapture = false + } + + func presentCapture(for sop: SOPTemplate) { + selectedSOP = sop + activeCaptureSOP = sop + shouldDismissCapture = false + } + + func handleWorkerHomeEntered() async { + if !hasLoadedWorkerContext { + await loadWorkerContextIfNeeded() + } + reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: false) + await startHomeCameraPreviewIfNeeded() + + guard !hasEnteredWorkerHome else { return } + hasEnteredWorkerHome = true + await resetDemoShiftForHomeIfNeeded(reloadAssignments: false) + } + + func handleWorkerAppBecameActive() async { + guard hasEnteredWorkerHome else { return } + guard !isSopAuditRunning, activeCaptureSOP == nil else { return } + await resetDemoShiftForHomeIfNeeded(reloadAssignments: true) + await startHomeCameraPreviewIfNeeded() + } + + func restoreActiveCaptureIfNeeded() { + guard activeCaptureSOP == nil else { return } + guard isSopAuditRunning else { return } + if let activeSOP = selectedSOP ?? currentAssignedSOP { + activeCaptureSOP = activeSOP + } + } + + func switchToPreferredCaptureModeIfNeeded() async { + guard canSwitchCaptureMode else { return } + + if preferredCaptureMode == .glasses, !hasActiveDevice { + sopAuditStatusMessage = "Meta camera not connected." 
+ return + } + + if isStreaming && streamingMode == preferredCaptureMode { + return + } + + let previousMode = streamingMode + isSwitchingCaptureMode = true + defer { isSwitchingCaptureMode = false } + + await stopCurrentCameraTransportOnly() + await startPreferredCamera() + + if isStreaming, streamingMode == preferredCaptureMode, previousMode != streamingMode { + geminiAssistant.streamingMode = streamingMode + if isSopAuditRunning { + await postExecutionEvent( + type: "capture_mode_switched", + payload: [ + "from": captureModeEventValue(previousMode), + "to": captureModeEventValue(streamingMode), + "label": selectedCaptureModeLabel + ] + ) + } + } + } + + func loadWorkerContextIfNeeded() async { + guard !hasLoadedWorkerContext else { return } + await refreshWorkerContext() + } + + func refreshWorkerContext() async { + guard GeminiConfig.isOpsConfigured else { + setCriticalOperationsSyncIssue( + phase: "bootstrap", + message: "Set the ops-api URL in Settings to load assignments." + ) + return + } + + let loginCode = GeminiConfig.workerLoginCode.trimmingCharacters(in: .whitespacesAndNewlines) + let workerEmail = GeminiConfig.workerEmail.trimmingCharacters(in: .whitespacesAndNewlines) + guard !loginCode.isEmpty || !workerEmail.isEmpty else { + setCriticalOperationsSyncIssue( + phase: "bootstrap", + message: "Set a worker email or login code in Settings to bootstrap assignments." + ) + return + } + + isSyncingOperations = true + clearOperationsSyncState() + packageClosureStatusMessage = "" + defer { isSyncingOperations = false } + + do { + let payload = try await opsAPIClient.bootstrap( + loginCode: loginCode.isEmpty ? nil : loginCode, + email: workerEmail.isEmpty ? 
nil : workerEmail, + platform: "ios", + label: UIDevice.current.name + ) + + workerProfile = payload.worker + registeredDevice = payload.device + activeShift = payload.shift + let canonicalLucasTemplates = lucasDemoQueueTemplates() + let resolvedQueue = canonicalLucasQueue(from: payload.queue) + assignedPackages = + payload.assignedPackages.isEmpty + ? deriveAssignedPackages(from: resolvedQueue, canonicalTemplates: canonicalLucasTemplates) + : payload.assignedPackages + availableSOPs = resolvedQueue.map { hydrateQueueItem($0, canonicalTemplates: canonicalLucasTemplates) } + if selectedSOP == nil || !pendingTaskSOPs.contains(selectedSOP!) { + selectedSOP = pendingTaskSOPs.first + } + reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: false) + hasLoadedWorkerContext = true + + if availableSOPs.isEmpty { + if isDemoWorkerMode { + applyLucasDemoWorkerFallback(reason: "No remote SOPs were assigned yet. Using the local Lucas demo queue.") + } else { + setCriticalOperationsSyncIssue( + phase: "bootstrap", + message: "No SOPs assigned to this worker yet." + ) + } + } + + await recoverPendingWorkerRecordingIfNeeded() + } catch { + if isDemoWorkerMode { + applyLucasDemoWorkerFallback( + reason: "Assignment sync failed: \(error.localizedDescription). Using the local Lucas demo queue." + ) + hasLoadedWorkerContext = true + } else { + setCriticalOperationsSyncIssue( + phase: "bootstrap", + message: "Assignment sync failed: \(error.localizedDescription)" + ) + } + } + } + + private func canonicalLucasQueue(from queue: [WorkerQueueItem]) -> [WorkerQueueItem] { + var seen = Set() + return queue + .sorted { lhs, rhs in + let leftOrder = lhs.sortOrder == 0 ? lucasCanonicalOrder(for: lhs.sopID) : lhs.sortOrder + let rightOrder = rhs.sortOrder == 0 ? 
lucasCanonicalOrder(for: rhs.sopID) : rhs.sortOrder + if leftOrder == rightOrder { + return lhs.sopTitle < rhs.sopTitle + } + return leftOrder < rightOrder + } + .filter { item in + let key = item.sopID.lowercased() + guard !seen.contains(key) else { return false } + seen.insert(key) + return true + } + } + + private func hydrateQueueItem( + _ queueItem: WorkerQueueItem, + canonicalTemplates: [SOPTemplate] + ) -> SOPTemplate { + let canonical = canonicalTemplates.first { template in + template.name.caseInsensitiveCompare(queueItem.sopTitle) == .orderedSame + } + + let stepTemplates: [SOPStepTemplate] + if queueItem.steps.isEmpty { + stepTemplates = canonical?.steps ?? [] + } else { + stepTemplates = queueItem.steps.enumerated().map { index, step in + SOPStepTemplate( + id: step.id, + order: step.order == 0 ? index + 1 : step.order, + title: step.title, + description: step.description, + duration: step.duration, + validation: step.validation, + critical: step.critical, + aiPrompt: step.aiPrompt, + expectedObjects: step.expectedObjects, + preconditions: step.preconditions, + postconditions: step.postconditions, + skipRisk: step.skipRisk, + evidenceRequired: step.evidenceRequired, + allowManualComplete: step.allowManualComplete + ) + } + } + + let resolvedSortOrder = canonical?.sortOrder ?? (queueItem.sortOrder == 0 ? lucasCanonicalOrder(for: queueItem.sopID) : queueItem.sortOrder) + let resolvedShiftName = queueItem.shiftName ?? canonical?.shiftName ?? "Morning" + let resolvedPackageTitle = queueItem.packageTitle ?? canonical?.packageTitle + let resolvedPackageVersion = queueItem.packageVersion ?? canonical?.packageVersion + let resolvedSopVersion = queueItem.sopVersion ?? canonical?.sopVersion + let resolvedSourceType = + queueItem.packageID == nil && canonical?.packageID != nil + ? canonical?.sourceType ?? queueItem.sourceType + : queueItem.sourceType + + return SOPTemplate( + id: UUID(uuidString: queueItem.sopID) ?? canonical?.id ?? 
UUID(), + remoteID: queueItem.sopID, + name: canonical?.name ?? queueItem.sopTitle, + steps: stepTemplates, + estimatedDuration: canonical?.estimatedDuration ?? max(Double(max(stepTemplates.count, 1)) * 18.0, 18.0), + shiftID: validRemoteUUID(queueItem.shiftAssignmentID), + shiftName: resolvedShiftName, + packageID: queueItem.packageID ?? canonical?.packageID, + packageRunID: queueItem.packageRunID ?? canonical?.packageRunID, + packageTitle: resolvedPackageTitle, + packageVersion: resolvedPackageVersion, + sopVersion: resolvedSopVersion, + sourceType: resolvedSourceType, + sortOrder: resolvedSortOrder, + required: queueItem.required + ) + } + + private func deriveAssignedPackages( + from queue: [WorkerQueueItem], + canonicalTemplates: [SOPTemplate] + ) -> [BackendAssignedPackage] { + var resolved: [String: BackendAssignedPackage] = [:] + + for item in queue { + guard let packageID = item.packageID else { continue } + let matchingTemplate = canonicalTemplates.first { template in + template.name.caseInsensitiveCompare(item.sopTitle) == .orderedSame + } + resolved[packageID] = BackendAssignedPackage( + id: packageID, + title: item.packageTitle ?? matchingTemplate?.packageTitle ?? "Assigned Package", + description: nil, + outcome: nil, + version: item.packageVersion ?? matchingTemplate?.packageVersion, + shiftName: item.shiftName ?? matchingTemplate?.shiftName ?? "Morning", + active: item.active ?? 
true, + packageRunID: item.packageRunID, + packageRunStatus: nil, + packageRunStartedAt: item.startsAt, + packageRunCompletedAt: item.endsAt + ) + } + + return resolved.values.sorted { lhs, rhs in + let leftOrder = lucasCanonicalPackageOrder(for: lhs.title) + let rightOrder = lucasCanonicalPackageOrder(for: rhs.title) + if leftOrder == rightOrder { + return lhs.title < rhs.title + } + return leftOrder < rightOrder + } + } + + private func lucasCanonicalOrder(for sopID: String) -> Int { + switch sopID { + case "22222222-2222-2222-2222-222222222222": + return 1 + case "a1000001-0000-0000-0000-000000000001": + return 2 + case "a1000002-0000-0000-0000-000000000002": + return 3 + case "a1000003-0000-0000-0000-000000000003": + return 4 + default: + return 99 + } + } + + private func lucasCanonicalPackageOrder(for packageTitle: String) -> Int { + switch packageTitle { + case "Inbound Cold Chain Audit": + return 1 + case "QSR Value Meal Order": + return 2 + default: + return 99 + } + } + + private func applyLucasDemoWorkerFallback(reason: String) { + workerProfile = BackendWorker( + id: "worker-lucas", + loginCode: "EMBC-0001", + displayName: "Lucas Pereira", + role: "Kitchen Staff", + status: "active" + ) + + let inboundPackage = BackendAssignedPackage( + id: "33333333-3333-3333-3333-333333333333", + title: "Inbound Cold Chain Audit", + description: "Verify cold-chain compliance for inbound product before storing.", + outcome: "Cold Chain Verified", + version: 2, + shiftName: "Morning", + active: true, + packageRunID: nil, + packageRunStatus: nil, + packageRunStartedAt: nil, + packageRunCompletedAt: nil + ) + + let mealPackage = BackendAssignedPackage( + id: "b2000001-0000-0000-0000-000000000001", + title: "QSR Value Meal Order", + description: "Standard meal execution from assembly to drink handoff.", + outcome: "Order Fulfilled", + version: 2, + shiftName: "Morning", + active: true, + packageRunID: nil, + packageRunStatus: nil, + packageRunStartedAt: nil, + 
packageRunCompletedAt: nil + ) + + let shiftPackage = BackendPackage( + id: inboundPackage.id, + title: inboundPackage.title, + description: inboundPackage.description, + outcome: inboundPackage.outcome, + version: inboundPackage.version, + status: "active" + ) + + activeShift = BackendShift( + id: "shift-lucas-morning", + packageID: inboundPackage.id, + shiftName: "Morning", + startsAt: nil, + endsAt: nil, + active: true, + package: shiftPackage + ) + + assignedPackages = [inboundPackage, mealPackage] + availableSOPs = lucasDemoQueueTemplates() + selectedSOP = pendingTaskSOPs.first + setCriticalOperationsSyncIssue(phase: "bootstrap", message: reason) + } + + private func lucasDemoQueueTemplates() -> [SOPTemplate] { + [ + SOPTemplate( + remoteID: "22222222-2222-2222-2222-222222222222", + name: "Cold Chain Verification SOP", + steps: [ + SOPStepTemplate( + id: "inspect_packaging_seal", + order: 1, + title: "Inspect packaging seal", + description: "Check the inbound package seal before accepting the delivery.", + duration: "30s", + validation: "visual", + critical: true, + aiPrompt: "Look at the image and confirm whether the operator inspected the package seal before accepting the delivery.", + expectedObjects: ["seal", "package"], + allowManualComplete: true + ), + SOPStepTemplate( + id: "record_temperature_log", + order: 2, + title: "Record temperature log", + description: "Read the temperature and confirm it is entered into the log.", + duration: "30s", + validation: "visual", + critical: false, + aiPrompt: "Look at the image and confirm whether the operator recorded the product temperature in the log.", + expectedObjects: ["thermometer", "clipboard"], + allowManualComplete: true + ), + SOPStepTemplate( + id: "verify_lot_number", + order: 3, + title: "Verify lot number", + description: "Confirm the lot number is visible and matches the manifest.", + duration: "30s", + validation: "visual", + critical: false, + aiPrompt: "Look at the image and confirm whether the 
operator verified the lot number on the inbound package.", + expectedObjects: ["label", "lot"], + allowManualComplete: true + ), + SOPStepTemplate( + id: "sign_off", + order: 4, + title: "Sign off", + description: "Acknowledge the cold-chain verification and release storage.", + duration: "30s", + validation: "tap", + critical: false, + aiPrompt: "Look at the image and confirm whether the cold-chain verification was signed off.", + expectedObjects: ["clipboard", "signature"], + allowManualComplete: true + ), + ], + estimatedDuration: 72, + shiftID: nil, + shiftName: "Morning", + packageID: "33333333-3333-3333-3333-333333333333", + packageTitle: "Inbound Cold Chain Audit", + packageVersion: 2, + sopVersion: 1, + sourceType: "package", + sortOrder: 1, + required: true + ), + SOPTemplate( + remoteID: "a1000001-0000-0000-0000-000000000001", + name: "Burger Assembly", + steps: [ + SOPStepTemplate(id: "toast_the_bun", order: 1, title: "Toast the bun", description: "Place bun halves on the grill until golden.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the bun has been toasted.", expectedObjects: ["bun"], allowManualComplete: true), + SOPStepTemplate(id: "place_patty_on_grill", order: 2, title: "Place patty on grill", description: "Place the patty on the grill and season as needed.", duration: "30s", validation: "visual", critical: true, aiPrompt: "Look at the image and confirm whether the patty was placed on the grill.", expectedObjects: ["patty", "grill"], allowManualComplete: true), + SOPStepTemplate(id: "add_cheese_slice", order: 3, title: "Add cheese slice", description: "Place cheese slice on the patty before removal.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether a cheese slice was added to the patty.", expectedObjects: ["cheese", "patty"], allowManualComplete: true), + SOPStepTemplate(id: "apply_condiments", order: 4, title: "Apply 
condiments", description: "Apply standard condiments to the bottom bun.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether condiments were applied to the bun.", expectedObjects: ["bun", "condiments"], allowManualComplete: true), + SOPStepTemplate(id: "stack_ingredients", order: 5, title: "Stack ingredients", description: "Assemble ingredients in the correct order.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the burger ingredients were stacked in the correct order.", expectedObjects: ["bun", "patty", "lettuce"], allowManualComplete: true), + SOPStepTemplate(id: "quality_check", order: 6, title: "Quality check", description: "Confirm finished burger matches the reference build.", duration: "30s", validation: "visual", critical: true, aiPrompt: "Look at the image and confirm whether the finished burger matches the reference build.", expectedObjects: ["burger"], allowManualComplete: true), + ], + estimatedDuration: 108, + shiftID: nil, + shiftName: "Morning", + packageID: "b2000001-0000-0000-0000-000000000001", + packageTitle: "QSR Value Meal Order", + packageVersion: 2, + sopVersion: 2, + sourceType: "package", + sortOrder: 2, + required: true + ), + SOPTemplate( + remoteID: "a1000002-0000-0000-0000-000000000002", + name: "Fries Assembly", + steps: [ + SOPStepTemplate(id: "load_fry_basket", order: 1, title: "Load fry basket", description: "Fill the basket to the correct portion.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the fry basket was loaded to the correct portion.", expectedObjects: ["basket", "fries"], allowManualComplete: true), + SOPStepTemplate(id: "cook_fries", order: 2, title: "Cook fries", description: "Start the fryer and monitor the timer.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the fries are cooking in the 
fryer.", expectedObjects: ["fryer", "basket"], allowManualComplete: true), + SOPStepTemplate(id: "drain_and_salt", order: 3, title: "Drain and salt", description: "Drain basket and season fries.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the fries were drained and salted.", expectedObjects: ["fries", "salt"], allowManualComplete: true), + SOPStepTemplate(id: "bag_fries", order: 4, title: "Bag fries", description: "Transfer fries into the correct serving container.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the fries were transferred into the serving container.", expectedObjects: ["fries", "container"], allowManualComplete: true), + ], + estimatedDuration: 72, + shiftID: nil, + shiftName: "Morning", + packageID: "b2000001-0000-0000-0000-000000000001", + packageTitle: "QSR Value Meal Order", + packageVersion: 2, + sopVersion: 2, + sourceType: "package", + sortOrder: 3, + required: true + ), + SOPTemplate( + remoteID: "a1000003-0000-0000-0000-000000000003", + name: "Drink Prep", + steps: [ + SOPStepTemplate(id: "select_cup_size", order: 1, title: "Select cup size", description: "Choose the cup size that matches the ticket.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the correct cup size was selected.", expectedObjects: ["cup"], allowManualComplete: true), + SOPStepTemplate(id: "fill_beverage", order: 2, title: "Fill beverage", description: "Dispense the beverage to the marked fill line.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the beverage was filled to the marked line.", expectedObjects: ["cup", "drink"], allowManualComplete: true), + SOPStepTemplate(id: "add_lid_and_straw", order: 3, title: "Add lid and straw", description: "Seal the cup and attach the straw.", duration: "30s", validation: "visual", critical: 
false, aiPrompt: "Look at the image and confirm whether the lid and straw were added to the drink.", expectedObjects: ["lid", "straw"], allowManualComplete: true), + SOPStepTemplate(id: "stage_for_pickup", order: 4, title: "Stage for pickup", description: "Place the drink in the order hand-off zone.", duration: "30s", validation: "visual", critical: false, aiPrompt: "Look at the image and confirm whether the drink was staged for pickup.", expectedObjects: ["cup", "handoff"], allowManualComplete: true), + ], + estimatedDuration: 72, + shiftID: nil, + shiftName: "Morning", + packageID: "b2000001-0000-0000-0000-000000000001", + packageTitle: "QSR Value Meal Order", + packageVersion: 2, + sopVersion: 1, + sourceType: "package", + sortOrder: 4, + required: true + ), + ] + } + + func startSopAudit(for sop: SOPTemplate) async { + guard !isSopAuditRunning else { return } + + let sessionId = await createOrFallbackSessionID(for: sop) + await workerAdminSync?.stop() + workerAdminSync = WorkerAdminLiveSessionCoordinator( + api: opsAPIClient, + telemetry: WorkerTelemetry.shared, + onHeartbeatResponse: { [weak self] response in + Task { @MainActor [weak self] in + await self?.handleWorkerLiveHeartbeatResponse(response) + } + }, + onHeartbeatFailure: { [weak self] sessionID, message in + Task { @MainActor [weak self] in + await self?.handleWorkerLiveHeartbeatFailure(sessionID: sessionID, message: message) + } + } + ) + geminiLiveSpotter.configure(api: opsAPIClient) + currentSopSessionId = sessionId + activeCaptureSOP = sop + isSopAuditRunning = true + sopAuditSecondsRemaining = sop.estimatedDuration + sopAuditStatusMessage = "" + aiGuideStatusMessage = "" + isAiGuideStarting = false + isStepValidationRunning = false + proofImagesByTargetID = [:] + lastLivePreviewSyncAt = .distantPast + resetLiveRoomSyncSnapshot() + lastGlassesAnalysisFrameQueuedAt = 0 + hasLoggedRoomCreatedForSession = false + hasLoggedRoomJoinedForSession = false + lastWorkerLiveHeartbeatFailureWarningAt = 
.distantPast + if streamingMode == .iPhone { + sopVideoRecorder = nil + conversationAudioRecorder = ConversationAudioRecorder(sessionID: sessionId) + } else { + conversationAudioRecorder = nil + sopVideoRecorder = SopVideoRecorder(sessionID: sessionId) + if let fileURL = sopVideoRecorder?.outputURL { + rememberPendingRecording(sessionID: sessionId, fileURL: fileURL) + } + } + isDossierUploading = false + dossierSpotterHitCount = 0 + updateDossierPipelineStatus("Recording execution...", kind: .info) + lastSpotterInferenceTime = .distantPast + isSpotterInferenceInFlight = false + isFinalizingAndShipping = false + lastProcessedTranscript = "" + helpStatusMessage = "" + hasActiveHelpEscalation = false + shouldResumeAiSupportAfterBackOffice = false + packageClosureStatusMessage = "" + clearOperationsSyncState() + + if webrtcViewModel.isActive { + webrtcViewModel.stopSession() + } + + WorkerLiveLogger.log( + "session_start", + sessionID: sessionId, + roomCode: nil, + uploadState: "active" + ) + await workerAdminSync?.start( + sessionID: sessionId, + currentStepIndex: nextIncompleteStepIndex(), + helpRequested: false, + roomCode: nil + ) + + if streamingMode == .iPhone { + iPhoneCameraManager?.startRecording(sessionID: sessionId) + rememberPendingRecording(sessionID: sessionId, fileURL: expectedIPhoneRecordingURL(for: sessionId)) + } + + if isSopAuditRunning && !geminiAssistant.isGeminiActive { + await ensureAiGuideStarted(reason: "sop_started") + } + + await ensureObservationLiveRoomSession() + + // No countdown/auto-timeout for long SOP runs. + sopCountdownTask?.cancel() + sopCountdownTask = nil + } + + func startSopAudit() { + let sop = selectedSOP ?? pendingTaskSOPs.first ?? availableSOPs.first ?? 
SOPTemplate(name: "Wallet & Thermos", items: ["Wallet", "Thermos"]) + selectedSOP = sop + configureChecklist(for: sop) + Task { await startSopAudit(for: sop) } + } + + func toggleChecklistItem(itemID: UUID, viaVoice: Bool) { + guard let index = checklistItems.firstIndex(where: { $0.id == itemID }) else { return } + if !checklistItems[index].allowManualComplete && !checklistItems[index].isChecked && !viaVoice { + sopAuditStatusMessage = "This step completes through visual AI only." + return + } + + checklistItems[index].isChecked.toggle() + checklistItems[index].completionSource = checklistItems[index].isChecked + ? (viaVoice ? .voice : .manual) + : .pending + + let item = checklistItems[index] + Task { + await handleChecklistMutation( + item: item, + stepIndex: index, + eventType: item.isChecked ? "step_complete" : "step_reopened" + ) + } + + if checklistItems.allSatisfy({ $0.isChecked }) { + Task { await endAndShip(status: .allItemsChecked) } + } + } + + func userTappedEndAndShip() { + Task { await endAndShip(status: .userEnded) } + } + + func requestSupervisorHelp() { + Task { await requestSupervisorHelpFlow() } + } + + func closeCurrentPackage() { + Task { await closeCurrentPackageFlow() } + } + + func closeSupervisorRoom() { + webrtcViewModel.stopSession() + helpStatusMessage = "Supervisor room closed." 
+ hasActiveHelpEscalation = false + shouldResumeAiSupportAfterBackOffice = false + resetLiveRoomSyncSnapshot() + Task { @MainActor in + await workerAdminSync?.updateHelpRequested(false) + await patchActiveExecutionSession( + ExecutionSessionPatch( + helpRequested: false + ) + ) + await ensureObservationLiveRoomSession() + if isSopAuditRunning, !geminiAssistant.isGeminiActive { + await ensureAiGuideStarted(reason: "support_closed") + } + } + } + + func clearCaptureDismissFlag() { + shouldDismissCapture = false + } + + private func recordPackageProgressIfNeeded(for sop: SOPTemplate) { + guard sop.sourceType == "package" else { return } + guard let completionKey = sop.packageRunID ?? sop.packageID else { return } + guard let remoteSOPID = sop.remoteID else { return } + + var completed = locallyCompletedSopsByPackageKey[completionKey] ?? [] + completed.insert(remoteSOPID) + locallyCompletedSopsByPackageKey[completionKey] = completed + } + + private func markPendingTaskComplete(_ sop: SOPTemplate) { + locallyCompletedPendingTaskKeys.insert(pendingTaskKey(for: sop)) + } + + private func formatOperationsSyncMessage(phase: String, message: String) -> String { + "[\(phase)] \(message)" + } + + private func clearOperationsSyncState(clearWarning: Bool = true) { + operationsSyncError = nil + if clearWarning { + operationsSyncWarning = nil + } + } + + private func setCriticalOperationsSyncIssue(phase: String, message: String) { + operationsSyncError = formatOperationsSyncMessage(phase: phase, message: message) + } + + private func setOperationsSyncWarning(phase: String, message: String) { + operationsSyncWarning = formatOperationsSyncMessage(phase: phase, message: message) + } + + private func resetDemoShiftForHomeIfNeeded(reloadAssignments: Bool) async { + guard isDemoWorkerMode else { return } + guard !isSopAuditRunning, activeCaptureSOP == nil else { return } + + locallyCompletedPendingTaskKeys = [] + selectedSOP = nil + await syncGeminiSessionInstruction() + 
shouldDismissCapture = false + helpStatusMessage = "" + packageClosureStatusMessage = "" + + if reloadAssignments, hasLoadedWorkerContext { + await refreshWorkerContext() + } + } + + private func launchBackgroundMediaFinalization( + sessionID: String, + workerAdminSync: WorkerAdminLiveSessionCoordinator?, + preparedUpload: WorkerPreparedMediaUpload?, + wasIPhoneRecording: Bool, + iPhoneCameraManager: IPhoneCameraManager?, + conversationAudioRecorder: ConversationAudioRecorder?, + sopVideoRecorder: SopVideoRecorder? + ) { + let pendingDefaultsKey = pendingRecordingDefaultsKey + + Task.detached(priority: .utility) { + let backgroundTaskID = await SopMediaBackgroundTask.begin(name: "SOP media finalize") + let recordedVideoURL = await SopSessionMediaFinalizer.finishRecording( + wasIPhoneRecording: wasIPhoneRecording, + iPhoneCameraManager: iPhoneCameraManager, + conversationAudioRecorder: conversationAudioRecorder, + sopVideoRecorder: sopVideoRecorder + ) + + let result: WorkerMediaUploadResult + if let workerAdminSync, let preparedUpload { + result = await workerAdminSync.uploadPreparedVideoRecording( + from: recordedVideoURL, + preparedUpload: preparedUpload + ) + } else { + result = WorkerMediaUploadResult( + assetType: "video", + assetID: preparedUpload?.result.assetID, + bucket: preparedUpload?.result.bucket, + path: preparedUpload?.result.path, + byteSize: 0, + uploadState: "failed", + errorMessage: workerAdminSync == nil + ? "Worker admin sync was unavailable during background media finalize." + : "Recording upload target could not be prepared." + ) + } + + if result.succeeded { + PendingWorkerRecordingStore.clear(defaultsKey: pendingDefaultsKey) + if let recordedVideoURL { + try? 
FileManager.default.removeItem(at: recordedVideoURL) + } + } else if let recordedVideoURL { + PendingWorkerRecordingStore.remember( + defaultsKey: pendingDefaultsKey, + sessionID: sessionID, + fileURL: recordedVideoURL + ) + } + + WorkerLiveLogger.log( + "session_media_background_finalize_completed", + sessionID: sessionID, + assetID: result.assetID, + assetType: result.assetType, + bucket: result.bucket, + path: result.path, + byteSize: result.byteSize, + uploadState: result.uploadState, + error: result.errorMessage + ) + + await workerAdminSync?.flushTelemetryAndStop() + await SopMediaBackgroundTask.end(backgroundTaskID) + } + } + + func endAndShip(status: SopTerminationStatus, cancelCountdownTask: Bool = true) async { + guard !isFinalizingAndShipping, isSopAuditRunning, currentSopSessionId != nil else { return } + isFinalizingAndShipping = true + stopHoldToTalk() + let sessionID = currentSopSessionId + let syncedToBackend = activeExecutionSession != nil + let completedSOP = selectedSOP + let roomCodeAtEnd = webrtcViewModel.roomCode.isEmpty ? nil : webrtcViewModel.roomCode + let workerAdminSyncAtEnd = workerAdminSync + let wasIPhoneRecording = streamingMode == .iPhone + let recordingSource = wasIPhoneRecording ? "phone-recording" : "stream-capture" + let iPhoneCameraForFinalization = wasIPhoneRecording ? 
iPhoneCameraManager : nil + let conversationAudioRecorderForFinalization = conversationAudioRecorder + let sopVideoRecorderForFinalization = sopVideoRecorder + let proofImages = proofImagesByTargetID + let checklistPayload: [[String: Any]] = checklistItems.map { + [ + "name": $0.name, + "checked": $0.isChecked, + "source": $0.completionSource.rawValue + ] + } + let completedCount = checklistItems.filter(\.isChecked).count + let finalStepIndex = nextIncompleteStepIndex() + + WorkerLiveLogger.log( + "session_end_requested", + sessionID: sessionID, + roomCode: roomCodeAtEnd, + uploadState: "active" + ) + + isSopAuditRunning = false + isDossierUploading = true + updateDossierPipelineStatus("Registering replay upload...", kind: .active) + if cancelCountdownTask { + sopCountdownTask?.cancel() + } + sopCountdownTask = nil + + if wasIPhoneRecording { + resetIPhoneAnalysisLane() + iPhoneCameraManager = nil + iPhonePreviewSession = nil + currentVideoFrame = nil + hasReceivedFirstFrame = false + streamingStatus = .stopped + streamingMode = .glasses + geminiAssistant.streamingMode = .glasses + } + sopVideoRecorder = nil + conversationAudioRecorder = nil + proofImagesByTargetID = [:] + + await workerAdminSyncAtEnd?.updateCurrentStepIndex(finalStepIndex) + await workerAdminSyncAtEnd?.updateHelpRequested(false, sendImmediateHeartbeat: false) + let preparedVideoUpload = await workerAdminSyncAtEnd?.prepareVideoRecordingUpload(source: recordingSource) + + let videoUploadResult: WorkerMediaUploadResult + if let workerAdminSyncAtEnd { + videoUploadResult = await workerAdminSyncAtEnd.completeSession( + pendingVideoUpload: preparedVideoUpload + ) { [weak self] in + guard let self else { return } + + if activeExecutionSession != nil { + await self.postExecutionEvent( + type: "session_completed", + payload: [ + "termination_status": status.rawValue, + "completed_steps": completedCount, + "total_steps": self.checklistItems.count, + "checklist": checklistPayload + ] + ) + + await 
self.patchActiveExecutionSession( + ExecutionSessionPatch( + status: status == .allItemsChecked ? "completed" : "ended", + currentSopID: completedSOP?.remoteID, + currentStepIndex: finalStepIndex, + helpRequested: false, + endedAt: ISO8601DateFormatter().string(from: Date()) + ) + ) + } else { + self.updateDossierPipelineStatus("Execution ended locally. No backend session was active.", kind: .info) + } + } + } else { + videoUploadResult = WorkerMediaUploadResult( + assetType: "video", + assetID: nil, + bucket: nil, + path: nil, + byteSize: 0, + uploadState: "failed", + errorMessage: "Worker admin sync was unavailable during teardown." + ) + + if activeExecutionSession != nil { + await postExecutionEvent( + type: "session_completed", + payload: [ + "termination_status": status.rawValue, + "completed_steps": completedCount, + "total_steps": checklistItems.count, + "checklist": checklistPayload + ] + ) + + await patchActiveExecutionSession( + ExecutionSessionPatch( + status: status == .allItemsChecked ? "completed" : "ended", + currentSopID: completedSOP?.remoteID, + currentStepIndex: finalStepIndex, + helpRequested: false, + endedAt: ISO8601DateFormatter().string(from: Date()) + ) + ) + } else { + updateDossierPipelineStatus("Execution ended locally. No backend session was active.", kind: .info) + } + } + + if geminiAssistant.isGeminiActive { + await geminiAssistant.stopSession() + try? await Task.sleep(nanoseconds: 150_000_000) + } + + didAttemptPendingRecordingRecovery = false + launchBackgroundMediaFinalization( + sessionID: sessionID ?? 
UUID().uuidString, + workerAdminSync: workerAdminSyncAtEnd, + preparedUpload: preparedVideoUpload, + wasIPhoneRecording: wasIPhoneRecording, + iPhoneCameraManager: iPhoneCameraForFinalization, + conversationAudioRecorder: conversationAudioRecorderForFinalization, + sopVideoRecorder: sopVideoRecorderForFinalization + ) + + if let activeExecutionSession { + if let remoteSOPID = completedSOP?.remoteID { + await createExecutionMemoryLink( + sessionID: activeExecutionSession.id, + sopID: remoteSOPID, + completedSteps: completedCount + ) + } + await uploadEvidenceMediaAssets( + sessionID: activeExecutionSession.id, + proofImages: proofImages + ) + } + + if status == .allItemsChecked, let completedSOP { + recordPackageProgressIfNeeded(for: completedSOP) + } + + if let completedSOP { + markPendingTaskComplete(completedSOP) + } + + if videoUploadResult.uploadState == "failed" { + let errorMessage = videoUploadResult.errorMessage ?? "Video finalize failed." + let recordingLabel = wasIPhoneRecording ? "Phone recording" : "Session recording" + setCriticalOperationsSyncIssue( + phase: "media_reservation", + message: "\(recordingLabel) upload could not be reserved: \(errorMessage)" + ) + updateDossierPipelineStatus("\(recordingLabel) upload reservation failed.", kind: .error) + } else { + updateDossierPipelineStatus( + wasIPhoneRecording ? "Phone recording upload pending." : "Session recording upload pending.", + kind: .success + ) + } + + isDossierUploading = false + + appendHistoryRecord( + ShippedSessionRecord( + timestamp: Date(), + sopName: completedSOP?.name ?? "Unknown SOP", + status: videoUploadResult.isPending + ? "Upload pending" + : videoUploadResult.succeeded ? 
"Replay ready" : "Finalize failed" + ) + ) + + if let sessionID { + WorkerLiveLogger.log( + "session_end_completed", + sessionID: sessionID, + roomCode: roomCodeAtEnd, + assetID: videoUploadResult.assetID, + assetType: videoUploadResult.assetType, + bucket: videoUploadResult.bucket, + path: videoUploadResult.path, + byteSize: videoUploadResult.byteSize, + uploadState: videoUploadResult.uploadState, + error: videoUploadResult.errorMessage + ) + } + + if webrtcViewModel.isActive { + webrtcViewModel.stopSession() + } + workerAdminSync = nil + + hasActiveHelpEscalation = false + shouldResumeAiSupportAfterBackOffice = false + resetLiveRoomSyncSnapshot() + activeExecutionSession = nil + currentSopSessionId = nil + activeCaptureSOP = nil + selectedSOP = nil + await syncGeminiSessionInstruction() + sopAuditSecondsRemaining = 0.0 + if canCloseCurrentPackage { + packageClosureStatusMessage = "All required SOPs are complete. Close the package from NOW." + } + sopAuditStatusMessage = videoUploadResult.uploadState != "failed" + ? (syncedToBackend ? "Execution synced" : "Execution uploaded") + : "Execution ended with media reservation issues" + isSpotterInferenceInFlight = false + shouldDismissCapture = true + showShipSuccessToast = true + if !wasIPhoneRecording { + await startHomeCameraPreviewIfNeeded() + } + successToastTask?.cancel() + successToastTask = Task { @MainActor [weak self] in + try? 
await Task.sleep(nanoseconds: 2_000_000_000) + self?.showShipSuccessToast = false + } + isFinalizingAndShipping = false + } + + // MARK: - iPhone Camera Mode + + private func startHomeCameraPreviewIfNeeded() async { + guard !isSopAuditRunning, activeCaptureSOP == nil else { return } + if isStreaming, streamingMode == preferredCaptureMode { + return + } + + if isStreaming { + await stopCurrentCameraTransportOnly() + } + + switch preferredCaptureMode { + case .glasses where hasActiveDevice: + await handleStartStreaming() + default: + let granted = await IPhoneCameraManager.requestPermission() + if granted { + preferredCaptureMode = .iPhone + startIPhoneSession() + } else if !showError { + showError("Camera permission denied. Please grant access in Settings.") + } + } + } + + func handleStartIPhone() async { + let granted = await IPhoneCameraManager.requestPermission() + if granted { + startIPhoneSession() + } else { + showError("Camera permission denied. Please grant access in Settings.") + } + } + + private func startIPhoneSession() { + streamingMode = .iPhone + geminiAssistant.streamingMode = .iPhone + currentVideoFrame = nil + hasReceivedFirstFrame = false + resetIPhoneAnalysisLane() + let camera = IPhoneCameraManager() + camera.analysisFrameInterval = webrtcViewModel.isUnderLiveVideoPressure ? 
0.45 : 0.2 + let realtimeVideoForwarder = webrtcViewModel.realtimeVideoForwarder + camera.onFirstPreviewFrame = { [weak self] in + Task { @MainActor [weak self] in + self?.hasReceivedFirstFrame = true + } + } + camera.onSampleBufferCaptured = { sampleBuffer in + guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return } + realtimeVideoForwarder.enqueuePixelBuffer( + pixelBuffer, + timeStampNs: Self.rtcTimestampNs(from: sampleBuffer) + ) + } + camera.onFrameCaptured = { [weak self] image in + Task { @MainActor [weak self] in + guard let self else { return } + self.enqueueIPhoneAnalysisFrame(image, shouldRecordAudit: self.isSopAuditRunning) + } + } + camera.start() + iPhoneCameraManager = camera + iPhonePreviewSession = camera.previewSession + streamingStatus = .streaming + NSLog("[Stream] iPhone camera mode started") + } + + private func observeWebRTCSession() { + roomCodeCancellable = webrtcViewModel.$roomCode + .removeDuplicates() + .sink { [weak self] roomCode in + guard let self else { return } + Task { @MainActor [weak self] in + guard let self else { return } + if !roomCode.isEmpty, !self.hasLoggedRoomCreatedForSession { + self.hasLoggedRoomCreatedForSession = true + WorkerLiveLogger.log( + "room_created", + sessionID: self.currentSopSessionId, + roomCode: roomCode, + uploadState: "active" + ) + } + await self.syncLiveRoomState(roomCode: roomCode) + } + } + + connectionStateCancellable = webrtcViewModel.$connectionState + .removeDuplicates { lhs, rhs in + String(describing: lhs) == String(describing: rhs) + } + .sink { [weak self] state in + guard let self else { return } + Task { @MainActor [weak self] in + self?.updateHelpStatus(for: state) + } + } + + livePressureCancellable = webrtcViewModel.$isUnderLiveVideoPressure + .removeDuplicates() + .sink { [weak self] isUnderPressure in + guard let self else { return } + Task { @MainActor [weak self] in + self?.iPhoneCameraManager?.analysisFrameInterval = isUnderPressure ? 
0.45 : 0.2 + } + } + } + + private func stopIPhoneSession() { + sopCountdownTask?.cancel() + sopCountdownTask = nil + isSopAuditRunning = false + isSpotterInferenceInFlight = false + stopHoldToTalk() + resetIPhoneAnalysisLane() + + iPhoneCameraManager?.stop() + iPhoneCameraManager = nil + iPhonePreviewSession = nil + currentVideoFrame = nil + hasReceivedFirstFrame = false + streamingStatus = .stopped + streamingMode = .glasses + geminiAssistant.streamingMode = .glasses + NSLog("[Stream] iPhone camera mode stopped") + } + + func dismissError() { + showError = false + errorMessage = "" + } + + func capturePhoto() { + streamSession.capturePhoto(format: .jpeg) + } + + func dismissPhotoPreview() { + showPhotoPreview = false + capturedPhoto = nil + } + + private func updateStatusFromState(_ state: StreamSessionState) { + switch state { + case .stopped: + currentVideoFrame = nil + streamingStatus = .stopped + case .waitingForDevice, .starting, .stopping, .paused: + streamingStatus = .waiting + case .streaming: + streamingStatus = .streaming + } + } + + private func updateGuidancePolicy(_ policy: GuidancePolicy, reason: String) { + guidancePolicy = policy + guidancePolicyReason = reason + } + + private func formatStreamingError(_ error: StreamSessionError) -> String { + switch error { + case .internalError: + return "An internal error occurred. Please try again." + case .deviceNotFound: + return "Device not found. Please ensure your device is connected." + case .deviceNotConnected: + return "Device not connected. Please check your connection and try again." + case .timeout: + return "The operation timed out. Please try again." + case .videoStreamingError: + return "Video streaming failed. Please try again." + case .audioStreamingError: + return "Audio streaming failed. Please try again." + case .permissionDenied: + return "Camera permission denied. Please grant permission in Settings." + case .hingesClosed: + return "The hinges on the glasses were closed. 
Please open the hinges and try again." + @unknown default: + return "An unknown streaming error occurred." + } + } + + private func configureChecklist(for sop: SOPTemplate) { + checklistItems = sop.steps + .sorted { $0.order < $1.order } + .map { step in + ChecklistItemState( + itemID: step.id, + name: step.title, + description: step.description, + duration: step.duration, + validation: step.validation, + critical: step.critical, + aiPrompt: step.aiPrompt, + expectedObjects: step.expectedObjects, + preconditions: step.preconditions, + postconditions: step.postconditions, + skipRisk: step.skipRisk, + evidenceRequired: step.evidenceRequired, + allowManualComplete: step.allowManualComplete + ) + } + spotterEvidenceWindow.resetAll() + currentStepBecameActiveAt = Date() + updateGuidancePolicy(.nextInstruction, reason: "A new SOP assignment started.") + + Task { @MainActor [weak self] in + await self?.syncGeminiSessionInstruction(for: sop) + } + } + + private func appendHistoryRecord(_ record: ShippedSessionRecord) { + shippedHistory.insert(record, at: 0) + if shippedHistory.count > 100 { + shippedHistory = Array(shippedHistory.prefix(100)) + } + saveHistoryToDefaults() + } + + private func loadHistoryFromDefaults() { + guard let data = UserDefaults.standard.data(forKey: historyDefaultsKey) else { return } + guard let decoded = try? JSONDecoder().decode([ShippedSessionRecord].self, from: data) else { return } + shippedHistory = decoded + } + + private func saveHistoryToDefaults() { + guard let encoded = try? 
JSONEncoder().encode(shippedHistory) else { return } + UserDefaults.standard.set(encoded, forKey: historyDefaultsKey) + } + + private func rememberPendingRecording(sessionID: String, fileURL: URL) { + PendingWorkerRecordingStore.remember( + defaultsKey: pendingRecordingDefaultsKey, + sessionID: sessionID, + fileURL: fileURL + ) + } + + private func clearPendingRecording() { + PendingWorkerRecordingStore.clear(defaultsKey: pendingRecordingDefaultsKey) + } + + private func loadPendingRecording() -> PendingWorkerRecording? { + guard let data = UserDefaults.standard.data(forKey: pendingRecordingDefaultsKey) else { return nil } + return try? JSONDecoder().decode(PendingWorkerRecording.self, from: data) + } + + private func expectedIPhoneRecordingURL(for sessionID: String) -> URL { + FileManager.default.temporaryDirectory + .appendingPathComponent("sop_\(sessionID)") + .appendingPathExtension("mp4") + } + + private func recoverPendingWorkerRecordingIfNeeded() async { + guard !didAttemptPendingRecordingRecovery else { return } + guard let pendingRecording = loadPendingRecording() else { + didAttemptPendingRecordingRecovery = true + return + } + + didAttemptPendingRecordingRecovery = true + let pendingURL = URL(fileURLWithPath: pendingRecording.filePath) + let recoveryURL = FileManager.default.fileExists(atPath: pendingURL.path) ? pendingURL : nil + let recoverySync = WorkerAdminLiveSessionCoordinator( + api: opsAPIClient, + sessionID: pendingRecording.sessionID, + heartbeatIntervalNanoseconds: 0, + telemetry: WorkerTelemetry.shared + ) + + let result = await recoverySync.uploadVideoRecording( + from: recoveryURL, + source: "phone-recording" + ) + if result.succeeded { + clearPendingRecording() + if let recoveryURL { + try? FileManager.default.removeItem(at: recoveryURL) + } + } else { + setCriticalOperationsSyncIssue( + phase: "media_finalize", + message: "Recovered recording finalize failed: \(result.errorMessage ?? 
"Unknown error")" + ) + } + + do { + _ = try await opsAPIClient.updateExecutionSession( + id: pendingRecording.sessionID, + patch: ExecutionSessionPatch( + status: "ended", + helpRequested: false, + endedAt: ISO8601DateFormatter().string(from: Date()) + ) + ) + } catch { + if result.succeeded { + setCriticalOperationsSyncIssue( + phase: "session_patch", + message: "Recovered video uploaded, but session end sync failed: \(error.localizedDescription)" + ) + } + } + } + + private func requestSpeechPermissionsIfNeeded() { + SFSpeechRecognizer.requestAuthorization { status in + if status != .authorized { + NSLog("[Speech] Speech recognition authorization denied: %@", String(describing: status)) + } + } + + AVAudioApplication.requestRecordPermission { granted in + if !granted { + NSLog("[Speech] Microphone permission denied") + } + } + } + + func startHoldToTalk() { + guard !isListeningForVoice else { return } + guard !geminiAssistant.isGeminiActive else { + sopAuditStatusMessage = "AI guide is already listening through the active audio route." 
+ aiGuideStatusMessage = sopAuditStatusMessage + return + } + guard let speechRecognizer, speechRecognizer.isAvailable else { + sopAuditStatusMessage = "Speech recognizer unavailable" + return + } + + do { + if let routeWarning = try configureWorkerAudioRoute(for: preferredCaptureMode, reason: .holdToTalk) { + helpStatusMessage = routeWarning + } + } catch { + sopAuditStatusMessage = "Audio session error: \(error.localizedDescription)" + return + } + + speechTask?.cancel() + speechTask = nil + + let request = SFSpeechAudioBufferRecognitionRequest() + request.shouldReportPartialResults = true + speechRequest = request + + let inputNode = audioEngine.inputNode + let recordingFormat = inputNode.outputFormat(forBus: 0) + inputNode.removeTap(onBus: 0) + inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in + request.append(buffer) + } + + audioEngine.prepare() + do { + try audioEngine.start() + } catch { + sopAuditStatusMessage = "Mic start failed: \(error.localizedDescription)" + stopHoldToTalk() + return + } + + isListeningForVoice = true + + speechTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in + guard let self else { return } + + if let result { + let transcript = result.bestTranscription.formattedString.lowercased() + self.handleVoiceTranscript(transcript) + } - var isStreaming: Bool { - streamingStatus != .stopped + if error != nil { + self.stopHoldToTalk() + } + } } - var resolutionLabel: String { - switch selectedResolution { - case .low: return "360x640" - case .medium: return "504x896" - case .high: return "720x1280" - @unknown default: return "Unknown" + func stopHoldToTalk() { + guard isListeningForVoice || audioEngine.isRunning else { return } + + if audioEngine.isRunning { + audioEngine.stop() + audioEngine.inputNode.removeTap(onBus: 0) + } + + speechRequest?.endAudio() + speechRequest = nil + speechTask?.cancel() + speechTask = nil + isListeningForVoice = false + let lease = 
holdToTalkAudioLease + holdToTalkAudioLease = nil + Task { + if let lease { + await WorkerAudioRouteCoordinator.shared.release(lease: lease) + } + } + + if webrtcViewModel.isActive && webrtcViewModel.isSupportMode { + do { + if let routeWarning = try configureWorkerAudioRoute(for: preferredCaptureMode, reason: .viewer) { + helpStatusMessage = routeWarning + } + } catch { + NSLog("[Speech] Failed to restore talkback audio route: %@", error.localizedDescription) + } } } - // Photo capture properties - @Published var capturedPhoto: UIImage? - @Published var showPhotoPreview: Bool = false + private func handleVoiceTranscript(_ transcript: String) { + let normalized = transcript + .trimmingCharacters(in: .whitespacesAndNewlines) + .lowercased() + guard !normalized.isEmpty, normalized != lastProcessedTranscript else { return } + lastProcessedTranscript = normalized - // Gemini Live integration - var geminiSessionVM: GeminiSessionViewModel? + if isBackOfficeCallIntent(normalized) { + recordAiCommandDetected("call_back_office", transcript: normalized) + requestSupervisorHelp() + return + } - // WebRTC Live streaming integration - var webrtcSessionVM: WebRTCSessionViewModel? + if isStopAiGuideIntent(normalized) { + if geminiAssistant.isGeminiActive { + Task { @MainActor [weak self] in + await self?.geminiAssistant.stopSession() + } + } + aiGuideStatusMessage = "AI guide paused." + sopAuditStatusMessage = aiGuideStatusMessage + recordAiCommandDetected("pause_ai", transcript: normalized) + return + } - // The core DAT SDK StreamSession - handles all streaming operations - private var streamSession: StreamSession - // Listener tokens are used to manage DAT SDK event subscriptions - private var stateListenerToken: AnyListenerToken? - private var videoFrameListenerToken: AnyListenerToken? - private var errorListenerToken: AnyListenerToken? - private var photoDataListenerToken: AnyListenerToken? 
- private let wearables: WearablesInterface - private let deviceSelector: AutoDeviceSelector - private var deviceMonitorTask: Task? - private var iPhoneCameraManager: IPhoneCameraManager? + if isVoiceStepAdvanceIntent(normalized) { + guard shouldProcessAiCommand("advance_step", transcript: normalized) else { return } + recordAiCommandDetected("advance_step", transcript: normalized) + completeActiveStepByVoice(transcript: normalized) + return + } - // CPU-based CIContext for rendering decoded pixel buffers in background - private let cpuCIContext = CIContext(options: [.useSoftwareRenderer: true]) - // VideoDecoder for decompressing HEVC/H.264 frames in background - private let videoDecoder = VideoDecoder() - private var backgroundFrameCount = 0 - private var bgDiagLogged = false + if isGuidedStepCheckIntent(normalized) { + guard shouldProcessAiCommand("check_step", transcript: normalized) else { return } + recordAiCommandDetected("check_step", transcript: normalized) + requestGuidedStepValidation(trigger: "voice_check") + return + } - init(wearables: WearablesInterface) { - self.wearables = wearables - // Let the SDK auto-select from available devices - self.deviceSelector = AutoDeviceSelector(wearables: wearables) - let config = StreamSessionConfig( - videoCodec: VideoCodec.raw, - resolution: StreamingResolution.low, - frameRate: 24) - streamSession = StreamSession(streamSessionConfig: config, deviceSelector: deviceSelector) + guard let checkRange = normalized.range(of: "check ") else { return } + let spokenItem = normalized[checkRange.upperBound...].trimmingCharacters(in: .whitespacesAndNewlines) + guard !spokenItem.isEmpty else { return } + + if let active = activeSpotterRequestItems().first, + active.name.lowercased().contains(spokenItem) || spokenItem.contains(active.name.lowercased()) { + guard shouldProcessAiCommand("check_named_step", transcript: normalized) else { return } + recordAiCommandDetected("check_named_step", transcript: normalized) + 
requestGuidedStepValidation(trigger: "voice_check") + } + } + + private func isStopAiGuideIntent(_ transcript: String) -> Bool { + transcript.contains("stop ai") || + transcript.contains("pause ai") || + transcript.contains("stop guide") || + transcript.contains("pause guide") + } + + private func isVoiceStepAdvanceIntent(_ transcript: String) -> Bool { + transcript.contains("i'm done") || + transcript.contains("im done") || + transcript.contains("done with this") || + transcript.contains("done with the step") || + transcript.contains("next step") || + transcript.contains("ready for next") || + transcript.contains("move on") + } + + private func isGuidedStepCheckIntent(_ transcript: String) -> Bool { + transcript.contains("check this step") || + transcript.contains("check step") || + transcript.contains("check again") || + transcript.contains("validate this") || + transcript.contains("validate step") || + transcript.contains("what is missing") || + transcript.contains("what am i missing") || + transcript.contains("what's missing") || + transcript.contains("did i do it right") + } + + private func shouldProcessAiCommand(_ commandKey: String, transcript: String) -> Bool { + let now = Date() + if commandKey == lastAiCommandKey, + now.timeIntervalSince(lastAiCommandAt) < 3.0 { + return false + } + lastAiCommandKey = commandKey + lastAiCommandAt = now + return true + } + + private func recordAiCommandDetected(_ commandKey: String, transcript: String) { + Task { [weak self] in + guard let self else { return } + await WorkerTelemetry.shared.record( + "ai_command_detected", + source: "gemini_live", + stage: commandKey, + sessionID: self.currentSopSessionId, + payload: [ + "command": commandKey, + "transcript": transcript + ] + ) + await self.postExecutionEvent( + type: "ai_command_detected", + payload: [ + "command": commandKey, + "transcript": transcript + ] + ) + } + } + + private func isBackOfficeCallIntent(_ transcript: String) -> Bool { + let normalized = 
transcript.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + guard normalized.contains("back office") || normalized.contains("supervisor") || normalized.contains("support") else { + return false + } + return normalized.contains("call") || + normalized.contains("dial") || + normalized.contains("ring") || + normalized.contains("request help") || + normalized.contains("need help") + } + + private func markFirstUncheckedAsVoice() { + guard let firstUnchecked = checklistItems.first(where: { !$0.isChecked }) else { return } + setChecklistItemChecked(itemID: firstUnchecked.id, source: .voice) + } + + private func completeActiveStepByVoice(transcript: String) { + guard let firstUnchecked = checklistItems.first(where: { !$0.isChecked }) else { + sopAuditStatusMessage = "All checklist steps are complete." + aiGuideStatusMessage = sopAuditStatusMessage + return + } + setChecklistItemChecked(itemID: firstUnchecked.id, source: .voice, voiceTranscript: transcript) + sopAuditStatusMessage = "Step confirmed by voice. Moving to the next step." + aiGuideStatusMessage = sopAuditStatusMessage + } + + private func setChecklistItemChecked( + itemID: UUID, + source: ChecklistCompletionSource, + voiceTranscript: String? 
= nil + ) { + guard let index = checklistItems.firstIndex(where: { $0.id == itemID }) else { return } + guard !checklistItems[index].isChecked else { return } + checklistItems[index].isChecked = true + checklistItems[index].completionSource = source + + let item = checklistItems[index] + spotterEvidenceWindow.reset(stepID: item.itemID) + currentStepBecameActiveAt = Date() + updateGuidancePolicy(.nextInstruction, reason: "Step completed by \(source.rawValue).") + Task { + await handleChecklistMutation( + item: item, + stepIndex: index, + eventType: "step_complete" + ) + if source == .voice { + let nextIndex = nextIncompleteStepIndex() + await WorkerTelemetry.shared.record( + "voice_step_advanced", + source: "gemini_live", + stage: "advanced", + sessionID: currentSopSessionId, + payload: [ + "step_index": index, + "step_id": item.itemID, + "step_name": item.name, + "next_step_index": nextIndex, + "transcript": voiceTranscript ?? NSNull() + ] + ) + await postExecutionEvent( + type: "voice_step_advanced", + payload: [ + "step_index": index, + "step_id": item.itemID, + "step_name": item.name, + "next_step_index": nextIndex, + "transcript": voiceTranscript ?? NSNull() + ] + ) + } + } + + if checklistItems.allSatisfy({ $0.isChecked }) { + Task { await endAndShip(status: .allItemsChecked) } + } + } + + private func setChecklistItemCheckedBySpotterID(_ itemID: String) { + setChecklistItemCheckedBySpotterID(itemID, evidence: nil) + } + + private func setChecklistItemCheckedBySpotterID(_ itemID: String, evidence: [String: Any]?) 
{ + guard let index = checklistItems.firstIndex(where: { $0.itemID == itemID }) else { return } + guard !checklistItems[index].isChecked else { return } + + checklistItems[index].isChecked = true + checklistItems[index].completionSource = .vision + dossierSpotterHitCount += 1 + spotterEvidenceWindow.reset(stepID: itemID) + currentStepBecameActiveAt = Date() + updateGuidancePolicy(.nextInstruction, reason: "Step completed after stable visual evidence.") + updateDossierPipelineStatus("Live spotter hit #\(dossierSpotterHitCount)", kind: .active) + + let item = checklistItems[index] + Task { + await handleChecklistMutation( + item: item, + stepIndex: index, + eventType: "step_complete", + evidence: evidence + ) + } + + if checklistItems.allSatisfy({ $0.isChecked }) { + Task { await endAndShip(status: .allItemsChecked) } + } + } + + private func reconcileChecklistAfterServerAdvance( + match: GeminiLiveSpotter.SpotterMatch, + evidence: [String: Any] + ) { + guard let serverStepIndex = match.advancedToStepIndex else { return } + + let completedCount = match.completedSop + ? 
checklistItems.count + : min(max(serverStepIndex, 0), checklistItems.count) + guard completedCount > 0 || match.completedSop else { return } + let matchedWasAlreadyChecked = + checklistItems.first(where: { $0.itemID == match.id })?.isChecked == true + + for index in checklistItems.indices { + guard index < completedCount else { continue } + if !checklistItems[index].isChecked { + checklistItems[index].isChecked = true + checklistItems[index].completionSource = .vision + } + spotterEvidenceWindow.reset(stepID: checklistItems[index].itemID) + } + + if !matchedWasAlreadyChecked { + dossierSpotterHitCount += 1 + } + + updateGuidancePolicy(.nextInstruction, reason: "Step advanced by server-confirmed visual evidence.") + updateDossierPipelineStatus("Live spotter server advance", kind: .active) + currentStepBecameActiveAt = Date() + + let nextIndex = nextIncompleteStepIndex() + Task { + await workerAdminSync?.updateCurrentStepIndex(nextIndex, sendImmediateHeartbeat: true) + await postExecutionEvent( + type: "phone_step_reconciled", + payload: [ + "step_id": match.id, + "server_step_index": serverStepIndex, + "local_next_step_index": nextIndex, + "completed_sop": match.completedSop, + "evidence": evidence + ] + ) + await syncGeminiSessionInstruction() + } + + if checklistItems.allSatisfy({ $0.isChecked }) || match.completedSop { + Task { await endAndShip(status: .allItemsChecked) } + } + } + + private func captureProofImageIfNeeded(for itemID: String, from image: UIImage) { + guard proofImagesByTargetID[itemID] == nil else { return } + guard let jpegData = image.jpegData(compressionQuality: 0.9) else { return } + proofImagesByTargetID[itemID] = jpegData + } + + private func buildDossierMetadataJSONString() -> String { + let checkedCount = checklistItems.filter { $0.isChecked }.count + let complianceLevel: String + if !checklistItems.isEmpty, checkedCount == checklistItems.count { + complianceLevel = "FULLY" + } else if checkedCount == 0 { + complianceLevel = "NON" + } else 
{ + complianceLevel = "PARTIALLY" + } + + let foundItems: [[String: Any]] = checklistItems.map { item in + let notes: String + if item.isChecked { + switch item.completionSource { + case .vision: + notes = "Spotted live by edge AI" + case .voice: + notes = "Confirmed by voice" + case .manual: + notes = "Checked by operator" + case .pending: + notes = "Found" + } + } else { + notes = "Not found" + } + + return [ + "target": item.itemID, + "found": item.isChecked, + "notes": notes + ] + } + + let checklist: [[String: Any]] = checklistItems.map { item in + [ + "name": item.name, + "checked": item.isChecked + ] + } + + let metadata: [String: Any] = [ + "compliance_level": complianceLevel, + "found_items": foundItems, + "checklist": checklist + ] + + guard let data = try? JSONSerialization.data(withJSONObject: metadata, options: []), + let json = String(data: data, encoding: .utf8) else { + return "{}" + } + + return json + } + + func requestGuidedStepValidation(trigger: String = "tap") { + guard isSopAuditRunning else { return } + guard !isSpotterInferenceInFlight, !isStepValidationRunning else { return } + guard let image = currentVideoFrame else { + sopAuditStatusMessage = "Keep the current step visible so AI can check it." + aiGuideStatusMessage = sopAuditStatusMessage + return + } + let pendingItems = activeSpotterRequestItems() + + guard !pendingItems.isEmpty else { + sopAuditStatusMessage = "All checklist steps are complete." + aiGuideStatusMessage = sopAuditStatusMessage + return + } + + lastSpotterInferenceTime = Date() + isSpotterInferenceInFlight = true + isStepValidationRunning = true + sopAuditStatusMessage = "Checking the current step..." + aiGuideStatusMessage = sopAuditStatusMessage + updateDossierPipelineStatus("On-demand AI check running", kind: .active) + + Task { [weak self] in + guard let self else { return } + let matches: [GeminiLiveSpotter.SpotterMatch] + var spotterErrorMessage: String? 
+ var spotterConflict = false + let requestStartedAt = CACurrentMediaTime() + do { + matches = try await self.geminiLiveSpotter.detectVisibleItemMatches( + image: image, + items: pendingItems, + sessionID: self.currentSopSessionId, + elapsedActiveMs: self.elapsedActiveMsForCurrentStep() + ) + } catch { + matches = [] + spotterErrorMessage = error.localizedDescription + spotterConflict = self.isStaleSpotterConflict(error) + await WorkerTelemetry.shared.record( + spotterConflict ? "gemini_spotter_conflict" : "gemini_spotter_failed", + source: "gemini_spotter", + stage: spotterConflict ? "conflict" : "failed", + sessionID: self.currentSopSessionId, + payload: [ + "error": error.localizedDescription, + "target_count": pendingItems.count + ] + ) + } + let durationMs = (CACurrentMediaTime() - requestStartedAt) * 1000 + NSLog( + "[Spotter] Guided step check trigger=%@ targets=%@ matched=%@ autoComplete=%@ duration=%.1fms", + trigger, + pendingItems.map(\.id).joined(separator: ","), + matches.filter(\.matched).map(\.id).joined(separator: ","), + matches.filter(\.autoComplete).map(\.id).joined(separator: ","), + durationMs + ) + if let firstMatch = matches.first { + await WorkerTelemetry.shared.record( + "gemini_spotter_result", + source: "gemini_spotter", + stage: firstMatch.autoComplete ? "auto_complete" : firstMatch.matched ? "matched" : "not_matched", + sessionID: self.currentSopSessionId, + durationMs: durationMs, + metricValue: firstMatch.confidence, + metricUnit: "confidence", + payload: [ + "step_id": firstMatch.id, + "trigger": trigger, + "on_demand": true, + "matched": firstMatch.matched, + "auto_complete": firstMatch.autoComplete, + "threshold": firstMatch.threshold, + "reason": firstMatch.reason + ] + ) + } + + await MainActor.run { + if let spotterErrorMessage { + if spotterConflict { + self.sopAuditStatusMessage = "Checklist moved on in the backend. Refreshing the active step..." 
+ self.aiGuideStatusMessage = self.sopAuditStatusMessage + self.updateGuidancePolicy(.helpPrompt, reason: self.sopAuditStatusMessage) + self.updateDossierPipelineStatus("AI check refreshed active step", kind: .info) + Task { await self.recoverFromStaleSpotterConflict(message: spotterErrorMessage) } + self.isStepValidationRunning = false + self.isSpotterInferenceInFlight = false + return + } + self.sopAuditStatusMessage = "AI check could not confirm the active step: \(spotterErrorMessage)" + self.aiGuideStatusMessage = self.sopAuditStatusMessage + self.updateGuidancePolicy(.helpPrompt, reason: self.sopAuditStatusMessage) + self.updateDossierPipelineStatus("AI check conflict", kind: .info) + Task { await self.syncGeminiSessionInstruction() } + self.isStepValidationRunning = false + self.isSpotterInferenceInFlight = false + return + } + + if let match = matches.first { + if match.autoComplete, match.advancedToStepIndex != nil { + self.captureProofImageIfNeeded(for: match.id, from: image) + self.reconcileChecklistAfterServerAdvance(match: match, evidence: [ + "guided_trigger": trigger, + "on_demand": true, + "ai_confidence": match.confidence, + "ai_reason": match.reason, + "evidence_timestamp": match.evidenceTimestamp, + "auto_complete": match.autoComplete, + "advanced_to_step_index": match.advancedToStepIndex ?? NSNull(), + "completed_sop": match.completedSop, + "threshold": match.threshold + ]) + self.sopAuditStatusMessage = "Step checked. Moving to the next step." + self.aiGuideStatusMessage = self.sopAuditStatusMessage + } else { + let reason = match.reason.trimmingCharacters(in: .whitespacesAndNewlines) + let message = reason.isEmpty + ? (match.autoComplete + ? "The server has not advanced this step yet. Try again in a moment." 
+ : "I still need clearer evidence before moving to the next step.") + : reason + self.sopAuditStatusMessage = "Step needs more evidence: \(message)" + self.aiGuideStatusMessage = self.sopAuditStatusMessage + self.updateGuidancePolicy(.helpPrompt, reason: self.sopAuditStatusMessage) + self.updateDossierPipelineStatus("AI check incomplete", kind: .info) + } + } else { + self.sopAuditStatusMessage = "AI check could not read the current step. Keep the work area visible and try again." + self.aiGuideStatusMessage = self.sopAuditStatusMessage + self.updateGuidancePolicy(.helpPrompt, reason: self.sopAuditStatusMessage) + self.updateDossierPipelineStatus("AI check unavailable", kind: .info) + } + self.isStepValidationRunning = false + self.isSpotterInferenceInFlight = false + } + } + } + + private func elapsedActiveMsForCurrentStep() -> Int? { + max(0, Int(Date().timeIntervalSince(currentStepBecameActiveAt) * 1000)) + } + + private func isStaleSpotterConflict(_ error: Error) -> Bool { + if case AdminIngestError.server(let statusCode, _, _) = error { + return statusCode == 409 + } + return error.localizedDescription.contains("HTTP 409") + } + + private func recoverFromStaleSpotterConflict(message: String) async { + await postExecutionEvent( + type: "phone_step_validation_conflict", + payload: [ + "message": message, + "local_next_step_index": nextIncompleteStepIndex() + ] + ) + await syncGeminiSessionInstruction() + } + + private func startPreferredCamera() async { + reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: false) + switch preferredCaptureMode { + case .iPhone: + await handleStartIPhone() + case .glasses: + guard hasActiveDevice else { + showError("Meta camera unavailable. 
Connect glasses or switch to iPhone.") + return + } + await handleStartStreaming() + } + } + + private func reconcileCaptureModeWithDeviceAvailability(allowTransportSwitch: Bool) { + if !hasActiveDevice, preferredCaptureMode == .glasses { + preferredCaptureMode = .iPhone + sopAuditStatusMessage = "Meta camera disconnected." + } else if hasActiveDevice { + sopAuditStatusMessage = "Meta camera ready. Use the camera selector when you want glasses." + } + + guard allowTransportSwitch, isStreaming, !isSopAuditRunning else { return } + Task { @MainActor [weak self] in + await self?.switchToPreferredCaptureModeIfNeeded() + } + } + + private func captureModeEventValue(_ mode: StreamingMode) -> String { + switch mode { + case .glasses: return "glasses" + case .iPhone: return "iphone" + } + } + + private func stopCurrentCameraTransportOnly() async { + switch streamingMode { + case .iPhone: + iPhoneCameraManager?.stop() + iPhoneCameraManager = nil + currentVideoFrame = nil + hasReceivedFirstFrame = false + streamingStatus = .stopped + // Default to glasses mode when stopped; next start sets active mode explicitly. + streamingMode = .glasses + case .glasses: + await streamSession.stop() + currentVideoFrame = nil + hasReceivedFirstFrame = false + streamingStatus = .stopped + } + } + + private enum WorkerAudioRouteReason { + case viewer + case holdToTalk + } + + private func describeAudioPorts(_ ports: [AVAudioSessionPortDescription]) -> String { + if ports.isEmpty { + return "none" + } + + return ports + .map { "\($0.portType.rawValue):\($0.portName)" } + .joined(separator: ",") + } + + private func logWorkerAudioRoute( + session: AVAudioSession, + mode: StreamingMode, + reason: WorkerAudioRouteReason, + note: String? = nil + ) { + let modeLabel = mode == .iPhone ? "iphone" : "glasses" + let reasonLabel = reason == .viewer ? 
"viewer" : "hold_to_talk" + NSLog( + "[WorkerAudio] reason=%@ mode=%@ muted=%@ inputs=%@ outputs=%@ note=%@", + reasonLabel, + modeLabel, + webrtcViewModel.isMuted ? "true" : "false", + describeAudioPorts(session.currentRoute.inputs), + describeAudioPorts(session.currentRoute.outputs), + note ?? "none" + ) + } + + private func hasBluetoothTalkbackRoute(_ route: AVAudioSessionRouteDescription) -> Bool { + route.inputs.contains { + $0.portType == .bluetoothHFP || + $0.portType == .bluetoothLE + } || + route.outputs.contains { + $0.portType == .bluetoothHFP + || $0.portType == .bluetoothLE + } + } + + private func preferredBluetoothHFPInput(_ session: AVAudioSession) -> AVAudioSessionPortDescription? { + session.availableInputs?.first { + $0.portType == .bluetoothHFP || $0.portType == .bluetoothLE + } + } + + @discardableResult + private func configureWorkerAudioRoute( + for mode: StreamingMode, + reason: WorkerAudioRouteReason + ) throws -> String? { + if reason == .viewer, webrtcViewModel.isActive, webrtcViewModel.isSupportMode { + return webrtcViewModel.refreshSupportAudioRoute(captureMode: mode) + } + + let owner: WorkerAudioRouteOwner = + reason == .holdToTalk + ? .holdToTalk + : .viewer + let snapshot = try WorkerAudioRouteCoordinator.shared.acquire( + owner: owner, + mode: mode, + reason: reason == .holdToTalk ? "hold_to_talk" : "live_support", + forceSpeaker: SettingsManager.shared.speakerOutputEnabled, + preferredIOBufferDuration: 0.02 + ) + switch owner { + case .holdToTalk: + holdToTalkAudioLease = snapshot.lease + case .viewer: + viewerAudioRouteLease = snapshot.lease + case .aiGuide, .backOfficeWebRTC: + break + } + return snapshot.fallbackMessage + } + + private func liveRoomStatusMessage(localOnly: Bool, helpRequested: Bool, roomCode: String? = nil) -> String { + if localOnly { + return helpRequested + ? "Live room is local-only. Admin can't join until the backend session sync succeeds." + : "Local live room active. 
Admin visibility will start once the backend session sync succeeds." + } + + if let roomCode, !roomCode.isEmpty { + return helpRequested + ? "Supervisor request sent. Room \(roomCode) is ready for manager join." + : "Admin video observation active: \(roomCode)" + } + + return helpRequested + ? "Supervisor request sent. Waiting for the live room to finish syncing." + : "Opening live execution room..." + } + + private func waitForRoomCode(timeoutNanoseconds: UInt64 = 5_000_000_000) async -> String? { + if !webrtcViewModel.roomCode.isEmpty { + return webrtcViewModel.roomCode + } + + let step: UInt64 = 150_000_000 + var waited: UInt64 = 0 + while waited < timeoutNanoseconds { + try? await Task.sleep(nanoseconds: step) + waited += step + if !webrtcViewModel.roomCode.isEmpty { + return webrtcViewModel.roomCode + } + } + return nil + } + + private func createOrFallbackSessionID(for sop: SOPTemplate) async -> String { + if let activeExecutionSession { + currentSopSessionId = activeExecutionSession.id + isUsingLocalSessionFallback = false + return activeExecutionSession.id + } + + guard let workerID = workerProfile?.id else { + isUsingLocalSessionFallback = true + let fallback = UUID().uuidString + setCriticalOperationsSyncIssue( + phase: "session_create", + message: "Worker context unavailable. Recording locally until ops-api is reachable." + ) + return fallback + } + + do { + let shiftID = validRemoteUUID(sop.shiftID) ?? validRemoteUUID(activeShift?.id) + let packageID = validRemoteUUID(sop.packageID) ?? validRemoteUUID(activeShift?.packageID) ?? validRemoteUUID(activeShift?.package?.id) + let packageRunID = validRemoteUUID(sop.packageRunID) ?? 
validRemoteUUID(activePackageRunID) + let currentSopID = validRemoteUUID(sop.remoteID) + let createdSession = try await opsAPIClient.createExecutionSession( + workerID: workerID, + deviceID: registeredDevice?.id, + shiftID: shiftID, + packageID: packageID, + packageRunID: packageRunID, + currentSopID: currentSopID, + sopVersion: sop.sopVersion, + packageVersion: sop.packageVersion + ) + activeExecutionSession = createdSession + isUsingLocalSessionFallback = false + clearOperationsSyncState(clearWarning: false) + await postExecutionEvent( + type: "session_started", + payload: [ + "sop_name": sop.name, + "capture_mode": selectedCaptureModeLabel.lowercased() + ] + ) + return createdSession.id + } catch { + isUsingLocalSessionFallback = true + setCriticalOperationsSyncIssue( + phase: "session_create", + message: "Execution session could not sync. Continuing locally: \(error.localizedDescription)" + ) + return UUID().uuidString + } + } + + private func handleChecklistMutation( + item: ChecklistItemState, + stepIndex: Int, + eventType: String, + evidence: [String: Any]? = nil + ) async { + let nextIndex = nextIncompleteStepIndex() + await workerAdminSync?.updateCurrentStepIndex(nextIndex, sendImmediateHeartbeat: true) + var eventPayload: [String: Any] = [ + "step_index": stepIndex, + "step_name": item.name, + "source": item.completionSource.rawValue, + "checked": item.isChecked + ] + if let evidence { + eventPayload["evidence"] = evidence + } + await postExecutionEvent( + type: eventType, + payload: eventPayload + ) + await patchActiveExecutionSession( + ExecutionSessionPatch( + status: "active", + currentSopID: selectedSOP?.remoteID, + currentStepIndex: nextIndex + ) + ) + await syncGeminiSessionInstruction() + } + + private func nextIncompleteStepIndex() -> Int { + checklistItems.firstIndex(where: { !$0.isChecked }) ?? 
checklistItems.count + } + + private func activeSpotterRequestItems() -> [GeminiLiveSpotter.SpotterRequestItem] { + let nextIndex = nextIncompleteStepIndex() + guard nextIndex < checklistItems.count else { return [] } + let currentStep = checklistItems[nextIndex] + return [ + GeminiLiveSpotter.SpotterRequestItem( + id: currentStep.itemID, + name: currentStep.name, + aiPrompt: currentStep.aiPrompt, + expectedObjects: currentStep.expectedObjects, + preconditions: currentStep.preconditions, + postconditions: currentStep.postconditions, + skipRisk: currentStep.skipRisk, + evidenceRequired: currentStep.evidenceRequired, + validation: currentStep.validation, + critical: currentStep.critical + ) + ] + } + + private func nextCriticalStepTitleAfterActive(in sop: SOPTemplate, nextIndex: Int) -> String? { + let orderedSteps = sop.steps.sorted { $0.order < $1.order } + guard nextIndex + 1 < orderedSteps.count else { return nil } + return orderedSteps + .dropFirst(nextIndex + 1) + .first(where: { $0.critical })? + .title + } + + func debugSpotterTargetIDs() -> [String] { + activeSpotterRequestItems().map(\.id) + } + + private func buildGeminiSessionInstruction(for sopOverride: SOPTemplate? = nil) -> String? { + let sop = sopOverride ?? activeCaptureSOP ?? selectedSOP ?? currentAssignedSOP + guard let sop else { + geminiInstructionSyncStatus = "" + return nil + } + + let orderedSteps = sop.steps.sorted { $0.order < $1.order } + let resolvedBaseInstruction = GeminiConfig.defaultSystemInstruction + + guard !orderedSteps.isEmpty else { + geminiInstructionSyncStatus = "Gemini sync: \(sop.name) · no structured steps" + return """ + \(resolvedBaseInstruction) + + Active SOP: \(sop.name) + The SOP has no structured steps loaded. Ask clarifying questions, narrate what you need to see, and guide the worker toward the next safe action. 
+ """ + } + + let nextIndex = nextIncompleteStepIndex() + let hasRemainingSteps = nextIndex < orderedSteps.count + let step = orderedSteps[min(nextIndex, orderedSteps.count - 1)] + let stepDescription = step.description.trimmingCharacters(in: .whitespacesAndNewlines) + let aiPrompt = step.aiPrompt.trimmingCharacters(in: .whitespacesAndNewlines) + let expectedObjects = step.expectedObjects + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .joined(separator: ", ") + let preconditions = step.preconditions + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .joined(separator: "; ") + let postconditions = step.postconditions + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .joined(separator: "; ") + + let directNextAction: String + if hasRemainingSteps { + directNextAction = "Guide the worker through this step now: \(step.title). Use the live camera to decide if it is truly complete before moving on." + } else { + directNextAction = "All SOP steps are marked complete. Confirm the finished state, call out any missing proof, and help the worker wrap up cleanly." + } + + geminiInstructionSyncStatus = hasRemainingSteps + ? "Gemini sync: \(sop.name) · Step \(step.order)/\(orderedSteps.count) · \(step.title)" + : "Gemini sync: \(sop.name) · wrap-up guidance active" + + var lines = [ + resolvedBaseInstruction, + "", + "Live SOP context:", + "SOP title: \(sop.name)", + "Current step: \(step.order) of \(orderedSteps.count)", + "Step title: \(step.title)" + ] + + if !stepDescription.isEmpty { + lines.append("Step description: \(stepDescription)") + } + + lines.append("Vision completion prompt: \(aiPrompt.isEmpty ? "Use the step title and description as the visual rule." 
: aiPrompt)") + + if !expectedObjects.isEmpty { + lines.append("Expected objects to look for: \(expectedObjects)") + } + if !preconditions.isEmpty { + lines.append("Preconditions before this step: \(preconditions)") + } + if !postconditions.isEmpty { + lines.append("Postconditions that prove completion: \(postconditions)") + } + lines.append("Skip risk: \(step.skipRisk)") + lines.append("Evidence required: \(step.evidenceRequired ? "yes" : "no")") + lines.append("Guidance policy: \(guidancePolicy.rawValue)") + lines.append("Guidance policy reason: \(guidancePolicyReason)") + lines.append("Guidance policy instruction: \(guidancePolicy.instruction)") + + if let nextCritical = nextCriticalStepTitleAfterActive(in: sop, nextIndex: nextIndex) { + lines.append("Next critical checkpoint after this: \(nextCritical)") + } + + lines.append("Direct next action: \(directNextAction)") + lines.append("Treat the vision prompt and expected objects as the active verification rule for your spoken guidance.") + + return lines.joined(separator: "\n") + } + + func debugGeminiInstructionPreview(for sop: SOPTemplate) -> String? { + buildGeminiSessionInstruction(for: sop) + } + + private func syncGeminiSessionInstruction(for sopOverride: SOPTemplate? = nil) async { + await geminiAssistant.refreshSessionInstruction(buildGeminiSessionInstruction(for: sopOverride)) + } + + private func syncLivePreviewFrameIfNeeded(image: UIImage) async { + guard isSopAuditRunning else { return } + guard let sessionID = currentSopSessionId else { return } + + let now = Date() + let underLiveVideoPressure = webrtcViewModel.isUnderLiveVideoPressure + let uploadInterval: TimeInterval = underLiveVideoPressure + ? (hasActiveHelpEscalation ? 1.5 : 2.0) + : (hasActiveHelpEscalation ? 0.75 : 1.0) + guard now.timeIntervalSince(lastLivePreviewSyncAt) >= uploadInterval else { return } + lastLivePreviewSyncAt = now + + let compressionQuality = hasActiveHelpEscalation ? 
0.5 : 0.45 + let encodeStartedAt = CACurrentMediaTime() + guard let jpegData = await livePreviewFrameEncoder.encode( + image: image, + maxDimension: 640, + compressionQuality: compressionQuality + ) else { return } + await WorkerTelemetry.shared.record( + "live_preview_encode", + source: "ios_app", + stage: underLiveVideoPressure ? "pressure" : "normal", + sessionID: sessionID, + durationMs: (CACurrentMediaTime() - encodeStartedAt) * 1000, + payload: [ + "bytes": jpegData.count, + "under_live_video_pressure": underLiveVideoPressure, + "upload_interval_seconds": uploadInterval + ] + ) + + if streamingMode == .glasses, let fileURL = sopVideoRecorder?.outputURL { + rememberPendingRecording(sessionID: sessionID, fileURL: fileURL) + } + + await workerAdminSync?.enqueueFrameUpload(data: jpegData) + } - // Monitor device availability - deviceMonitorTask = Task { @MainActor in - for await device in deviceSelector.activeDeviceStream() { - self.hasActiveDevice = device != nil - } + private func handleWorkerLiveHeartbeatFailure(sessionID: String, message: String) async { + guard sessionID == currentSopSessionId else { return } + + let now = Date() + guard now.timeIntervalSince(lastWorkerLiveHeartbeatFailureWarningAt) >= 5 else { return } + lastWorkerLiveHeartbeatFailureWarningAt = now + + setOperationsSyncWarning( + phase: "live_heartbeat", + message: "Live heartbeat failed; keeping active audio and handoff routes unchanged. 
\(message)" + ) + await WorkerTelemetry.shared.record( + "worker_live_heartbeat_failed_no_audio_teardown", + source: "ios_app", + stage: "non_authoritative_error", + sessionID: sessionID, + payload: [ + "error": message, + "gemini_active": geminiAssistant.isGeminiActive, + "gemini_audio_ready": geminiAssistant.isAudioReady, + "webrtc_active": webrtcViewModel.isActive, + "has_active_help_escalation": hasActiveHelpEscalation, + "audio_route_lease_protected": true, + "av_audio_engine_state_protected": true, + "support_mode_trusted": false + ] + ) + } + + private func isAuthoritativeHumanSupportEnd(_ response: WorkerLiveHeartbeatResponse) -> Bool { + guard response.supportMode == "ai", response.humanSupportStatus == "ended" else { + return false } - setupVideoDecoder() - attachListeners() + return hasActiveHelpEscalation || + shouldResumeAiSupportAfterBackOffice || + (webrtcViewModel.isActive && webrtcViewModel.isSupportMode) || + response.supportUpdatedAt != nil } - private func setupVideoDecoder() { - videoDecoder.setFrameCallback { [weak self] decodedFrame in - Task { @MainActor [weak self] in - guard let self else { return } - let pixelBuffer = decodedFrame.pixelBuffer - let width = CVPixelBufferGetWidth(pixelBuffer) - let height = CVPixelBufferGetHeight(pixelBuffer) - let ciImage = CIImage(cvPixelBuffer: pixelBuffer) - let rect = CGRect(x: 0, y: 0, width: width, height: height) - if let cgImage = self.cpuCIContext.createCGImage(ciImage, from: rect) { - let image = UIImage(cgImage: cgImage) - self.geminiSessionVM?.sendVideoFrameIfThrottled(image: image) - self.webrtcSessionVM?.pushVideoFrame(image) - if self.backgroundFrameCount <= 5 || self.backgroundFrameCount % 120 == 0 { - NSLog("[Stream] Background frame #%d decoded and forwarded (%dx%d)", - self.backgroundFrameCount, width, height) - } + private func resetLiveRoomSyncSnapshot() { + lastLiveRoomSyncSnapshot = nil + lastLiveRoomSyncAt = .distantPast + hasReceivedBackOfficeConnectedHandshake = false + } + + 
private func handleWorkerLiveHeartbeatResponse(_ response: WorkerLiveHeartbeatResponse) async { + guard response.sessionID == currentSopSessionId else { return } + + // Only successful heartbeat responses are authoritative enough to mutate + // handoff state. HTTP failures are handled above and never release audio. + let humanConnected = + response.shouldOpenLiveRoom || + (response.supportMode == "back_office" && response.humanSupportStatus == "connected") + let reportedHumanEnded = + response.supportMode == "ai" && response.humanSupportStatus == "ended" + let humanEnded = isAuthoritativeHumanSupportEnd(response) + let humanRinging = + response.supportMode == "handoff_requested" || response.humanSupportStatus == "ringing" + + if humanConnected { + hasActiveHelpEscalation = true + hasReceivedBackOfficeConnectedHandshake = true + if !webrtcViewModel.isActive || !webrtcViewModel.isSupportMode { + let wasObservationRoom = webrtcViewModel.isActive && !webrtcViewModel.isSupportMode + helpStatusMessage = wasObservationRoom + ? "Back office answered. Upgrading live room to video and audio..." + : "Back office answered. Opening live video and audio..." + if wasObservationRoom { + await WorkerTelemetry.shared.record( + "worker_live_support_upgrade_from_observation", + source: "ios_app", + stage: "handoff", + sessionID: response.sessionID, + payload: [ + "previous_room_code_present": !webrtcViewModel.roomCode.isEmpty, + "support_mode": response.supportMode, + "human_support_status": response.humanSupportStatus + ] + ) } + await ensureLiveRoomSession() + } else if !webrtcViewModel.roomCode.isEmpty { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) } + return } - } - /// Recreate the StreamSession with the current selectedResolution. - /// Only call when not actively streaming. 
- func updateResolution(_ resolution: StreamingResolution) { - guard !isStreaming else { return } - selectedResolution = resolution - let config = StreamSessionConfig( - videoCodec: VideoCodec.raw, - resolution: resolution, - frameRate: 24) - streamSession = StreamSession(streamSessionConfig: config, deviceSelector: deviceSelector) - attachListeners() - NSLog("[Stream] Resolution changed to %@", resolutionLabel) - } + if reportedHumanEnded && !humanEnded { + await WorkerTelemetry.shared.record( + "worker_live_support_end_ignored_non_authoritative", + source: "ios_app", + stage: "non_authoritative_heartbeat", + sessionID: response.sessionID, + payload: [ + "support_mode": response.supportMode, + "human_support_status": response.humanSupportStatus, + "webrtc_active": webrtcViewModel.isActive, + "webrtc_support_mode": webrtcViewModel.isSupportMode, + "has_active_help_escalation": hasActiveHelpEscalation, + "should_resume_ai": shouldResumeAiSupportAfterBackOffice, + "support_updated_at_present": response.supportUpdatedAt != nil, + "audio_route_lease_protected": true + ] + ) + return + } - private func attachListeners() { - // Subscribe to session state changes using the DAT SDK listener pattern - stateListenerToken = streamSession.statePublisher.listen { [weak self] state in - Task { @MainActor [weak self] in - self?.updateStatusFromState(state) + if humanEnded { + if webrtcViewModel.isActive && webrtcViewModel.isSupportMode { + webrtcViewModel.stopSession() + } + hasActiveHelpEscalation = false + resetLiveRoomSyncSnapshot() + await workerAdminSync?.updateHelpRequested(false, sendImmediateHeartbeat: false) + helpStatusMessage = "Back office ended. AI support is available again." 
+ await ensureObservationLiveRoomSession() + + if shouldResumeAiSupportAfterBackOffice, isSopAuditRunning, !geminiAssistant.isGeminiActive { + shouldResumeAiSupportAfterBackOffice = false + await ensureAiGuideStarted(reason: "support_ended") + } else { + shouldResumeAiSupportAfterBackOffice = false } + return } - // Subscribe to video frames from the device camera - // This callback fires whether the app is in the foreground or background, - // enabling continuous streaming even when the screen is locked. - videoFrameListenerToken = streamSession.videoFramePublisher.listen { [weak self] videoFrame in - Task { @MainActor [weak self] in - guard let self else { return } + if humanRinging { + hasActiveHelpEscalation = true + helpStatusMessage = "Calling back office. Live video and audio will open after they answer." + } + } - let isInBackground = UIApplication.shared.applicationState == .background + private func requestSupervisorHelpFlow() async { + guard canRequestHelp else { + helpStatusMessage = "Start an SOP before requesting live support." + return + } - if !isInBackground { - self.backgroundFrameCount = 0 - self.bgDiagLogged = false - if let image = videoFrame.makeUIImage() { - self.currentVideoFrame = image - if !self.hasReceivedFirstFrame { - self.hasReceivedFirstFrame = true - } - self.geminiSessionVM?.sendVideoFrameIfThrottled(image: image) - self.webrtcSessionVM?.pushVideoFrame(image) - } - } else { - // In background: makeUIImage() uses VideoToolbox GPU rendering which iOS suspends. - // Instead, use our VideoDecoder (VTDecompressionSession) to decode compressed - // frames into pixel buffers, then convert via CPU CIContext. 
- self.backgroundFrameCount += 1 + isRequestingHelp = true + defer { isRequestingHelp = false } - let sampleBuffer = videoFrame.sampleBuffer - let hasCompressedData = CMSampleBufferGetDataBuffer(sampleBuffer) != nil + let notes = helpRequestNotes.trimmingCharacters(in: .whitespacesAndNewlines) + shouldResumeAiSupportAfterBackOffice = geminiAssistant.isGeminiActive + if geminiAssistant.isGeminiActive { + await geminiAssistant.stopSessionForHumanSupportHandoff() + aiGuideStatusMessage = "AI guide paused while back office support is requested." + sopAuditStatusMessage = aiGuideStatusMessage + } - if hasCompressedData { - // Compressed frame (HEVC/H.264) - decode via VTDecompressionSession - do { - try self.videoDecoder.decode(sampleBuffer) - } catch { - if self.backgroundFrameCount <= 5 || self.backgroundFrameCount % 120 == 0 { - NSLog("[Stream] Background frame #%d decode error: %@", - self.backgroundFrameCount, String(describing: error)) - } - } - } else if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { - // Raw pixel buffer - convert directly via CPU CIContext - let width = CVPixelBufferGetWidth(pixelBuffer) - let height = CVPixelBufferGetHeight(pixelBuffer) - let ciImage = CIImage(cvPixelBuffer: pixelBuffer) - let rect = CGRect(x: 0, y: 0, width: width, height: height) - if let cgImage = self.cpuCIContext.createCGImage(ciImage, from: rect) { - let image = UIImage(cgImage: cgImage) - self.geminiSessionVM?.sendVideoFrameIfThrottled(image: image) - self.webrtcSessionVM?.pushVideoFrame(image) - } - self.videoDecoder.invalidateSession() - } - } - } + guard let sessionID = activeExecutionSession?.id else { + hasActiveHelpEscalation = true + await workerAdminSync?.updateHelpRequested(true) + helpStatusMessage = "Calling back office locally. Backend session sync is required before live media can open." 
+ return } - // Subscribe to streaming errors - errorListenerToken = streamSession.errorPublisher.listen { [weak self] error in - Task { @MainActor [weak self] in - guard let self else { return } - // Suppress device-not-found errors when user hasn't started streaming yet - if self.streamingStatus == .stopped { - if case .deviceNotConnected = error { return } - if case .deviceNotFound = error { return } - } - let newErrorMessage = formatStreamingError(error) - if newErrorMessage != self.errorMessage { - showError(newErrorMessage) - } + do { + _ = try await opsAPIClient.createIntervention( + sessionID: sessionID, + type: "help_request", + notes: notes.isEmpty ? "Worker requested assistance." : notes + ) + hasActiveHelpEscalation = true + await workerAdminSync?.updateHelpRequested(true) + await postExecutionEvent( + type: "help_requested", + payload: [ + "notes": notes, + "room_code": NSNull() + ] + ) + await patchActiveExecutionSession( + ExecutionSessionPatch( + helpRequested: true + ) + ) + helpStatusMessage = "Calling back office. Live video and audio will open after they answer." 
+ } catch { + hasActiveHelpEscalation = false + let shouldResumeAI = shouldResumeAiSupportAfterBackOffice + shouldResumeAiSupportAfterBackOffice = false + if shouldResumeAI, isSopAuditRunning, !geminiAssistant.isGeminiActive { + await geminiAssistant.startSession(systemInstruction: buildGeminiSessionInstruction()) } + helpStatusMessage = "Back office call request failed: \(error.localizedDescription)" } + } - updateStatusFromState(streamSession.state) + private func syncLiveRoomState(roomCode: String) async { + let normalizedRoomCode = roomCode.trimmingCharacters(in: .whitespacesAndNewlines) + guard !normalizedRoomCode.isEmpty else { return } - // Subscribe to photo capture events - photoDataListenerToken = streamSession.photoDataPublisher.listen { [weak self] photoData in - Task { @MainActor [weak self] in - guard let self else { return } - if let uiImage = UIImage(data: photoData.data) { - self.capturedPhoto = uiImage - self.showPhotoPreview = true - } - } + let backOfficeConnected = + hasReceivedBackOfficeConnectedHandshake || + (hasActiveHelpEscalation && + webrtcViewModel.isActive && + webrtcViewModel.isSupportMode && + webrtcViewModel.connectionState == .connected) + let nextSnapshot = LiveRoomSyncSnapshot( + roomCode: normalizedRoomCode, + helpRequested: hasActiveHelpEscalation, + backOfficeConnected: backOfficeConnected + ) + let previousSnapshot = lastLiveRoomSyncSnapshot + let now = Date() + let isFirstRoomCodePublish = previousSnapshot?.roomCode == nil + let didChangeRoomCode = previousSnapshot?.roomCode != nil && + previousSnapshot?.roomCode != normalizedRoomCode + let didInitiallyRequestHelp = hasActiveHelpEscalation && + previousSnapshot?.helpRequested != true + let didInitiallyConnectBackOffice = backOfficeConnected && + previousSnapshot?.backOfficeConnected != true + let shouldSendImmediateHeartbeat = + isFirstRoomCodePublish || + didChangeRoomCode || + didInitiallyRequestHelp || + didInitiallyConnectBackOffice + let isRedundantSync = 
previousSnapshot == nextSnapshot + let didClearThrottle = + now.timeIntervalSince(lastLiveRoomSyncAt) >= liveRoomRedundantSyncThrottleInterval + + let localOnly = activeExecutionSession == nil + helpStatusMessage = liveRoomStatusMessage( + localOnly: localOnly, + helpRequested: hasActiveHelpEscalation, + roomCode: normalizedRoomCode + ) + + guard shouldSendImmediateHeartbeat || !isRedundantSync || didClearThrottle else { + return + } + + await workerAdminSync?.updateRoomCode( + normalizedRoomCode, + sendImmediateHeartbeat: shouldSendImmediateHeartbeat + ) + + if !localOnly { + await patchActiveExecutionSession( + ExecutionSessionPatch( + helpRequested: hasActiveHelpEscalation, + webrtcRoomCode: normalizedRoomCode + ) + ) } + + lastLiveRoomSyncSnapshot = nextSnapshot + lastLiveRoomSyncAt = now } - func handleStartStreaming() async { - let permission = Permission.camera + private func closeCurrentPackageFlow() async { + guard !isClosingPackage else { return } + guard canCloseCurrentPackage else { + packageClosureStatusMessage = "Complete all required SOPs before closing the package." + return + } + guard let packageRunID = activePackageRunID else { + packageClosureStatusMessage = "Package run is not synced yet. Ask ops-api to expose the package close route." + return + } + guard let workerID = workerProfile?.id else { + packageClosureStatusMessage = "Worker context missing. Refresh the assignment queue first." + return + } + + isClosingPackage = true + defer { isClosingPackage = false } + do { - let status = try await wearables.checkPermissionStatus(permission) - if status == .granted { - await startSession() - return - } - let requestStatus = try await wearables.requestPermission(permission) - if requestStatus == .granted { - await startSession() - return - } - showError("Permission denied") + _ = try await opsAPIClient.closePackageRun( + packageRunID: packageRunID, + workerID: workerID + ) + packageClosureStatusMessage = "Package closed and synced." 
+ await refreshWorkerContext() } catch { - showError("Permission error: \(error.description)") + packageClosureStatusMessage = "Package close failed: \(error.localizedDescription)" } } - func startSession() async { - await streamSession.start() + private func updateHelpStatus(for state: WebRTCConnectionState) { + let localOnly = activeExecutionSession == nil + switch state { + case .connected: + if !hasLoggedRoomJoinedForSession { + hasLoggedRoomJoinedForSession = true + WorkerLiveLogger.log( + "room_joined", + sessionID: currentSopSessionId, + roomCode: webrtcViewModel.roomCode.isEmpty ? nil : webrtcViewModel.roomCode, + uploadState: "active" + ) + } + helpStatusMessage = localOnly + ? "Local live room connected. Admin join stays unavailable until backend sync succeeds." + : (webrtcViewModel.isSupportMode ? "Back office audio connected." : "Admin video observer connected.") + case .waitingForPeer: + if !webrtcViewModel.roomCode.isEmpty { + helpStatusMessage = liveRoomStatusMessage( + localOnly: localOnly, + helpRequested: hasActiveHelpEscalation, + roomCode: webrtcViewModel.roomCode + ) + } + case .connecting: + helpStatusMessage = localOnly + ? "Opening local live room..." + : (webrtcViewModel.isSupportMode ? "Opening back office audio room..." : "Opening admin video observation...") + case .backgrounded: + helpStatusMessage = "Live room paused in background. Returning will reconnect." + case .error(let message): + helpStatusMessage = "Live room error: \(message)" + case .disconnected: + if !isRequestingHelp { + helpStatusMessage = "" + } + } } - private func showError(_ message: String) { - errorMessage = message - showError = true - } + private func ensureLiveRoomSession() async { + // Heartbeats can arrive while Gemini is still yielding the audio route. + // Only one support-room handoff may perform that awaited stop/start path. 
+ if webrtcViewModel.isActive && webrtcViewModel.isSupportMode { + switch webrtcViewModel.connectionState { + case .connected, .waitingForPeer, .connecting: + if !webrtcViewModel.roomCode.isEmpty { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) + } + return + case .backgrounded, .error, .disconnected: + break + } + } - func stopSession() async { - if streamingMode == .iPhone { - stopIPhoneSession() + guard !isLiveRoomHandoffInProgress else { + if !webrtcViewModel.roomCode.isEmpty { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) + } return } - await streamSession.stop() - } + isLiveRoomHandoffInProgress = true + defer { isLiveRoomHandoffInProgress = false } - // MARK: - iPhone Camera Mode + if geminiAssistant.isGeminiActive { + shouldResumeAiSupportAfterBackOffice = true + aiGuideStatusMessage = "AI guide paused while back office support connects." + sopAuditStatusMessage = aiGuideStatusMessage + await geminiAssistant.stopSessionForHumanSupportHandoff() + } - func handleStartIPhone() async { - let granted = await IPhoneCameraManager.requestPermission() - if granted { - startIPhoneSession() + let hasRoomCode = !webrtcViewModel.roomCode.isEmpty + + if webrtcViewModel.isActive && webrtcViewModel.isSupportMode { + switch webrtcViewModel.connectionState { + case .connected, .waitingForPeer: + if hasRoomCode { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) + return + } + case .connecting: + if hasRoomCode { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) + return + } + case .backgrounded, .error, .disconnected: + break + } + + helpStatusMessage = "Restarting live room for this SOP..." + webrtcViewModel.stopSession() + } else if webrtcViewModel.isActive { + helpStatusMessage = "Switching video observation into back office audio..." 
+ webrtcViewModel.stopSession() + } + + await webrtcViewModel.startSession(captureMode: streamingMode, roomMode: .support) + if let roomCode = await waitForRoomCode() { + await syncLiveRoomState(roomCode: roomCode) + } else if activeExecutionSession == nil { + helpStatusMessage = "Opening local-only live room..." + setOperationsSyncWarning( + phase: "session_patch", + message: "Live room is local-only until the backend execution session sync succeeds." + ) } else { - showError("Camera permission denied. Please grant access in Settings.") + helpStatusMessage = "Live room still syncing. Manager join will unlock once the room code is published." + setOperationsSyncWarning( + phase: "session_patch", + message: "Live room did not publish a room code yet. Verify signal settings and session sync before expecting admin join." + ) } } - private func startIPhoneSession() { - streamingMode = .iPhone - let camera = IPhoneCameraManager() - camera.onFrameCaptured = { [weak self] image in - Task { @MainActor [weak self] in - guard let self else { return } - self.currentVideoFrame = image - if !self.hasReceivedFirstFrame { - self.hasReceivedFirstFrame = true + private func ensureObservationLiveRoomSession() async { + guard isSopAuditRunning, !hasActiveHelpEscalation else { return } + guard WebRTCConfig.isConfigured else { + setOperationsSyncWarning( + phase: "live_room", + message: "Video observation room is unavailable because signal settings are not configured." + ) + return + } + + if webrtcViewModel.isActive { + if !webrtcViewModel.isSupportMode { + if !webrtcViewModel.roomCode.isEmpty { + await syncLiveRoomState(roomCode: webrtcViewModel.roomCode) } - self.geminiSessionVM?.sendVideoFrameIfThrottled(image: image) - self.webrtcSessionVM?.pushVideoFrame(image) + return } + webrtcViewModel.stopSession() + } + + helpStatusMessage = "Opening admin video observation..." 
+ await webrtcViewModel.startSession(captureMode: streamingMode, roomMode: .observation) + if let roomCode = await waitForRoomCode(timeoutNanoseconds: 4_000_000_000) { + await syncLiveRoomState(roomCode: roomCode) + await WorkerTelemetry.shared.record( + "webrtc_observation_room_ready", + source: "webrtc", + stage: "observation", + sessionID: currentSopSessionId, + payload: [ + "room_code_present": true, + "capture_mode": captureModeEventValue(streamingMode) + ] + ) + } else { + helpStatusMessage = "Admin video observation is still syncing." + setOperationsSyncWarning( + phase: "live_room", + message: "Observation room did not publish a room code yet. Admin can still use frame fallback while signaling catches up." + ) } - camera.start() - iPhoneCameraManager = camera - streamingStatus = .streaming - NSLog("[Stream] iPhone camera mode started") } - private func stopIPhoneSession() { - iPhoneCameraManager?.stop() - iPhoneCameraManager = nil - currentVideoFrame = nil - hasReceivedFirstFrame = false - streamingStatus = .stopped - streamingMode = .glasses - NSLog("[Stream] iPhone camera mode stopped") + private func postExecutionEvent(type: String, payload: [String: Any]) async { + guard let sessionID = activeExecutionSession?.id else { return } + do { + _ = try await opsAPIClient.postExecutionEvent( + sessionID: sessionID, + eventType: type, + payload: payload + ) + } catch { + setOperationsSyncWarning( + phase: "session_event", + message: "Event sync failed: \(error.localizedDescription)" + ) + } } - func dismissError() { - showError = false - errorMessage = "" + private func patchActiveExecutionSession(_ patch: ExecutionSessionPatch) async { + guard let sessionID = activeExecutionSession?.id else { return } + do { + let updatedSession = try await opsAPIClient.updateExecutionSession(id: sessionID, patch: patch) + activeExecutionSession = updatedSession + if let warning = updatedSession.packageProgressWarning?.trimmingCharacters(in: .whitespacesAndNewlines), + 
!warning.isEmpty { + setOperationsSyncWarning(phase: "package_progress", message: warning) + } + } catch { + setCriticalOperationsSyncIssue( + phase: "session_patch", + message: "Session sync failed: \(error.localizedDescription)" + ) + } } - func capturePhoto() { - streamSession.capturePhoto(format: .jpeg) + private func uploadMediaAssetIfPossible( + assetID: String, + data: Data, + contentType: String, + label: String + ) async -> Bool { + guard !data.isEmpty else { return false } + + do { + let uploadTarget = try await opsAPIClient.requestMediaUploadTarget( + assetID: assetID, + contentType: contentType, + byteCount: data.count + ) + try await opsAPIClient.uploadBinary(to: uploadTarget, data: data, contentType: contentType) + _ = try await opsAPIClient.finalizeMediaAssetUpload( + assetID: assetID, + byteCount: data.count, + contentType: contentType + ) + return true + } catch { + setOperationsSyncWarning( + phase: "media_upload", + message: "\(label) upload is pending until ops-api exposes upload targets. 
\(error.localizedDescription)" + ) + return false + } } - func dismissPhotoPreview() { - showPhotoPreview = false - capturedPhoto = nil + private func uploadEvidenceMediaAssets( + sessionID: String, + proofImages: [String: Data] + ) async { + for (targetID, imageData) in proofImages { + do { + let evidenceAsset = try await opsAPIClient.registerMediaAsset( + sessionID: sessionID, + bucket: "evidence-images", + path: "sessions/\(sessionID)/proof/\(targetID).jpg", + assetType: "photo", + metadata: [ + "item_id": targetID, + "upload_state": "pending", + "bytes": imageData.count + ] + ) + _ = await uploadMediaAssetIfPossible( + assetID: evidenceAsset.id, + data: imageData, + contentType: "image/jpeg", + label: "Evidence image" + ) + } catch { + setOperationsSyncWarning( + phase: "evidence_upload", + message: "Evidence registration failed: \(error.localizedDescription)" + ) + } + } } - private func updateStatusFromState(_ state: StreamSessionState) { - switch state { - case .stopped: - currentVideoFrame = nil - streamingStatus = .stopped - case .waitingForDevice, .starting, .stopping, .paused: - streamingStatus = .waiting - case .streaming: - streamingStatus = .streaming + private func createExecutionMemoryLink( + sessionID: String, + sopID: String, + completedSteps: Int + ) async { + do { + _ = try await opsAPIClient.createMemoryLink( + sourceID: sessionID, + sourceType: "execution_session", + targetID: sopID, + targetType: "sop", + linkType: "executed", + metadata: [ + "completed_steps": completedSteps, + "total_steps": checklistItems.count + ] + ) + } catch { + setOperationsSyncWarning( + phase: "memory_link", + message: "Memory link sync failed: \(error.localizedDescription)" + ) } } - private func formatStreamingError(_ error: StreamSessionError) -> String { - switch error { - case .internalError: - return "An internal error occurred. Please try again." - case .deviceNotFound: - return "Device not found. Please ensure your device is connected." 
- case .deviceNotConnected: - return "Device not connected. Please check your connection and try again." - case .timeout: - return "The operation timed out. Please try again." - case .videoStreamingError: - return "Video streaming failed. Please try again." - case .audioStreamingError: - return "Audio streaming failed. Please try again." - case .permissionDenied: - return "Camera permission denied. Please grant permission in Settings." - case .hingesClosed: - return "The hinges on the glasses were closed. Please open the hinges and try again." - @unknown default: - return "An unknown streaming error occurred." + private func updateDossierPipelineStatus(_ message: String, kind: DossierPipelineStatusKind) { + dossierPipelineStatusMessage = message + dossierPipelineStatusKind = kind + dossierPipelineStatusTimestamp = Self.pipelineTimestampFormatter.string(from: Date()) + } +} + +private extension UIImage { + func resizedForLivePreview(maxDimension: CGFloat) -> UIImage { + let width = size.width + let height = size.height + guard width > 0, height > 0, maxDimension > 0 else { return self } + + let longest = max(width, height) + guard longest > maxDimension else { return self } + + let scale = maxDimension / longest + let targetSize = CGSize(width: width * scale, height: height * scale) + let renderer = UIGraphicsImageRenderer(size: targetSize) + return renderer.image { _ in + self.draw(in: CGRect(origin: .zero, size: targetSize)) } } } diff --git a/samples/CameraAccess/CameraAccess/Views/CaptureView.swift b/samples/CameraAccess/CameraAccess/Views/CaptureView.swift new file mode 100644 index 00000000..d146520f --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Views/CaptureView.swift @@ -0,0 +1,755 @@ +import AVFoundation +import SwiftUI +import UIKit + +struct CaptureView: View { + @Environment(\.dismiss) private var dismiss + @ObservedObject var viewModel: StreamSessionViewModel + @ObservedObject private var geminiAssistant: GeminiSessionViewModel + let sop: 
SOPTemplate + @State private var highlightedChecklistItemID: UUID? + @State private var isFinishingSOP: Bool = false + @State private var finishButtonPulse: Bool = false + + init(viewModel: StreamSessionViewModel, sop: SOPTemplate) { + self.viewModel = viewModel + self.sop = sop + self._geminiAssistant = ObservedObject(wrappedValue: viewModel.geminiAssistant) + } + + var body: some View { + GeometryReader { geometry in + ZStack { + DesignSystem.colors.deepNavy + .ignoresSafeArea() + + if viewModel.streamingMode == .iPhone, let previewSession = viewModel.iPhonePreviewSession { + IPhoneCameraPreviewSurface(session: previewSession) + .frame(width: geometry.size.width, height: geometry.size.height) + .clipped() + .ignoresSafeArea() + + if !viewModel.hasReceivedFirstFrame { + ProgressView() + .progressViewStyle(.circular) + .tint(DesignSystem.colors.vibrantTeal) + } + } else if let frame = viewModel.currentVideoFrame, viewModel.hasReceivedFirstFrame { + Image(uiImage: frame) + .resizable() + .aspectRatio(contentMode: .fill) + .frame(width: geometry.size.width, height: geometry.size.height) + .clipped() + .ignoresSafeArea() + } else { + ProgressView() + .progressViewStyle(.circular) + .tint(DesignSystem.colors.vibrantTeal) + } + + if viewModel.streamingMode == .iPhone, + viewModel.webrtcViewModel.incomingRemoteVideoEnabled, + viewModel.webrtcViewModel.hasRemoteVideo { + VStack { + HStack { + Spacer() + RTCVideoView(videoTrack: viewModel.webrtcViewModel.remoteVideoTrack) + .frame(width: 116, height: 156) + .clipShape(RoundedRectangle(cornerRadius: 12)) + .overlay( + RoundedRectangle(cornerRadius: 12) + .stroke(DesignSystem.colors.white.opacity(0.8), lineWidth: 1) + ) + .padding(.top, 12) + .padding(.trailing, 12) + } + Spacer() + } + } + + VStack(spacing: 10) { + topControls + .padding(.top, 12) + .padding(.horizontal, 12) + + Spacer() + + VStack(spacing: 10) { + if shouldShowAiConversationPanel { + aiConversationPanel + } + activeStepOverlay + bottomBar + } + 
.padding(12) + } + } + } + .navigationTitle(sop.name) + .navigationBarTitleDisplayMode(.inline) + .toolbarColorScheme(.dark, for: .navigationBar) + .task { + await viewModel.beginLiveCapture(for: sop) + } + .onChange(of: viewModel.shouldDismissCapture) { _, shouldDismiss in + guard shouldDismiss else { return } + viewModel.clearCaptureDismissFlag() + dismiss() + } + .onDisappear { + viewModel.stopHoldToTalk() + if viewModel.isSopAuditRunning { + viewModel.userTappedEndAndShip() + } + } + } + + private var topControls: some View { + ViewThatFits(in: .horizontal) { + HStack(alignment: .top, spacing: 10) { + topStatusCluster + Spacer(minLength: 10) + callBackOfficeButton + } + + topControlsCompact + } + } + + private var topStatusCluster: some View { + VStack(alignment: .leading, spacing: 8) { + captureStatusBadge + cameraSelector + if !viewModel.dossierPipelineStatusMessage.isEmpty || viewModel.isDossierUploading { + pipelineStatusBadge + } + if viewModel.webrtcViewModel.isActive || + !viewModel.helpStatusMessage.isEmpty || + !viewModel.geminiInstructionSyncStatus.isEmpty || + !viewModel.aiGuideStatusMessage.isEmpty { + supportStatusBar + } + } + .frame(maxWidth: 270, alignment: .leading) + } + + private var topControlsCompact: some View { + VStack(alignment: .leading, spacing: 8) { + HStack(alignment: .top, spacing: 10) { + captureStatusBadge + Spacer(minLength: 8) + callBackOfficeButton + } + cameraSelector + if !viewModel.dossierPipelineStatusMessage.isEmpty || viewModel.isDossierUploading { + pipelineStatusBadge + } + if viewModel.webrtcViewModel.isActive || + !viewModel.helpStatusMessage.isEmpty || + !viewModel.geminiInstructionSyncStatus.isEmpty || + !viewModel.aiGuideStatusMessage.isEmpty { + supportStatusBar + } + } + } + + private var bottomBar: some View { + ViewThatFits(in: .horizontal) { + HStack(spacing: 12) { + aiGuideButton + checkStepButton + finishSOPButton + } + + VStack(spacing: 10) { + HStack(spacing: 10) { + aiGuideButton + checkStepButton + } + 
finishSOPButton + } + } + } + + private var finishSOPButton: some View { + Button { + animateFinishSOPPress() + viewModel.userTappedEndAndShip() + } label: { + HStack(spacing: 8) { + if isFinishingSOP { + Image(systemName: "checkmark.circle.fill") + .transition(.scale.combined(with: .opacity)) + } + Text(isFinishingSOP ? "FINISHING..." : "FINISH SOP") + } + } + .brutalistDangerButton() + .scaleEffect(finishButtonPulse ? 1.04 : 1.0) + .opacity(isFinishingSOP ? 0.92 : 1.0) + .animation(.spring(response: 0.25, dampingFraction: 0.65), value: finishButtonPulse) + .animation(.easeInOut(duration: 0.2), value: isFinishingSOP) + .disabled(isFinishingSOP) + } + + private var aiGuideButton: some View { + let active = geminiAssistant.isGeminiActive + let starting = viewModel.isAiGuideStarting + return Button { + Task { + await viewModel.toggleGeminiAssistant() + } + } label: { + HStack(spacing: 8) { + if starting { + ProgressView() + .progressViewStyle(.circular) + .tint(DesignSystem.colors.deepNavy) + .scaleEffect(0.75) + } else { + Image(systemName: active ? "waveform.circle.fill" : "sparkles") + .font(.system(size: 15, weight: .bold)) + } + Text(viewModel.aiGuideButtonTitle) + .lineLimit(1) + .minimumScaleFactor(0.72) + } + .font(DesignSystem.fonts.mono(size: 13, weight: .semibold)) + .foregroundColor(active || starting ? DesignSystem.colors.deepNavy : DesignSystem.colors.white) + .padding(.horizontal, 12) + .frame(maxWidth: .infinity) + .frame(height: 48) + .background(active || starting ? DesignSystem.colors.vibrantTeal : DesignSystem.colors.deepNavy) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.white.opacity(0.9), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + .disabled(!viewModel.canToggleAiGuide) + .opacity(viewModel.canToggleAiGuide ? 
1 : 0.66) + } + + private var checkStepButton: some View { + Button { + viewModel.requestGuidedStepValidation(trigger: "tap") + } label: { + HStack(spacing: 6) { + if viewModel.isStepValidationRunning { + ProgressView() + .progressViewStyle(.circular) + .tint(DesignSystem.colors.white) + .scaleEffect(0.7) + } else { + Image(systemName: "checkmark.seal") + .font(.system(size: 14, weight: .bold)) + } + Text(viewModel.isStepValidationRunning ? "CHECKING" : "CHECK STEP") + .lineLimit(1) + .minimumScaleFactor(0.68) + } + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 10) + .frame(width: 118, height: 48) + .background(DesignSystem.colors.deepNavy.opacity(0.9)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.82), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + .disabled(!viewModel.canRequestStepValidation) + .opacity(viewModel.canRequestStepValidation ? 1 : 0.62) + } + + private var cameraSelector: some View { + HStack(spacing: 8) { + cameraModeButton( + title: "iPhone", + systemName: "iphone", + mode: .iPhone, + enabled: true + ) + cameraModeButton( + title: viewModel.hasActiveDevice ? "Glasses" : "No Glasses", + systemName: "eyeglasses", + mode: .glasses, + enabled: viewModel.hasActiveDevice + ) + } + .frame(maxWidth: 230) + } + + private func cameraModeButton( + title: String, + systemName: String, + mode: StreamingMode, + enabled: Bool + ) -> some View { + let selected = viewModel.preferredCaptureMode == mode + return Button { + viewModel.selectCaptureModeFromUI(mode) + } label: { + HStack(spacing: 5) { + Image(systemName: systemName) + .font(.system(size: 10, weight: .bold)) + Text(title) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .lineLimit(1) + .minimumScaleFactor(0.72) + } + .foregroundColor(selected ? 
DesignSystem.colors.deepNavy : DesignSystem.colors.white) + .padding(.horizontal, 8) + .frame(maxWidth: .infinity) + .frame(height: 32) + .background(selected ? DesignSystem.colors.vibrantTeal : DesignSystem.colors.deepNavy.opacity(0.72)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(selected ? DesignSystem.colors.white.opacity(0.9) : DesignSystem.colors.vibrantTeal.opacity(0.65), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + .disabled(!enabled || !viewModel.canSwitchCaptureMode) + .opacity((enabled && viewModel.canSwitchCaptureMode) ? 1 : 0.5) + } + + private var captureStatusBadge: some View { + HStack(spacing: 8) { + Image(systemName: "record.circle") + .font(.system(size: 13, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + Text("EXECUTING") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + Text(viewModel.progressText) + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + } + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(DesignSystem.colors.deepNavy.opacity(0.72)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.55), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private var callBackOfficeButton: some View { + Button { + viewModel.requestSupervisorHelp() + } label: { + HStack(spacing: 8) { + Image(systemName: "phone.fill") + .font(.system(size: 15, weight: .bold)) + Text(viewModel.backOfficeCallButtonTitle) + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .lineLimit(1) + .minimumScaleFactor(0.72) + } + .foregroundColor(DesignSystem.colors.deepNavy) + .padding(.horizontal, 12) + .frame(height: 44) + .background(DesignSystem.colors.vibrantTeal) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.white.opacity(0.92), 
lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + .shadow(color: DesignSystem.colors.vibrantTeal.opacity(0.32), radius: 16, x: 0, y: 6) + } + .buttonStyle(.plain) + .disabled(!viewModel.canTapBackOfficeCall) + .opacity(viewModel.canTapBackOfficeCall ? 1 : 0.72) + } + + private var activeStepOverlay: some View { + let step = currentStep + let isHighlighted = step.map { highlightedChecklistItemID == $0.id } ?? false + + return VStack(alignment: .leading, spacing: 10) { + HStack(alignment: .center, spacing: 8) { + Text("STEP \(currentStepNumber)/\(max(viewModel.checklistItems.count, 1))") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + + Spacer(minLength: 8) + + if let step, step.allowManualComplete || step.isChecked { + Button { + withAnimation(.spring(response: 0.28, dampingFraction: 0.76)) { + viewModel.toggleChecklistItem(itemID: step.id, viaVoice: false) + } + animateChecklistSelection(step.id) + } label: { + HStack(spacing: 6) { + Image(systemName: step.isChecked ? "arrow.uturn.backward" : "checkmark") + .font(.system(size: 11, weight: .bold)) + Text(step.isChecked ? "REOPEN" : "DONE") + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + } + .foregroundColor(step.isChecked ? DesignSystem.colors.white : DesignSystem.colors.deepNavy) + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(step.isChecked ? 
DesignSystem.colors.deepNavy.opacity(0.88) : DesignSystem.colors.vibrantTeal) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + } + } + + if let step { + Text(step.name) + .font(DesignSystem.fonts.body(size: 20, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .lineLimit(2) + .minimumScaleFactor(0.75) + + HStack(spacing: 8) { + if !step.duration.isEmpty { + stepTag(step.duration.uppercased(), color: DesignSystem.colors.blueGrey) + } + stepTag(step.validation.uppercased(), color: DesignSystem.colors.vibrantTeal) + if step.critical { + stepTag("CRITICAL", color: .red) + } + if !step.allowManualComplete && !step.isChecked { + stepTag("VISION", color: .orange) + } + } + } else { + Text("All steps complete") + .font(DesignSystem.fonts.body(size: 20, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + } + } + .padding(14) + .frame(maxWidth: .infinity, alignment: .leading) + .background(DesignSystem.colors.deepNavy.opacity(0.78)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.vibrantTeal.opacity(isHighlighted ? 0.95 : 0.5), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private func stepTag(_ title: String, color: Color) -> some View { + Text(title) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(color) + .lineLimit(1) + .padding(.horizontal, 7) + .padding(.vertical, 4) + .background(color.opacity(0.14)) + .clipShape(RoundedRectangle(cornerRadius: 6)) + } + + private var currentStep: ChecklistItemState? { + viewModel.checklistItems.first(where: { !$0.isChecked }) ?? 
viewModel.checklistItems.last + } + + private var currentStepNumber: Int { + guard let step = currentStep, + let index = viewModel.checklistItems.firstIndex(where: { $0.id == step.id }) + else { return 0 } + return index + 1 + } + + private var pipelineStatusBadge: some View { + let accentColor: Color = { + switch viewModel.dossierPipelineStatusKind { + case .info: + return DesignSystem.colors.blueGrey + case .active: + return DesignSystem.colors.vibrantTeal + case .success: + return DesignSystem.colors.deepGreen + case .error: + return .red + } + }() + + let statusIconName: String = { + switch viewModel.dossierPipelineStatusKind { + case .info: + return "info.circle.fill" + case .active: + return "dot.radiowaves.left.and.right" + case .success: + return "checkmark.seal.fill" + case .error: + return "exclamationmark.triangle.fill" + } + }() + + return HStack(spacing: 8) { + if viewModel.isDossierUploading { + ProgressView() + .tint(accentColor) + .scaleEffect(0.8) + } else { + Image(systemName: statusIconName) + .foregroundColor(accentColor) + .font(.system(size: 12, weight: .semibold)) + } + + HStack(spacing: 6) { + if !viewModel.dossierPipelineStatusTimestamp.isEmpty { + Text("[\(viewModel.dossierPipelineStatusTimestamp)]") + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(accentColor) + .lineLimit(1) + } + + Text(viewModel.dossierPipelineStatusMessage) + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .lineLimit(2) + } + + Spacer(minLength: 0) + } + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(DesignSystem.colors.deepNavy.opacity(0.78)) + .background(accentColor.opacity(0.12)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(accentColor.opacity(0.9), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + .transition(.move(edge: .top).combined(with: .opacity)) + .animation(.easeInOut(duration: 0.2), value: 
viewModel.dossierPipelineStatusMessage) + .animation(.easeInOut(duration: 0.2), value: viewModel.isDossierUploading) + .animation(.easeInOut(duration: 0.2), value: viewModel.dossierPipelineStatusKind) + .animation(.easeInOut(duration: 0.2), value: viewModel.dossierPipelineStatusTimestamp) + } + + private var supportStatusBar: some View { + VStack(alignment: .leading, spacing: 8) { + HStack(spacing: 8) { + if viewModel.webrtcViewModel.isActive { + WebRTCStatusBar(webrtcVM: viewModel.webrtcViewModel) + } else { + Text("SUPPORT") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + .padding(.horizontal, 10) + .padding(.vertical, 6) + .background(DesignSystem.colors.deepNavy.opacity(0.6)) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.8), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 16)) + } + + Spacer(minLength: 0) + + if viewModel.streamingMode == .iPhone, + viewModel.webrtcViewModel.isActive { + Button( + viewModel.webrtcViewModel.incomingRemoteVideoEnabled + ? 
"HIDE SUP VIDEO" + : "SHOW SUP VIDEO" + ) { + viewModel.webrtcViewModel.setIncomingRemoteVideoEnabled( + !viewModel.webrtcViewModel.incomingRemoteVideoEnabled + ) + } + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(DesignSystem.colors.deepNavy.opacity(0.82)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.white.opacity(0.8), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + Text(viewModel.backOfficeCallButtonTitle) + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + .padding(.horizontal, 10) + .padding(.vertical, 7) + .background(DesignSystem.colors.deepNavy.opacity(0.65)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.6), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + if !viewModel.helpStatusMessage.isEmpty { + Text(viewModel.helpStatusMessage) + .font(DesignSystem.fonts.body(size: 12)) + .foregroundColor(DesignSystem.colors.white) + .multilineTextAlignment(.leading) + } + + if !viewModel.aiGuideStatusMessage.isEmpty { + Text(viewModel.aiGuideStatusMessage) + .font(DesignSystem.fonts.body(size: 12)) + .foregroundColor(DesignSystem.colors.white) + .multilineTextAlignment(.leading) + } + + if !viewModel.geminiInstructionSyncStatus.isEmpty { + Text(viewModel.geminiInstructionSyncStatus) + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(DesignSystem.colors.blueGrey) + .multilineTextAlignment(.leading) + } + } + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(DesignSystem.colors.deepNavy.opacity(0.78)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.8), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private var 
shouldShowAiConversationPanel: Bool { + geminiAssistant.isGeminiActive || + viewModel.isAiGuideStarting || + !geminiAssistant.userTranscript.isEmpty || + !geminiAssistant.aiTranscript.isEmpty || + !(geminiAssistant.errorMessage ?? "").isEmpty + } + + private var aiConversationPanel: some View { + VStack(alignment: .leading, spacing: 8) { + HStack(spacing: 8) { + Image(systemName: geminiAssistant.isModelSpeaking ? "speaker.wave.2.fill" : "waveform.circle.fill") + .font(.system(size: 13, weight: .semibold)) + .foregroundColor(aiConversationColor) + + Text(aiConversationTitle) + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + + Spacer(minLength: 6) + + Text(aiConnectionLabel) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(aiConversationColor) + } + + if !geminiAssistant.userTranscript.isEmpty { + Text("You: \(geminiAssistant.userTranscript)") + .font(DesignSystem.fonts.body(size: 12, weight: .medium)) + .foregroundColor(DesignSystem.colors.white.opacity(0.78)) + .lineLimit(2) + } + + if !geminiAssistant.aiTranscript.isEmpty { + Text("AI: \(geminiAssistant.aiTranscript)") + .font(DesignSystem.fonts.body(size: 13, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .lineLimit(3) + } + + if let error = geminiAssistant.errorMessage, !error.isEmpty { + Text(error) + .font(DesignSystem.fonts.body(size: 12, weight: .medium)) + .foregroundColor(.white) + .lineLimit(3) + } + } + .padding(12) + .frame(maxWidth: .infinity, alignment: .leading) + .background(DesignSystem.colors.deepNavy.opacity(0.82)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(aiConversationColor.opacity(0.75), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private var aiConversationTitle: String { + if geminiAssistant.isModelSpeaking { + return "AI speaking" + } + if viewModel.isAiGuideStarting { + return "Starting AI guide" + } + if 
geminiAssistant.isGeminiActive && geminiAssistant.isAudioReady { + return "AI listening" + } + if geminiAssistant.isGeminiActive { + return "AI connecting" + } + return "AI guide" + } + + private var aiConnectionLabel: String { + switch geminiAssistant.connectionState { + case .ready: + return "READY" + case .connecting: + return "CONNECTING" + case .settingUp: + return "SETTING UP" + case .error: + return "ERROR" + case .disconnected: + return "OFF" + } + } + + private var aiConversationColor: Color { + switch geminiAssistant.connectionState { + case .ready: + return DesignSystem.colors.vibrantTeal + case .connecting, .settingUp: + return .orange + case .error: + return .red + case .disconnected: + return DesignSystem.colors.blueGrey + } + } + + private func animateChecklistSelection(_ itemID: UUID) { + withAnimation(.easeInOut(duration: 0.18)) { + highlightedChecklistItemID = itemID + } + + DispatchQueue.main.asyncAfter(deadline: .now() + 0.55) { + withAnimation(.easeOut(duration: 0.22)) { + if highlightedChecklistItemID == itemID { + highlightedChecklistItemID = nil + } + } + } + } + + private func animateFinishSOPPress() { + guard !isFinishingSOP else { return } + + withAnimation(.spring(response: 0.24, dampingFraction: 0.7)) { + isFinishingSOP = true + finishButtonPulse = true + } + + Task { @MainActor in + try? await Task.sleep(nanoseconds: 450_000_000) + withAnimation(.easeOut(duration: 0.2)) { + finishButtonPulse = false + } + + try? 
await Task.sleep(nanoseconds: 700_000_000) + withAnimation(.easeInOut(duration: 0.2)) { + isFinishingSOP = false + } + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/Components/CustomButton.swift b/samples/CameraAccess/CameraAccess/Views/Components/CustomButton.swift index 2766f522..e255f7d1 100644 --- a/samples/CameraAccess/CameraAccess/Views/Components/CustomButton.swift +++ b/samples/CameraAccess/CameraAccess/Views/Components/CustomButton.swift @@ -26,20 +26,33 @@ struct CustomButton: View { var backgroundColor: Color { switch self { case .primary: - return .appPrimary + return DesignSystem.colors.vibrantTeal case .secondary: - return Color(white: 0.25) + return DesignSystem.colors.surface case .destructive: - return .destructiveBackground + return DesignSystem.colors.dangerRed } } var foregroundColor: Color { switch self { - case .primary, .secondary: + case .primary: + return DesignSystem.colors.deepNavy + case .secondary: return .white case .destructive: - return .destructiveForeground + return .white + } + } + + var borderColor: Color { + switch self { + case .primary: + return DesignSystem.colors.border + case .secondary: + return DesignSystem.colors.border + case .destructive: + return DesignSystem.colors.dangerRed } } } @@ -52,6 +65,10 @@ struct CustomButton: View { .frame(maxWidth: .infinity) .frame(height: 56) .background(style.backgroundColor) + .overlay( + RoundedRectangle(cornerRadius: 30) + .stroke(style.borderColor, lineWidth: 1) + ) .cornerRadius(30) } .disabled(isDisabled) diff --git a/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift b/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift index 67ec11fb..cf6489e4 100644 --- a/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift +++ b/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift @@ -5,11 +5,7 @@ struct GeminiStatusBar: View { var body: some View { HStack(spacing: 8) { - // Gemini 
connection pill StatusPill(color: geminiStatusColor, text: geminiStatusText) - - // OpenClaw connection pill - StatusPill(color: openClawStatusColor, text: openClawStatusText) } } @@ -31,23 +27,6 @@ struct GeminiStatusBar: View { } } - private var openClawStatusColor: Color { - switch geminiVM.openClawConnectionState { - case .connected: return .green - case .checking: return .yellow - case .unreachable: return .red - case .notConfigured: return .gray - } - } - - private var openClawStatusText: String { - switch geminiVM.openClawConnectionState { - case .connected: return "OpenClaw" - case .checking: return "OpenClaw..." - case .unreachable: return "OpenClaw Off" - case .notConfigured: return "No OpenClaw" - } - } } struct StatusPill: View { @@ -95,60 +74,6 @@ struct TranscriptView: View { } } -struct ToolCallStatusView: View { - let status: ToolCallStatus - - var body: some View { - if status != .idle { - HStack(spacing: 8) { - statusIcon - Text(status.displayText) - .font(.system(size: 13, weight: .medium)) - .foregroundColor(.white) - .lineLimit(1) - } - .padding(.horizontal, 14) - .padding(.vertical, 8) - .background(statusBackground) - .cornerRadius(16) - } - } - - @ViewBuilder - private var statusIcon: some View { - switch status { - case .executing: - ProgressView() - .scaleEffect(0.7) - .tint(.white) - case .completed: - Image(systemName: "checkmark.circle.fill") - .foregroundColor(.green) - .font(.system(size: 14)) - case .failed: - Image(systemName: "exclamationmark.circle.fill") - .foregroundColor(.red) - .font(.system(size: 14)) - case .cancelled: - Image(systemName: "xmark.circle.fill") - .foregroundColor(.yellow) - .font(.system(size: 14)) - case .idle: - EmptyView() - } - } - - private var statusBackground: Color { - switch status { - case .executing: return Color.black.opacity(0.7) - case .completed: return Color.black.opacity(0.6) - case .failed: return Color.red.opacity(0.3) - case .cancelled: return Color.black.opacity(0.6) - case .idle: return 
Color.clear - } - } -} - struct SpeakingIndicator: View { @State private var animating = false @@ -170,3 +95,58 @@ struct SpeakingIndicator: View { .onDisappear { animating = false } } } + +struct GeminiAssistantOverlay: View { + @ObservedObject var geminiVM: GeminiSessionViewModel + let onToggle: () -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 10) { + HStack(alignment: .center, spacing: 10) { + GeminiStatusBar(geminiVM: geminiVM) + Spacer(minLength: 0) + Button(action: onToggle) { + Text(geminiVM.isGeminiActive ? "STOP AI" : "START AI") + .font(.system(size: 12, weight: .semibold, design: .monospaced)) + .foregroundColor(.white) + .padding(.horizontal, 12) + .padding(.vertical, 8) + .background(Color.black.opacity(0.68)) + .overlay( + RoundedRectangle(cornerRadius: 14) + .stroke(Color.white.opacity(0.2), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 14)) + } + } + + if !geminiVM.userTranscript.isEmpty || !geminiVM.aiTranscript.isEmpty { + TranscriptView(userText: geminiVM.userTranscript, aiText: geminiVM.aiTranscript) + } + + HStack(spacing: 10) { + if geminiVM.isModelSpeaking { + SpeakingIndicator() + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(Color.black.opacity(0.55)) + .cornerRadius(16) + } + Spacer(minLength: 0) + } + + if let errorMessage = geminiVM.errorMessage, !errorMessage.isEmpty { + Text(errorMessage) + .font(.system(size: 12, weight: .medium)) + .foregroundColor(.white) + .padding(.horizontal, 14) + .padding(.vertical, 10) + .background(Color.red.opacity(0.28)) + .cornerRadius(12) + } + } + .padding(.horizontal, 16) + .padding(.top, 18) + .frame(maxWidth: .infinity, alignment: .leading) + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/DesignSystem.swift b/samples/CameraAccess/CameraAccess/Views/DesignSystem.swift new file mode 100644 index 00000000..f62ea750 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Views/DesignSystem.swift @@ -0,0 +1,116 @@ +import SwiftUI + 
+extension Color { + init(hex: String) { + let hex = hex.trimmingCharacters(in: CharacterSet.alphanumerics.inverted) + var int: UInt64 = 0 + Scanner(string: hex).scanHexInt64(&int) + let a, r, g, b: UInt64 + switch hex.count { + case 3: + (a, r, g, b) = (255, (int >> 8) * 17, (int >> 4 & 0xF) * 17, (int & 0xF) * 17) + case 6: + (a, r, g, b) = (255, int >> 16, int >> 8 & 0xFF, int & 0xFF) + case 8: + (a, r, g, b) = (int >> 24, int >> 16 & 0xFF, int >> 8 & 0xFF, int & 0xFF) + default: + (a, r, g, b) = (255, 0, 0, 0) + } + + self.init( + .sRGB, + red: Double(r) / 255, + green: Double(g) / 255, + blue: Double(b) / 255, + opacity: Double(a) / 255 + ) + } +} + +enum DesignSystem { + enum colors { + static let adminBackground = Color(hex: "#F9FAF7") + static let adminSurface = Color(hex: "#FFFFFF") + static let adminInk = Color(hex: "#2C2C2C") + static let adminMuted = Color(hex: "#646464") + static let adminSubtle = Color(hex: "#B4B8B4") + static let adminStroke = Color(hex: "#DEE2DE") + static let brandOrange = Color(hex: "#DD6B2E") + static let brandOrangeDeep = Color(hex: "#B85222") + static let successGreen = Color(hex: "#1F9D6F") + static let adminWarning = Color(hex: "#D8A838") + + static let deepNavy = Color(hex: "#080D18") + static let surface = Color(hex: "#111827") + static let surfaceRaised = Color(hex: "#1F2937") + static let border = Color(hex: "#374151") + static let vibrantTeal = Color(hex: "#06B6D4") + static let deepGreen = Color(hex: "#10B981") + static let warningAmber = Color(hex: "#F59E0B") + static let dangerRed = Color(hex: "#EF4444") + static let white = Color(hex: "#FFFFFF") + static let blueGrey = Color(hex: "#9CA3AF") + } + + enum fonts { + static func mono(size: CGFloat, weight: Font.Weight = .regular) -> Font { + .system(size: size, weight: weight, design: .monospaced) + } + + static func body(size: CGFloat, weight: Font.Weight = .regular) -> Font { + .system(size: size, weight: weight, design: .default) + } + } +} + +struct 
BrutalistCardModifier: ViewModifier { + let stroke: Color + + func body(content: Content) -> some View { + content + .padding(16) + .background(DesignSystem.colors.surface) + .overlay( + Rectangle() + .stroke(stroke, lineWidth: 1) + ) + } +} + +struct BrutalistPrimaryButtonModifier: ViewModifier { + func body(content: Content) -> some View { + content + .font(DesignSystem.fonts.mono(size: 16, weight: .semibold)) + .foregroundColor(DesignSystem.colors.deepNavy) + .frame(maxWidth: .infinity) + .padding(.vertical, 14) + .background(DesignSystem.colors.vibrantTeal) + .overlay(Rectangle().stroke(DesignSystem.colors.border, lineWidth: 1)) + } +} + +struct BrutalistDangerButtonModifier: ViewModifier { + func body(content: Content) -> some View { + content + .font(DesignSystem.fonts.mono(size: 16, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .frame(maxWidth: .infinity) + .padding(.vertical, 14) + .background(DesignSystem.colors.surface) + .overlay(Rectangle().stroke(DesignSystem.colors.vibrantTeal, lineWidth: 1)) + } +} + +extension View { + func brutalistCard(stroke: Color = DesignSystem.colors.blueGrey) -> some View { + modifier(BrutalistCardModifier(stroke: stroke)) + } + + func brutalistPrimaryButton() -> some View { + modifier(BrutalistPrimaryButtonModifier()) + } + + func brutalistDangerButton() -> some View { + modifier(BrutalistDangerButtonModifier()) + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/HistoryView.swift b/samples/CameraAccess/CameraAccess/Views/HistoryView.swift new file mode 100644 index 00000000..be6fa474 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Views/HistoryView.swift @@ -0,0 +1,42 @@ +import SwiftUI + +struct HistoryView: View { + @ObservedObject var viewModel: StreamSessionViewModel + + var body: some View { + ScrollView { + VStack(alignment: .leading, spacing: 14) { + if viewModel.shippedHistory.isEmpty { + VStack(alignment: .leading, spacing: 8) { + Text("NO EXECUTIONS YET") + 
.font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + Text("Completed sessions will appear here after you finish a run.") + .font(DesignSystem.fonts.body(size: 14)) + .foregroundColor(DesignSystem.colors.blueGrey) + } + .brutalistCard(stroke: DesignSystem.colors.blueGrey) + } else { + ForEach(viewModel.shippedHistory) { session in + VStack(alignment: .leading, spacing: 6) { + Text(session.sopName) + .font(DesignSystem.fonts.mono(size: 16, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + + Text(session.timestampText) + .font(DesignSystem.fonts.body(size: 13)) + .foregroundColor(DesignSystem.colors.blueGrey) + + Text(session.status.uppercased()) + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(session.status.lowercased().contains("local") ? .orange : DesignSystem.colors.deepGreen) + } + .brutalistCard(stroke: DesignSystem.colors.blueGrey) + } + } + } + .padding(14) + } + .background(DesignSystem.colors.deepNavy) + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/HomeScreenView.swift b/samples/CameraAccess/CameraAccess/Views/HomeScreenView.swift index 8a40bbf1..5fd08572 100644 --- a/samples/CameraAccess/CameraAccess/Views/HomeScreenView.swift +++ b/samples/CameraAccess/CameraAccess/Views/HomeScreenView.swift @@ -22,59 +22,84 @@ struct HomeScreenView: View { var body: some View { ZStack { - Color.white.edgesIgnoringSafeArea(.all) + DesignSystem.colors.deepNavy + .ignoresSafeArea() - VStack(spacing: 12) { + VStack(spacing: 18) { HStack { Spacer() Button { showSettings = true } label: { - Image(systemName: "gearshape") - .resizable() - .aspectRatio(contentMode: .fit) - .foregroundColor(.black) - .frame(width: 24, height: 24) + Image(systemName: "slider.horizontal.3") + .font(.system(size: 18, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + .frame(width: 42, height: 42) + .background(DesignSystem.colors.surface) + .overlay( + 
RoundedRectangle(cornerRadius: 12) + .stroke(DesignSystem.colors.border, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 12)) } } - Spacer() - - Image(.cameraAccessIcon) - .resizable() - .aspectRatio(contentMode: .fit) - .frame(width: 120) + Spacer(minLength: 0) + + VStack(spacing: 14) { + Image(.cameraAccessIcon) + .resizable() + .aspectRatio(contentMode: .fit) + .frame(width: 92) + .padding(18) + .background(DesignSystem.colors.surface) + .overlay( + RoundedRectangle(cornerRadius: 20) + .stroke(DesignSystem.colors.vibrantTeal.opacity(0.45), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 20)) + + Text("EMBARCADERO WORKER") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.vibrantTeal) + + Text("Connect your glasses, load today’s package, and execute the next step hands-free.") + .font(DesignSystem.fonts.body(size: 17)) + .foregroundColor(DesignSystem.colors.white) + .multilineTextAlignment(.center) + .padding(.horizontal, 10) + } VStack(spacing: 12) { HomeTipItemView( resource: .smartGlassesIcon, - title: "Video Capture", - text: "Record videos directly from your glasses, from your point of view." + title: "Assigned SOP Packages", + text: "Sync your worker queue from ops-api so the current package and SOP line are ready when you begin." ) HomeTipItemView( resource: .soundIcon, - title: "Open-Ear Audio", - text: "Hear notifications while keeping your ears open to the world around you." + title: "Live Supervisor Support", + text: "Request jump-in help during an execution and keep the room synced while you stay on task." ) HomeTipItemView( resource: .walkingIcon, - title: "Enjoy On-the-Go", - text: "Stay hands-free while you move through your day. Move freely, stay connected." + title: "Phone Or Glasses", + text: "Use Ray-Bans when available, or continue on iPhone without breaking the execution flow." 
) } - Spacer() + Spacer(minLength: 0) - VStack(spacing: 20) { - Text("You'll be redirected to the Meta AI app to confirm your connection.") - .font(.system(size: 14)) - .foregroundColor(.gray) + VStack(spacing: 16) { + Text("You’ll be redirected to the Meta AI app once to confirm the glasses connection.") + .font(DesignSystem.fonts.body(size: 14)) + .foregroundColor(DesignSystem.colors.blueGrey) .multilineTextAlignment(.center) .fixedSize(horizontal: false, vertical: true) .padding(.horizontal, 12) CustomButton( - title: viewModel.registrationState == .registering ? "Connecting..." : "Connect my glasses", + title: viewModel.registrationState == .registering ? "CONNECTING GLASSES..." : "CONNECT RAY-BAN GLASSES", style: .primary, isDisabled: viewModel.registrationState == .registering ) { @@ -82,7 +107,7 @@ struct HomeScreenView: View { } CustomButton( - title: "Start on iPhone", + title: "USE IPHONE CAMERA", style: .secondary, isDisabled: false ) { @@ -109,7 +134,7 @@ struct HomeTipItemView: View { Image(resource) .resizable() .renderingMode(.template) - .foregroundColor(.black) + .foregroundColor(DesignSystem.colors.vibrantTeal) .aspectRatio(contentMode: .fit) .frame(width: 24) .padding(.leading, 4) @@ -117,14 +142,21 @@ struct HomeTipItemView: View { VStack(alignment: .leading, spacing: 6) { Text(title) - .font(.system(size: 18, weight: .semibold)) - .foregroundColor(.black) + .font(DesignSystem.fonts.mono(size: 16, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) Text(text) - .font(.system(size: 15)) - .foregroundColor(.gray) + .font(DesignSystem.fonts.body(size: 14)) + .foregroundColor(DesignSystem.colors.blueGrey) } Spacer() } + .padding(14) + .background(DesignSystem.colors.surface) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(DesignSystem.colors.border, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 16)) } } diff --git a/samples/CameraAccess/CameraAccess/Views/HomeView.swift 
b/samples/CameraAccess/CameraAccess/Views/HomeView.swift new file mode 100644 index 00000000..2b32f43f --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Views/HomeView.swift @@ -0,0 +1,421 @@ +import MWDATCore +import SwiftUI + +private enum WorkerHomeSheet: String, Identifiable { + case history + case settings + + var id: String { rawValue } +} + +struct HomeView: View { + @Environment(\.scenePhase) private var scenePhase + @ObservedObject var viewModel: StreamSessionViewModel + @ObservedObject var wearablesViewModel: WearablesViewModel + @State private var activeSheet: WorkerHomeSheet? + + var body: some View { + GeometryReader { geometry in + ZStack { + cameraBackdrop + + VStack(alignment: .leading, spacing: 0) { + header + .padding(.top, geometry.safeAreaInsets.top + 10) + + Spacer(minLength: 16) + + VStack(alignment: .leading, spacing: 12) { + sopQueuePanel(maxHeight: min(geometry.size.height * 0.46, 390)) + notices + } + .padding(.bottom, geometry.safeAreaInsets.bottom + 12) + } + .padding(.horizontal, 16) + } + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + .background(DesignSystem.colors.deepNavy.ignoresSafeArea()) + .toolbar(.hidden, for: .navigationBar) + .sheet(item: $activeSheet) { sheet in + switch sheet { + case .history: + NavigationStack { + HistoryView(viewModel: viewModel) + .background(DesignSystem.colors.adminBackground.ignoresSafeArea()) + .navigationTitle("History") + .navigationBarTitleDisplayMode(.inline) + } + case .settings: + SettingsView() + } + } + .fullScreenCover(item: $viewModel.activeCaptureSOP) { sop in + NavigationStack { + CaptureView(viewModel: viewModel, sop: sop) + } + } + .task { + await viewModel.handleWorkerHomeEntered() + } + .onChange(of: scenePhase) { _, newPhase in + guard newPhase == .active else { return } + Task { + await viewModel.handleWorkerAppBecameActive() + } + } + } + + @ViewBuilder + private var cameraBackdrop: some View { + ZStack { + if viewModel.streamingMode == .iPhone, + let previewSession = 
viewModel.iPhonePreviewSession { + IPhoneCameraPreviewSurface(session: previewSession) + .ignoresSafeArea() + } else if let frame = viewModel.currentVideoFrame { + Image(uiImage: frame) + .resizable() + .aspectRatio(contentMode: .fill) + .ignoresSafeArea() + } else { + VStack(spacing: 12) { + ProgressView() + .tint(DesignSystem.colors.vibrantTeal) + Text("OPENING CAMERA") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.blueGrey) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + .background(DesignSystem.colors.deepNavy) + } + + LinearGradient( + colors: [ + .black.opacity(0.58), + .black.opacity(0.12), + .black.opacity(0.72) + ], + startPoint: .top, + endPoint: .bottom + ) + .ignoresSafeArea() + } + } + + private var header: some View { + HStack(alignment: .top, spacing: 12) { + VStack(alignment: .leading, spacing: 6) { + Text("EMBARCADERO") + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 10) + .padding(.vertical, 5) + .background(DesignSystem.colors.brandOrange) + .clipShape(RoundedRectangle(cornerRadius: 8)) + + Text(viewModel.workerDisplayName) + .font(DesignSystem.fonts.body(size: 28, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .lineLimit(1) + .minimumScaleFactor(0.72) + + Text("\(viewModel.activePackageTitle) · \(viewModel.currentPackageProgressText)") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white.opacity(0.76)) + .lineLimit(2) + .minimumScaleFactor(0.72) + } + + Spacer(minLength: 12) + + HStack(spacing: 8) { + iconButton(systemName: "arrow.clockwise") { + Task { await viewModel.refreshWorkerContext() } + } + .disabled(viewModel.isSyncingOperations) + .opacity(viewModel.isSyncingOperations ? 
0.55 : 1) + + iconButton(systemName: "clock.arrow.circlepath") { + activeSheet = .history + } + + iconButton(systemName: "slider.horizontal.3") { + activeSheet = .settings + } + } + } + } + + private func sopQueuePanel(maxHeight: CGFloat) -> some View { + VStack(alignment: .leading, spacing: 14) { + HStack(spacing: 8) { + Text("PENDING SOPS") + .font(DesignSystem.fonts.mono(size: 12, weight: .semibold)) + .foregroundColor(DesignSystem.colors.brandOrange) + Spacer() + statusPill(title: viewModel.assignmentQueueSummary, color: DesignSystem.colors.brandOrange) + } + + HStack(spacing: 8) { + statusPill(title: viewModel.pendingShiftLabel, color: DesignSystem.colors.adminMuted) + statusPill(title: viewModel.selectedCaptureModeLabel, color: DesignSystem.colors.successGreen) + } + + cameraSelector + + if viewModel.pendingTaskSOPs.isEmpty { + VStack(alignment: .leading, spacing: 10) { + Text("No more SOPs pending") + .font(DesignSystem.fonts.body(size: 24, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminInk) + + Text("This package queue is complete. 
Refresh assignments when the next package is ready.") + .font(DesignSystem.fonts.body(size: 15, weight: .medium)) + .foregroundColor(DesignSystem.colors.adminMuted) + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.vertical, 10) + } else { + ScrollView(showsIndicators: true) { + VStack(spacing: 8) { + ForEach(Array(viewModel.pendingTaskSOPs.enumerated()), id: \.element.id) { index, sop in + sopQueueRow(sop, isNext: index == 0) + } + } + } + .frame(maxHeight: maxHeight) + } + } + .padding(16) + .background(DesignSystem.colors.adminSurface.opacity(0.96)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.adminStroke, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + .shadow(color: .black.opacity(0.05), radius: 18, x: 0, y: 10) + } + + private var cameraSelector: some View { + HStack(spacing: 8) { + cameraModeButton( + title: "iPhone Camera", + systemName: "iphone", + mode: .iPhone, + enabled: true + ) + cameraModeButton( + title: viewModel.hasActiveDevice ? "Glasses Camera" : "Glasses Unavailable", + systemName: "eyeglasses", + mode: .glasses, + enabled: viewModel.hasActiveDevice + ) + } + } + + private func cameraModeButton( + title: String, + systemName: String, + mode: StreamingMode, + enabled: Bool + ) -> some View { + let selected = viewModel.preferredCaptureMode == mode + return Button { + viewModel.selectCaptureModeFromUI(mode) + } label: { + HStack(spacing: 6) { + Image(systemName: systemName) + .font(.system(size: 11, weight: .semibold)) + Text(title) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .lineLimit(1) + .minimumScaleFactor(0.72) + } + .foregroundColor(selected ? DesignSystem.colors.white : DesignSystem.colors.adminInk) + .padding(.horizontal, 9) + .frame(maxWidth: .infinity) + .frame(height: 34) + .background(selected ? DesignSystem.colors.adminInk : DesignSystem.colors.adminBackground) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(selected ? 
DesignSystem.colors.successGreen : DesignSystem.colors.adminStroke, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + .disabled(!enabled || !viewModel.canSwitchCaptureMode) + .opacity((enabled && viewModel.canSwitchCaptureMode) ? 1 : 0.5) + } + + private func sopQueueRow(_ sop: SOPTemplate, isNext: Bool) -> some View { + Button { + viewModel.presentCapture(for: sop) + } label: { + HStack(spacing: 12) { + VStack(alignment: .leading, spacing: 6) { + HStack(spacing: 8) { + if isNext { + Text("NEXT") + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background(DesignSystem.colors.successGreen) + .clipShape(RoundedRectangle(cornerRadius: 6)) + } + + Text(sop.packageTitle ?? viewModel.activePackageTitle) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminMuted) + .lineLimit(1) + } + + Text(sop.name) + .font(DesignSystem.fonts.body(size: 17, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminInk) + .lineLimit(2) + .minimumScaleFactor(0.78) + + Text("\(sop.steps.count) steps · \(sop.validationSummary)") + .font(DesignSystem.fonts.body(size: 12, weight: .medium)) + .foregroundColor(DesignSystem.colors.adminMuted) + .lineLimit(1) + } + + Spacer(minLength: 8) + + HStack(spacing: 6) { + Image(systemName: "play.fill") + .font(.system(size: 11, weight: .bold)) + Text("START") + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + } + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 10) + .padding(.vertical, 9) + .background(DesignSystem.colors.adminInk) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .padding(12) + .background(isNext ? DesignSystem.colors.successGreen.opacity(0.08) : DesignSystem.colors.adminBackground) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(isNext ? 
DesignSystem.colors.successGreen.opacity(0.5) : DesignSystem.colors.adminStroke, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + .disabled(viewModel.isSyncingOperations) + } + + @ViewBuilder + private var notices: some View { + if viewModel.isSyncingOperations { + noticeRow( + icon: "arrow.triangle.2.circlepath", + title: "Syncing assignments", + body: "Loading the current worker queue.", + color: DesignSystem.colors.brandOrange + ) + } + + if let warning = viewModel.operationsSyncWarning, + !warning.isEmpty { + noticeRow( + icon: "exclamationmark.triangle.fill", + title: "Sync warning", + body: warning, + color: DesignSystem.colors.adminWarning + ) + } + + if let error = viewModel.operationsSyncError, + !error.isEmpty { + noticeRow( + icon: "exclamationmark.octagon.fill", + title: "Assignment issue", + body: error, + color: DesignSystem.colors.dangerRed + ) + } + } + + private func iconButton(systemName: String, action: @escaping () -> Void) -> some View { + Button(action: action) { + Image(systemName: systemName) + .font(.system(size: 15, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminInk) + .frame(width: 40, height: 40) + .background(DesignSystem.colors.adminSurface) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.adminStroke, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + .buttonStyle(.plain) + } + + private func statusPill(title: String, color: Color) -> some View { + Text(title.uppercased()) + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(color) + .lineLimit(1) + .minimumScaleFactor(0.7) + .padding(.horizontal, 10) + .padding(.vertical, 7) + .background(DesignSystem.colors.adminSurface) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(DesignSystem.colors.adminStroke, lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private func assignmentMetric(_ value: String, _ label: 
String) -> some View { + VStack(alignment: .leading, spacing: 2) { + Text(value.uppercased()) + .font(DesignSystem.fonts.mono(size: 13, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminInk) + .lineLimit(1) + .minimumScaleFactor(0.7) + + Text(label.uppercased()) + .font(DesignSystem.fonts.mono(size: 10, weight: .semibold)) + .foregroundColor(DesignSystem.colors.adminMuted) + } + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background(DesignSystem.colors.adminBackground) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } + + private func noticeRow(icon: String, title: String, body: String, color: Color) -> some View { + HStack(alignment: .top, spacing: 10) { + Image(systemName: icon) + .font(.system(size: 14, weight: .semibold)) + .foregroundColor(color) + .frame(width: 24, height: 24) + + VStack(alignment: .leading, spacing: 4) { + Text(title.uppercased()) + .font(DesignSystem.fonts.mono(size: 11, weight: .semibold)) + .foregroundColor(color) + + Text(body) + .font(DesignSystem.fonts.body(size: 13)) + .foregroundColor(DesignSystem.colors.adminMuted) + .fixedSize(horizontal: false, vertical: true) + } + Spacer(minLength: 0) + } + .padding(12) + .background(DesignSystem.colors.adminSurface) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(color.opacity(0.35), lineWidth: 1) + ) + .clipShape(RoundedRectangle(cornerRadius: 8)) + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/IPhoneCameraPreviewSurface.swift b/samples/CameraAccess/CameraAccess/Views/IPhoneCameraPreviewSurface.swift new file mode 100644 index 00000000..600f848d --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Views/IPhoneCameraPreviewSurface.swift @@ -0,0 +1,42 @@ +import AVFoundation +import SwiftUI +import UIKit + +struct IPhoneCameraPreviewSurface: UIViewRepresentable { + let session: AVCaptureSession + + func makeUIView(context: Context) -> PreviewContainerView { + let view = PreviewContainerView() + view.previewLayer.videoGravity = .resizeAspectFill + 
view.previewLayer.session = session + if let connection = view.previewLayer.connection, + connection.isVideoRotationAngleSupported(90) { + connection.videoRotationAngle = 90 + } + return view + } + + func updateUIView(_ uiView: PreviewContainerView, context: Context) { + if uiView.previewLayer.session !== session { + uiView.previewLayer.session = session + } + if let connection = uiView.previewLayer.connection, + connection.isVideoRotationAngleSupported(90) { + connection.videoRotationAngle = 90 + } + } + + static func dismantleUIView(_ uiView: PreviewContainerView, coordinator: ()) { + uiView.previewLayer.session = nil + } +} + +final class PreviewContainerView: UIView { + override class var layerClass: AnyClass { + AVCaptureVideoPreviewLayer.self + } + + var previewLayer: AVCaptureVideoPreviewLayer { + layer as! AVCaptureVideoPreviewLayer + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/MainAppView.swift b/samples/CameraAccess/CameraAccess/Views/MainAppView.swift index 4bfe2bde..265e0895 100644 --- a/samples/CameraAccess/CameraAccess/Views/MainAppView.swift +++ b/samples/CameraAccess/CameraAccess/Views/MainAppView.swift @@ -27,11 +27,6 @@ struct MainAppView: View { } var body: some View { - if viewModel.registrationState == .registered || viewModel.hasMockDevice || viewModel.skipToIPhoneMode { - StreamSessionView(wearables: wearables, wearablesVM: viewModel) - } else { - // User not registered - show registration/onboarding flow - HomeScreenView(viewModel: viewModel) - } + StreamSessionView(wearables: wearables, wearablesVM: viewModel) } } diff --git a/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift b/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift index 8fa01b55..bd9f03aa 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift @@ -19,8 +19,6 @@ struct StreamSessionView: View { let wearables: WearablesInterface @ObservedObject 
private var wearablesViewModel: WearablesViewModel @StateObject private var viewModel: StreamSessionViewModel - @StateObject private var geminiVM = GeminiSessionViewModel() - @StateObject private var webrtcVM = WebRTCSessionViewModel() init(wearables: WearablesInterface, wearablesVM: WearablesViewModel) { self.wearables = wearables @@ -29,23 +27,24 @@ struct StreamSessionView: View { } var body: some View { - ZStack { - if viewModel.isStreaming { - // Full-screen video view with streaming controls - StreamView(viewModel: viewModel, wearablesVM: wearablesViewModel, geminiVM: geminiVM, webrtcVM: webrtcVM) - } else { - // Pre-streaming setup view with permissions and start button - NonStreamView(viewModel: viewModel, wearablesVM: wearablesViewModel) - } - } - .task { - viewModel.geminiSessionVM = geminiVM - viewModel.webrtcSessionVM = webrtcVM - geminiVM.streamingMode = viewModel.streamingMode + NavigationStack { + HomeView(viewModel: viewModel, wearablesViewModel: wearablesViewModel) + .background(DesignSystem.colors.adminBackground.ignoresSafeArea()) } - .onChange(of: viewModel.streamingMode) { newMode in - geminiVM.streamingMode = newMode + .overlay(alignment: .bottom) { + if viewModel.showShipSuccessToast { + Text("Execution recorded") + .font(DesignSystem.fonts.mono(size: 13, weight: .semibold)) + .foregroundColor(DesignSystem.colors.white) + .padding(.horizontal, 16) + .padding(.vertical, 12) + .background(DesignSystem.colors.deepGreen) + .overlay(Rectangle().stroke(DesignSystem.colors.white, lineWidth: 1)) + .padding(.bottom, 20) + .transition(.move(edge: .bottom).combined(with: .opacity)) + } } + .animation(.easeInOut(duration: 0.2), value: viewModel.showShipSuccessToast) .onAppear { UIApplication.shared.isIdleTimerDisabled = true } diff --git a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index 3fc83f72..8d058ab4 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ 
b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -20,8 +20,6 @@ import SwiftUI struct StreamView: View { @ObservedObject var viewModel: StreamSessionViewModel @ObservedObject var wearablesVM: WearablesViewModel - @ObservedObject var geminiVM: GeminiSessionViewModel - @ObservedObject var webrtcVM: WebRTCSessionViewModel var body: some View { ZStack { @@ -29,14 +27,8 @@ struct StreamView: View { Color.black .edgesIgnoringSafeArea(.all) - // Video backdrop: PiP when WebRTC connected, otherwise single local feed - if webrtcVM.isActive && webrtcVM.connectionState == .connected { - PiPVideoView( - localFrame: viewModel.currentVideoFrame, - remoteVideoTrack: webrtcVM.remoteVideoTrack, - hasRemoteVideo: webrtcVM.hasRemoteVideo - ) - } else if let videoFrame = viewModel.currentVideoFrame, viewModel.hasReceivedFirstFrame { + // Single local feed only (pure SOP capture interface) + if let videoFrame = viewModel.currentVideoFrame, viewModel.hasReceivedFirstFrame { GeometryReader { geometry in Image(uiImage: videoFrame) .resizable() @@ -51,67 +43,67 @@ struct StreamView: View { .foregroundColor(.white) } - // Gemini status overlay (top) + speaking indicator - if geminiVM.isGeminiActive { - VStack { - GeminiStatusBar(geminiVM: geminiVM) - Spacer() + // SOP status + single action control + VStack { + if viewModel.isSopAuditRunning { + Text(String(format: "%.1fs", viewModel.sopAuditSecondsRemaining)) + .font(.system(size: 56, weight: .bold, design: .rounded)) + .foregroundColor(.white) + .padding(.top, 40) + + Text("Uploading at 2 FPS") + .font(.system(size: 16, weight: .semibold)) + .foregroundColor(.white.opacity(0.9)) + } + + Spacer() - VStack(spacing: 8) { - if !geminiVM.userTranscript.isEmpty || !geminiVM.aiTranscript.isEmpty { - TranscriptView( - userText: geminiVM.userTranscript, - aiText: geminiVM.aiTranscript - ) - } + if !viewModel.sopAuditStatusMessage.isEmpty { + Text(viewModel.sopAuditStatusMessage) + .font(.system(size: 16, weight: .semibold)) + 
.foregroundColor(.white) + .multilineTextAlignment(.center) + .padding(.horizontal, 20) + .padding(.vertical, 12) + .background(Color.black.opacity(0.55)) + .cornerRadius(12) + .padding(.bottom, 12) + } - ToolCallStatusView(status: geminiVM.toolCallStatus) + VStack(spacing: 10) { + CustomButton( + title: viewModel.isSopAuditRunning ? "AUDIT RUNNING..." : "INITIATE WALLET AUDIT", + style: .primary, + isDisabled: viewModel.isSopAuditRunning + ) { + viewModel.startSopAudit() + } - if geminiVM.isModelSpeaking { - HStack(spacing: 8) { - Image(systemName: "speaker.wave.2.fill") - .foregroundColor(.white) - .font(.system(size: 14)) - SpeakingIndicator() - } - .padding(.horizontal, 16) - .padding(.vertical, 8) - .background(Color.black.opacity(0.5)) - .cornerRadius(20) + CustomButton( + title: "TOGGLE AI COPILOT", + style: .secondary, + isDisabled: false + ) { + Task { + await viewModel.toggleGeminiAssistant() } } - .padding(.bottom, 80) } - .padding(.all, 24) } - - // WebRTC status overlay (top) - if webrtcVM.isActive { - VStack { - WebRTCStatusBar(webrtcVM: webrtcVM) - Spacer() + .padding(.all, 24) + } + .overlay(alignment: .top) { + GeminiAssistantOverlay(geminiVM: viewModel.geminiAssistant) { + Task { + await viewModel.toggleGeminiAssistant() } - .padding(.all, 24) } - - // Bottom controls layer - VStack { - Spacer() - ControlsView(viewModel: viewModel, geminiVM: geminiVM, webrtcVM: webrtcVM) - } - .padding(.all, 24) } .onDisappear { Task { if viewModel.streamingStatus != .stopped { await viewModel.stopSession() } - if geminiVM.isGeminiActive { - geminiVM.stopSession() - } - if webrtcVM.isActive { - webrtcVM.stopSession() - } } } // Show captured photos from DAT SDK in a preview sheet @@ -125,86 +117,5 @@ struct StreamView: View { ) } } - // Gemini error alert - .alert("AI Assistant", isPresented: Binding( - get: { geminiVM.errorMessage != nil }, - set: { if !$0 { geminiVM.errorMessage = nil } } - )) { - Button("OK") { geminiVM.errorMessage = nil } - } message: { - 
Text(geminiVM.errorMessage ?? "") - } - // WebRTC error alert - .alert("Live Stream", isPresented: Binding( - get: { webrtcVM.errorMessage != nil }, - set: { if !$0 { webrtcVM.errorMessage = nil } } - )) { - Button("OK") { webrtcVM.errorMessage = nil } - } message: { - Text(webrtcVM.errorMessage ?? "") - } - } -} - -// Extracted controls for clarity -struct ControlsView: View { - @ObservedObject var viewModel: StreamSessionViewModel - @ObservedObject var geminiVM: GeminiSessionViewModel - @ObservedObject var webrtcVM: WebRTCSessionViewModel - - var body: some View { - // Controls row - HStack(spacing: 8) { - CustomButton( - title: "Stop streaming", - style: .destructive, - isDisabled: false - ) { - Task { - await viewModel.stopSession() - } - } - - // Photo button (glasses mode only -- DAT SDK capture) - if viewModel.streamingMode == .glasses { - CircleButton(icon: "camera.fill", text: nil) { - viewModel.capturePhoto() - } - } - - // Gemini AI button (disabled when WebRTC is active — audio conflict) - CircleButton( - icon: geminiVM.isGeminiActive ? "waveform.circle.fill" : "waveform.circle", - text: "AI" - ) { - Task { - if geminiVM.isGeminiActive { - geminiVM.stopSession() - } else { - await geminiVM.startSession() - } - } - } - .opacity(webrtcVM.isActive ? 0.4 : 1.0) - .disabled(webrtcVM.isActive) - - // WebRTC Live Stream button (disabled when Gemini is active — audio conflict) - CircleButton( - icon: webrtcVM.isActive - ? "antenna.radiowaves.left.and.right.circle.fill" - : "antenna.radiowaves.left.and.right.circle", - text: "Live" - ) { - Task { - if webrtcVM.isActive { - webrtcVM.stopSession() - } else { - await webrtcVM.startSession() - } - } - } - .opacity(geminiVM.isGeminiActive ? 
0.4 : 1.0) - .disabled(geminiVM.isGeminiActive) - } } } diff --git a/samples/CameraAccess/CameraAccess/WebRTC/CustomVideoCapturer.swift b/samples/CameraAccess/CameraAccess/WebRTC/CustomVideoCapturer.swift index 89db30f8..f17e00e2 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/CustomVideoCapturer.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/CustomVideoCapturer.swift @@ -1,61 +1,273 @@ +import CoreImage +import QuartzCore import UIKit import WebRTC -/// Bridges UIImage frames from DAT SDK / iPhone camera into WebRTC's video pipeline. -/// Creates RTCVideoFrame from UIImage and feeds it to RTCVideoSource via the capturer delegate pattern. -class CustomVideoCapturer: RTCVideoCapturer { - private var frameCount: Int64 = 0 +struct WebRTCSenderStats { + let totalFrames: Int64 + let totalDroppedFrames: Int64 + let windowFramesPerSecond: Double + let windowDroppedFrames: Int64 + let lastEnqueueDurationMs: Double? + let sourceLabel: String + let width: Int + let height: Int +} - /// Push a UIImage frame into the WebRTC video track. - /// Called on each frame from StreamSessionViewModel (24fps glasses, 30fps iPhone). - func pushFrame(_ image: UIImage) { - guard let cgImage = image.cgImage else { return } +enum VideoFrameBufferFactory { + static let pixelFormat = kCVPixelFormatType_32BGRA - let width = cgImage.width - let height = cgImage.height + private static let ciContext = CIContext() - // Create CVPixelBuffer from CGImage - var pixelBuffer: CVPixelBuffer? - let attrs: [String: Any] = [ + static func currentTimestampNs() -> Int64 { + Int64(CACurrentMediaTime() * 1_000_000_000) + } + + static func makeBufferPool( + width: Int, + height: Int, + pixelFormat: OSType = pixelFormat + ) -> CVPixelBufferPool? 
{ + let attributes: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: pixelFormat, + kCVPixelBufferWidthKey as String: width, + kCVPixelBufferHeightKey as String: height, kCVPixelBufferCGImageCompatibilityKey as String: true, kCVPixelBufferCGBitmapContextCompatibilityKey as String: true, kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any], ] - let status = CVPixelBufferCreate( - kCFAllocatorDefault, width, height, - kCVPixelFormatType_32BGRA, attrs as CFDictionary, - &pixelBuffer + + var pool: CVPixelBufferPool? + let status = CVPixelBufferPoolCreate( + kCFAllocatorDefault, + nil, + attributes as CFDictionary, + &pool ) - guard status == kCVReturnSuccess, let buffer = pixelBuffer else { return } + guard status == kCVReturnSuccess else { return nil } + return pool + } + + static func makePixelBuffer( + from image: UIImage, + using pool: CVPixelBufferPool? = nil, + pixelFormat: OSType = pixelFormat + ) -> CVPixelBuffer? { + guard let cgImage = image.cgImage else { return nil } + + let width = cgImage.width + let height = cgImage.height + var pixelBuffer: CVPixelBuffer? 
+ let status: CVReturn + + if let pool { + status = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &pixelBuffer) + } else { + let attrs: [String: Any] = [ + kCVPixelBufferCGImageCompatibilityKey as String: true, + kCVPixelBufferCGBitmapContextCompatibilityKey as String: true, + kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any], + ] + status = CVPixelBufferCreate( + kCFAllocatorDefault, + width, + height, + pixelFormat, + attrs as CFDictionary, + &pixelBuffer + ) + } + + guard status == kCVReturnSuccess, let buffer = pixelBuffer else { return nil } CVPixelBufferLockBaseAddress(buffer, []) - if let context = CGContext( - data: CVPixelBufferGetBaseAddress(buffer), - width: width, height: height, - bitsPerComponent: 8, - bytesPerRow: CVPixelBufferGetBytesPerRow(buffer), - space: CGColorSpaceCreateDeviceRGB(), - bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue - | CGBitmapInfo.byteOrder32Little.rawValue - ) { - context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height)) + defer { CVPixelBufferUnlockBaseAddress(buffer, []) } + + guard + let context = CGContext( + data: CVPixelBufferGetBaseAddress(buffer), + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: CVPixelBufferGetBytesPerRow(buffer), + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue + | CGBitmapInfo.byteOrder32Little.rawValue + ) + else { + return nil + } + + context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height)) + return buffer + } + + static func copyPixelBuffer( + _ source: CVPixelBuffer, + using pool: CVPixelBufferPool? = nil, + pixelFormat: OSType = pixelFormat + ) -> CVPixelBuffer? { + let width = CVPixelBufferGetWidth(source) + let height = CVPixelBufferGetHeight(source) + var destination: CVPixelBuffer? 
+ let status: CVReturn + + if let pool { + status = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &destination) + } else { + let attrs: [String: Any] = [ + kCVPixelBufferCGImageCompatibilityKey as String: true, + kCVPixelBufferCGBitmapContextCompatibilityKey as String: true, + kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any], + ] + status = CVPixelBufferCreate( + kCFAllocatorDefault, + width, + height, + pixelFormat, + attrs as CFDictionary, + &destination + ) } - CVPixelBufferUnlockBaseAddress(buffer, []) - // Wrap in WebRTC types and feed to video source - let rtcPixelBuffer = RTCCVPixelBuffer(pixelBuffer: buffer) - let timeStampNs = Int64(CACurrentMediaTime() * 1_000_000_000) + guard status == kCVReturnSuccess, let destination else { return nil } + ciContext.render(CIImage(cvPixelBuffer: source), to: destination) + return destination + } +} + +/// Bridges UIImage or CVPixelBuffer frames into WebRTC's video pipeline. +class CustomVideoCapturer: RTCVideoCapturer { + private var frameCount: Int64 = 0 + private var droppedFrameCount: Int64 = 0 + private var statsWindowStart = CACurrentMediaTime() + private var statsWindowFrames: Int64 = 0 + private var statsWindowDroppedBaseline: Int64 = 0 + private var pixelBufferPool: CVPixelBufferPool? + private var poolSize: CGSize = .zero + var onStatsSample: ((WebRTCSenderStats) -> Void)? 
+ + func pushFrame(_ image: UIImage) { + guard let cgImage = image.cgImage else { + registerDroppedFrame(reason: "missing-cgimage") + return + } + + ensurePixelBufferPool(width: cgImage.width, height: cgImage.height) + let startedAt = CACurrentMediaTime() + guard + let buffer = VideoFrameBufferFactory.makePixelBuffer(from: image, using: pixelBufferPool) + else { + registerDroppedFrame(reason: "pixel-buffer-create") + return + } + + pushPixelBuffer( + buffer, + timeStampNs: VideoFrameBufferFactory.currentTimestampNs(), + enqueueDurationMs: (CACurrentMediaTime() - startedAt) * 1000, + sourceLabel: "image-converted" + ) + } + + func pushPixelBuffer(_ pixelBuffer: CVPixelBuffer, timeStampNs: Int64) { + pushPixelBuffer( + pixelBuffer, + timeStampNs: timeStampNs, + enqueueDurationMs: nil, + sourceLabel: "pixel-buffer" + ) + } + + private func pushPixelBuffer( + _ pixelBuffer: CVPixelBuffer, + timeStampNs: Int64, + enqueueDurationMs: Double?, + sourceLabel: String + ) { + let rtcPixelBuffer = RTCCVPixelBuffer(pixelBuffer: pixelBuffer) let rtcFrame = RTCVideoFrame( buffer: rtcPixelBuffer, rotation: ._0, timeStampNs: timeStampNs ) - self.delegate?.capturer(self, didCapture: rtcFrame) + delegate?.capturer(self, didCapture: rtcFrame) frameCount += 1 - if frameCount == 1 || frameCount % 120 == 0 { - NSLog("[WebRTC] Pushed frame #%lld (%dx%d)", frameCount, width, height) + statsWindowFrames += 1 + logSenderStatsIfNeeded( + width: CVPixelBufferGetWidth(pixelBuffer), + height: CVPixelBufferGetHeight(pixelBuffer), + enqueueDurationMs: enqueueDurationMs, + sourceLabel: sourceLabel + ) + } + + private func ensurePixelBufferPool(width: Int, height: Int) { + let requestedSize = CGSize(width: width, height: height) + guard pixelBufferPool == nil || poolSize != requestedSize else { return } + pixelBufferPool = VideoFrameBufferFactory.makeBufferPool(width: width, height: height) + poolSize = requestedSize + } + + private func registerDroppedFrame(reason: String) { + droppedFrameCount += 
1 + if droppedFrameCount == 1 || droppedFrameCount % 30 == 0 { + NSLog( + "[WebRTC] Sender dropped frame #%lld (%@)", + droppedFrameCount, + reason + ) + } + } + + private func logSenderStatsIfNeeded( + width: Int, + height: Int, + enqueueDurationMs: Double?, + sourceLabel: String + ) { + guard frameCount == 1 || frameCount % 60 == 0 else { return } + + let now = CACurrentMediaTime() + let elapsed = max(now - statsWindowStart, 0.001) + let fps = Double(statsWindowFrames) / elapsed + let droppedInWindow = droppedFrameCount - statsWindowDroppedBaseline + let enqueueLabel: String + + if let enqueueDurationMs { + enqueueLabel = String(format: "%.1fms", enqueueDurationMs) + } else { + enqueueLabel = "direct" } + + NSLog( + "[WebRTC] Sender stats frames=%lld dropped=%lld rate=%.1ffps last-enqueue=%@ source=%@ size=%dx%d", + frameCount, + droppedFrameCount, + fps, + enqueueLabel, + sourceLabel, + width, + height + ) + + onStatsSample?( + WebRTCSenderStats( + totalFrames: frameCount, + totalDroppedFrames: droppedFrameCount, + windowFramesPerSecond: fps, + windowDroppedFrames: droppedInWindow, + lastEnqueueDurationMs: enqueueDurationMs, + sourceLabel: sourceLabel, + width: width, + height: height + ) + ) + + statsWindowStart = now + statsWindowFrames = 0 + statsWindowDroppedBaseline = droppedFrameCount } } diff --git a/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift b/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift index aec172c6..f3f4d6ba 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift @@ -55,11 +55,11 @@ class SignalingClient { } func joinRoom(code: String) { - sendJSON(["type": "join", "room": code]) + sendJSON(["type": "join", "room": code, "room_code": code]) } func rejoinRoom(code: String) { - sendJSON(["type": "rejoin", "room": code]) + sendJSON(["type": "rejoin", "room": code, "room_code": code]) } func send(sdp: RTCSessionDescription) { @@ 
-134,7 +134,7 @@ class SignalingClient { switch type { case "room_created": - if let room = json["room"] as? String { + if let room = roomCode(from: json) { onMessageReceived?(.roomCreated(room)) } @@ -142,14 +142,14 @@ class SignalingClient { onMessageReceived?(.roomJoined) case "room_rejoined": - if let room = json["room"] as? String { + if let room = roomCode(from: json) { onMessageReceived?(.roomRejoined(room)) } - case "peer_joined": + case "peer_joined", "viewer_joined": onMessageReceived?(.peerJoined) - case "peer_left": + case "peer_left", "viewer_left": onMessageReceived?(.peerLeft) case "offer": @@ -182,6 +182,16 @@ class SignalingClient { NSLog("[Signaling] Unknown message type: %@", type) } } + + private func roomCode(from json: [String: Any]) -> String? { + if let room = json["room"] as? String, !room.isEmpty { + return room + } + if let roomCode = json["room_code"] as? String, !roomCode.isEmpty { + return roomCode + } + return nil + } } // MARK: - WebSocket Delegate diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift index ad7319c1..e6197b01 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift @@ -1,25 +1,45 @@ +import AVFoundation import Foundation +import UIKit import WebRTC +enum WebRTCRoomMode: String { + case observation + case support + + var usesAudio: Bool { + self == .support + } +} + protocol WebRTCClientDelegate: AnyObject { func webRTCClient(_ client: WebRTCClient, didChangeConnectionState state: RTCIceConnectionState) func webRTCClient(_ client: WebRTCClient, didGenerateCandidate candidate: RTCIceCandidate) func webRTCClient(_ client: WebRTCClient, didReceiveRemoteVideoTrack track: RTCVideoTrack) func webRTCClient(_ client: WebRTCClient, didRemoveRemoteVideoTrack track: RTCVideoTrack) + func webRTCClient(_ client: WebRTCClient, didReceiveRemoteAudioTrack track: 
RTCAudioTrack) + func webRTCClient(_ client: WebRTCClient, didUpdateSenderStats stats: WebRTCSenderStats) } /// Manages RTCPeerConnection, video/audio tracks, and SDP negotiation. -/// Video uses a custom capturer (fed by DAT SDK frames). Audio uses WebRTC's native engine. +/// Video uses a custom capturer fed by the worker camera pipeline. class WebRTCClient: NSObject { weak var delegate: WebRTCClientDelegate? private let factory: RTCPeerConnectionFactory + private var streamProfile = WebRTCConfig.supportModeGlassesProfile private var peerConnection: RTCPeerConnection? private var videoSource: RTCVideoSource! private var videoCapturer: CustomVideoCapturer! private var localVideoTrack: RTCVideoTrack? private var localAudioTrack: RTCAudioTrack? + private var localVideoSender: RTCRtpSender? private(set) var remoteVideoTrack: RTCVideoTrack? + private(set) var remoteAudioTrack: RTCAudioTrack? + private var receiveRemoteVideo = true + private var captureMode: StreamingMode = .glasses + private var roomMode: WebRTCRoomMode = .support + private var audioRouteLease: WorkerAudioRouteLease? override init() { RTCInitializeSSL() @@ -32,7 +52,20 @@ class WebRTCClient: NSObject { super.init() } - func setup(iceServers: [RTCIceServer]? = nil) { + func setup( + iceServers: [RTCIceServer]? = nil, + profile: WebRTCStreamProfile = WebRTCConfig.supportModeGlassesProfile, + receiveRemoteVideo: Bool = true, + captureMode: StreamingMode = .glasses, + roomMode: WebRTCRoomMode = .support + ) { + streamProfile = profile + self.receiveRemoteVideo = receiveRemoteVideo + self.captureMode = captureMode + self.roomMode = roomMode + if roomMode.usesAudio { + configureSupportAudioRoute(captureMode: captureMode) + } let config = RTCConfiguration() config.iceServers = iceServers ?? 
[RTCIceServer(urlStrings: WebRTCConfig.stunServers)] config.sdpSemantics = .unifiedPlan @@ -44,42 +77,163 @@ class WebRTCClient: NSObject { ) peerConnection = factory.peerConnection( - with: config, constraints: constraints, delegate: self + with: config, + constraints: constraints, + delegate: self ) createMediaTracks() } private func createMediaTracks() { - // Video track — custom source fed by DAT SDK frames videoSource = factory.videoSource() + videoSource.adaptOutputFormat( + toWidth: Int32(streamProfile.maxWidth), + height: Int32(streamProfile.maxHeight), + fps: Int32(streamProfile.maxFramerate) + ) videoCapturer = CustomVideoCapturer(delegate: videoSource) + videoCapturer.onStatsSample = { [weak self] stats in + guard let self else { return } + self.delegate?.webRTCClient(self, didUpdateSenderStats: stats) + } localVideoTrack = factory.videoTrack(with: videoSource, trackId: "video0") localVideoTrack?.isEnabled = true - peerConnection?.add(localVideoTrack!, streamIds: ["stream0"]) + if let localVideoTrack { + localVideoSender = peerConnection?.add(localVideoTrack, streamIds: ["stream0"]) + applyVideoSenderParameters() + } + + guard roomMode.usesAudio else { + localAudioTrack = nil + return + } - // Audio track — WebRTC native audio (handles mic capture, AEC, playback) let audioConstraints = RTCMediaConstraints( - mandatoryConstraints: nil, optionalConstraints: nil + mandatoryConstraints: nil, + optionalConstraints: nil ) let audioSource = factory.audioSource(with: audioConstraints) localAudioTrack = factory.audioTrack(with: audioSource, trackId: "audio0") localAudioTrack?.isEnabled = true - peerConnection?.add(localAudioTrack!, streamIds: ["stream0"]) + if let localAudioTrack { + peerConnection?.add(localAudioTrack, streamIds: ["stream0"]) + Task { + await WorkerTelemetry.shared.record( + "webrtc_local_audio_track", + source: "webrtc", + stage: "sender", + payload: [ + "enabled": localAudioTrack.isEnabled, + "track_id": localAudioTrack.trackId, + "room_mode": 
roomMode.rawValue + ] + ) + } + } + } + + @discardableResult + func configureSupportAudioRoute(captureMode: StreamingMode) -> String? { + guard roomMode.usesAudio else { return nil } + let previousLease = audioRouteLease + do { + let snapshot = try WorkerAudioRouteCoordinator.shared.acquire( + owner: .backOfficeWebRTC, + mode: captureMode, + reason: "webrtc_support_call", + forceSpeaker: SettingsManager.shared.speakerOutputEnabled, + preferredIOBufferDuration: 0.02 + ) + audioRouteLease = snapshot.lease + if let previousLease, previousLease != snapshot.lease { + Task { + await WorkerAudioRouteCoordinator.shared.release(lease: previousLease) + } + } + self.captureMode = captureMode + + let rtcAudioSession = RTCAudioSession.sharedInstance() + rtcAudioSession.lockForConfiguration() + defer { rtcAudioSession.unlockForConfiguration() } + rtcAudioSession.useManualAudio = false + rtcAudioSession.isAudioEnabled = true + return snapshot.fallbackMessage + } catch { + NSLog("[WebRTC] Audio route setup failed: %@", error.localizedDescription) + return nil + } } - /// Called by ViewModel to push video frames from DAT SDK / iPhone camera. 
func pushVideoFrame(_ image: UIImage) { videoCapturer?.pushFrame(image) } + func pushPixelBuffer(_ pixelBuffer: CVPixelBuffer, timeStampNs: Int64) { + videoCapturer?.pushPixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } + + func updateStreamProfile(_ profile: WebRTCStreamProfile) { + streamProfile = profile + videoSource?.adaptOutputFormat( + toWidth: Int32(profile.maxWidth), + height: Int32(profile.maxHeight), + fps: Int32(profile.maxFramerate) + ) + applyVideoSenderParameters() + } + + private func applyVideoSenderParameters() { + guard let localVideoSender else { return } + let parameters = localVideoSender.parameters + let encodings = parameters.encodings + + if encodings.isEmpty { + NSLog("[WebRTC] Sender parameters missing encodings; bitrate tuning skipped") + return + } + + for encoding in encodings { + encoding.maxBitrateBps = NSNumber(value: streamProfile.maxBitrateBps) + encoding.maxFramerate = NSNumber(value: streamProfile.maxFramerate) + } + + parameters.encodings = encodings + parameters.degradationPreference = NSNumber( + value: RTCDegradationPreference.maintainFramerate.rawValue + ) + localVideoSender.parameters = parameters + + NSLog( + "[WebRTC] Sender tuned for support mode (%dx%d @ %dfps, %@ bps)", + streamProfile.maxWidth, + streamProfile.maxHeight, + streamProfile.maxFramerate, + NSNumber(value: streamProfile.maxBitrateBps) + ) + } + // MARK: - SDP Negotiation func createOffer(completion: @escaping (RTCSessionDescription) -> Void) { + Task { + await WorkerTelemetry.shared.record( + "webrtc_offer_create", + source: "webrtc", + stage: "negotiation", + payload: [ + "receive_audio": roomMode.usesAudio, + "receive_remote_video": receiveRemoteVideo, + "local_audio_enabled": localAudioTrack?.isEnabled ?? false, + "capture_mode": captureMode == .iPhone ? 
"iphone" : "glasses", + "room_mode": roomMode.rawValue + ] + ) + } let constraints = RTCMediaConstraints( mandatoryConstraints: [ - "OfferToReceiveAudio": "true", - "OfferToReceiveVideo": "true", + "OfferToReceiveAudio": roomMode.usesAudio ? "true" : "false", + "OfferToReceiveVideo": receiveRemoteVideo ? "true" : "false", ], optionalConstraints: nil ) @@ -90,8 +244,7 @@ class WebRTCClient: NSObject { } self?.peerConnection?.setLocalDescription(sdp) { error in if let error { - NSLog( - "[WebRTC] Failed to set local description: %@", error.localizedDescription) + NSLog("[WebRTC] Failed to set local description: %@", error.localizedDescription) } else { completion(sdp) } @@ -109,14 +262,37 @@ class WebRTCClient: NSObject { func muteAudio(_ mute: Bool) { localAudioTrack?.isEnabled = !mute + NSLog("[WebRTC] Local mic %@", mute ? "muted" : "live") + Task { + await WorkerTelemetry.shared.record( + "webrtc_local_audio_mute", + source: "webrtc", + stage: mute ? "muted" : "live", + payload: [ + "muted": mute, + "track_live": localAudioTrack != nil + ] + ) + } } func close() { localVideoTrack?.isEnabled = false localAudioTrack?.isEnabled = false + localVideoSender = nil remoteVideoTrack = nil + remoteAudioTrack = nil peerConnection?.close() peerConnection = nil + let lease = audioRouteLease + audioRouteLease = nil + // Keep close() synchronous for the WebRTC state machine; the coordinator + // releases/deactivates the shared AVAudioSession off the caller thread. 
+ if let lease { + Task { + await WorkerAudioRouteCoordinator.shared.release(lease: lease) + } + } NSLog("[WebRTC] Peer connection closed") } @@ -129,28 +305,31 @@ class WebRTCClient: NSObject { extension WebRTCClient: RTCPeerConnectionDelegate { func peerConnection( - _ peerConnection: RTCPeerConnection, didChange stateChanged: RTCSignalingState + _ peerConnection: RTCPeerConnection, + didChange stateChanged: RTCSignalingState ) { NSLog("[WebRTC] Signaling state: %d", stateChanged.rawValue) } func peerConnection( - _ peerConnection: RTCPeerConnection, didChange newState: RTCIceConnectionState + _ peerConnection: RTCPeerConnection, + didChange newState: RTCIceConnectionState ) { NSLog("[WebRTC] ICE connection state: %d", newState.rawValue) delegate?.webRTCClient(self, didChangeConnectionState: newState) } func peerConnection( - _ peerConnection: RTCPeerConnection, didChange newState: RTCIceGatheringState + _ peerConnection: RTCPeerConnection, + didChange newState: RTCIceGatheringState ) { NSLog("[WebRTC] ICE gathering state: %d", newState.rawValue) } func peerConnection( - _ peerConnection: RTCPeerConnection, didGenerate candidate: RTCIceCandidate + _ peerConnection: RTCPeerConnection, + didGenerate candidate: RTCIceCandidate ) { - // Log candidate type for debugging NAT traversal let sdp = candidate.sdp if sdp.contains("relay") { NSLog("[WebRTC] ICE candidate: RELAY (TURN)") @@ -163,12 +342,40 @@ extension WebRTCClient: RTCPeerConnectionDelegate { } func peerConnection(_ peerConnection: RTCPeerConnection, didAdd stream: RTCMediaStream) { - NSLog("[WebRTC] Remote stream added with %d audio tracks, %d video tracks", - stream.audioTracks.count, stream.videoTracks.count) + NSLog( + "[WebRTC] Remote stream added with %d audio tracks, %d video tracks", + stream.audioTracks.count, + stream.videoTracks.count + ) if let videoTrack = stream.videoTracks.first { remoteVideoTrack = videoTrack delegate?.webRTCClient(self, didReceiveRemoteVideoTrack: videoTrack) } + if let 
audioTrack = stream.audioTracks.first { + audioTrack.isEnabled = true + remoteAudioTrack = audioTrack + delegate?.webRTCClient(self, didReceiveRemoteAudioTrack: audioTrack) + } + } + + func peerConnection( + _ peerConnection: RTCPeerConnection, + didAdd receiver: RTCRtpReceiver, + streams: [RTCMediaStream] + ) { + guard let track = receiver.track else { return } + if let videoTrack = track as? RTCVideoTrack { + remoteVideoTrack = videoTrack + delegate?.webRTCClient(self, didReceiveRemoteVideoTrack: videoTrack) + NSLog("[WebRTC] Unified Plan remote video track received") + return + } + if let audioTrack = track as? RTCAudioTrack { + audioTrack.isEnabled = true + remoteAudioTrack = audioTrack + delegate?.webRTCClient(self, didReceiveRemoteAudioTrack: audioTrack) + NSLog("[WebRTC] Unified Plan remote audio track received") + } } func peerConnection(_ peerConnection: RTCPeerConnection, didRemove stream: RTCMediaStream) { @@ -184,10 +391,12 @@ extension WebRTCClient: RTCPeerConnectionDelegate { } func peerConnection( - _ peerConnection: RTCPeerConnection, didRemove candidates: [RTCIceCandidate] + _ peerConnection: RTCPeerConnection, + didRemove candidates: [RTCIceCandidate] ) {} func peerConnection( - _ peerConnection: RTCPeerConnection, didOpen dataChannel: RTCDataChannel + _ peerConnection: RTCPeerConnection, + didOpen dataChannel: RTCDataChannel ) {} } diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCConfig.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCConfig.swift index 3d401c44..2b5a40ad 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCConfig.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCConfig.swift @@ -1,27 +1,68 @@ import Foundation import WebRTC +struct WebRTCStreamProfile { + let maxBitrateBps: Int + let maxFramerate: Int + let maxWidth: Int + let maxHeight: Int +} + enum WebRTCConfig { - static let signalingServerURL = Secrets.webrtcSignalingURL + static var signalBaseURL: String { GeminiConfig.signalBaseURL } + + 
static var signalingServerURL: String { + normalizedWebSocketURL(from: signalBaseURL) + } static let stunServers = [ "stun:stun.l.google.com:19302", "stun:stun1.l.google.com:19302", ] - static let maxBitrateBps = 2_500_000 // 2.5 Mbps - static let maxFramerate = 24 + static let supportModeGlassesProfile = WebRTCStreamProfile( + maxBitrateBps: 850_000, + maxFramerate: 12, + maxWidth: 960, + maxHeight: 540 + ) + static let supportModeGlassesFallbackProfile = WebRTCStreamProfile( + maxBitrateBps: 550_000, + maxFramerate: 10, + maxWidth: 640, + maxHeight: 360 + ) + static let supportModePhoneProfile = WebRTCStreamProfile( + maxBitrateBps: 900_000, + maxFramerate: 20, + maxWidth: 960, + maxHeight: 540 + ) + static let supportModePhoneFallbackProfile = WebRTCStreamProfile( + maxBitrateBps: 550_000, + maxFramerate: 15, + maxWidth: 640, + maxHeight: 360 + ) + + static func supportProfile(for mode: StreamingMode) -> WebRTCStreamProfile { + switch mode { + case .iPhone: + return supportModePhoneProfile + case .glasses: + return supportModeGlassesProfile + } + } static var isConfigured: Bool { - return !signalingServerURL.isEmpty - && signalingServerURL != "ws://YOUR_MAC_IP:8080" + let trimmed = signalBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + return !trimmed.isEmpty + && !trimmed.contains("YOUR_") } /// Derive the HTTP base URL from the WebSocket signaling URL. static var httpBaseURL: String { - return signalingServerURL - .replacingOccurrences(of: "wss://", with: "https://") - .replacingOccurrences(of: "ws://", with: "http://") + normalizedHTTPURL(from: signalBaseURL) } /// Fetch TURN credentials from the signaling server. 
@@ -64,4 +105,34 @@ enum WebRTCConfig { return servers } + + private static func normalizedWebSocketURL(from raw: String) -> String { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + if trimmed.hasPrefix("wss://") || trimmed.hasPrefix("ws://") { + return trimmed + } + if trimmed.hasPrefix("https://") { + return "wss://" + String(trimmed.dropFirst("https://".count)) + } + if trimmed.hasPrefix("http://") { + return "ws://" + String(trimmed.dropFirst("http://".count)) + } + return "wss://\(trimmed)" + } + + private static func normalizedHTTPURL(from raw: String) -> String { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return trimmed } + if trimmed.hasPrefix("https://") || trimmed.hasPrefix("http://") { + return trimmed + } + if trimmed.hasPrefix("wss://") { + return "https://" + String(trimmed.dropFirst("wss://".count)) + } + if trimmed.hasPrefix("ws://") { + return "http://" + String(trimmed.dropFirst("ws://".count)) + } + return "https://\(trimmed)" + } } diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift index a2463ce8..b78c5aee 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift @@ -1,7 +1,123 @@ import Foundation +import QuartzCore import SwiftUI import WebRTC +final class WebRTCRealtimeVideoForwarder: @unchecked Sendable { + private let queue = DispatchQueue( + label: "visionclaw.webrtc.realtime-forwarder", + qos: .userInteractive + ) + private let stateLock = NSLock() + private var imageHandler: ((UIImage) -> Void)? + private var pixelBufferHandler: ((CVPixelBuffer, Int64) -> Void)? + private var pendingPixelBuffer: CVPixelBuffer? 
+ private var pendingPixelBufferTimestampNs: Int64 = 0 + private var pendingPixelBufferQueuedAt = CACurrentMediaTime() + private var isPixelBufferDrainScheduled = false + private var pixelBufferForwardCount: Int64 = 0 + private var stalePixelBufferDropCount: Int64 = 0 + private var pixelBufferStatsWindowStart = CACurrentMediaTime() + + func updateHandlers( + imageHandler: ((UIImage) -> Void)?, + pixelBufferHandler: ((CVPixelBuffer, Int64) -> Void)? + ) { + clearPendingPixelBuffer() + queue.async { + self.imageHandler = imageHandler + self.pixelBufferHandler = pixelBufferHandler + } + } + + func enqueueImage(_ image: UIImage) { + queue.async { + self.imageHandler?(image) + } + } + + func enqueuePixelBuffer(_ pixelBuffer: CVPixelBuffer, timeStampNs: Int64) { + let queuedAt = CACurrentMediaTime() + + var shouldScheduleDrain = false + stateLock.lock() + if pendingPixelBuffer != nil { + stalePixelBufferDropCount += 1 + } + pendingPixelBuffer = pixelBuffer + pendingPixelBufferTimestampNs = timeStampNs + pendingPixelBufferQueuedAt = queuedAt + if !isPixelBufferDrainScheduled { + isPixelBufferDrainScheduled = true + shouldScheduleDrain = true + } + stateLock.unlock() + + if shouldScheduleDrain { + queue.async { [weak self] in + self?.drainLatestPixelBuffer() + } + } + } + + private func clearPendingPixelBuffer() { + stateLock.lock() + pendingPixelBuffer = nil + isPixelBufferDrainScheduled = false + stateLock.unlock() + } + + private func drainLatestPixelBuffer() { + while true { + stateLock.lock() + guard let pixelBuffer = pendingPixelBuffer else { + isPixelBufferDrainScheduled = false + stateLock.unlock() + return + } + let timeStampNs = pendingPixelBufferTimestampNs + let queuedAt = pendingPixelBufferQueuedAt + pendingPixelBuffer = nil + stateLock.unlock() + + pixelBufferHandler?(pixelBuffer, timeStampNs) + pixelBufferForwardCount += 1 + logPixelBufferForwardStatsIfNeeded(waitDurationMs: (CACurrentMediaTime() - queuedAt) * 1000) + } + } + + private func 
logPixelBufferForwardStatsIfNeeded(waitDurationMs: Double) { + guard pixelBufferForwardCount == 1 || pixelBufferForwardCount % 120 == 0 else { return } + let now = CACurrentMediaTime() + let elapsed = max(now - pixelBufferStatsWindowStart, 0.001) + let fps = Double(pixelBufferForwardCount) / elapsed + NSLog( + "[WebRTC] Realtime forwarder rate=%.1ffps last-wait=%.2fms stale-dropped=%lld", + fps, + waitDurationMs, + stalePixelBufferDropCount + ) + Task { + await WorkerTelemetry.shared.record( + "webrtc_realtime_forwarder", + source: "webrtc", + stage: "forwarder", + durationMs: waitDurationMs, + metricValue: fps, + metricUnit: "fps", + payload: [ + "fps": fps, + "wait_ms": waitDurationMs, + "stale_dropped": stalePixelBufferDropCount + ] + ) + } + pixelBufferStatsWindowStart = now + pixelBufferForwardCount = 0 + stalePixelBufferDropCount = 0 + } +} + enum WebRTCConnectionState: Equatable { case disconnected case connecting @@ -21,17 +137,35 @@ class WebRTCSessionViewModel: ObservableObject { @Published var isMuted: Bool = false @Published var errorMessage: String? @Published var remoteVideoTrack: RTCVideoTrack? + @Published var remoteAudioTrack: RTCAudioTrack? @Published var hasRemoteVideo: Bool = false + @Published var hasRemoteAudio: Bool = false + @Published var incomingRemoteVideoEnabled: Bool = true + @Published var isUnderLiveVideoPressure: Bool = false + @Published private(set) var roomMode: WebRTCRoomMode = .observation + + nonisolated let realtimeVideoForwarder = WebRTCRealtimeVideoForwarder() private var webRTCClient: WebRTCClient? private var signalingClient: SignalingClient? private var delegateAdapter: WebRTCDelegateAdapter? + private var currentCaptureMode: StreamingMode = .glasses + private var wantsIncomingRemoteVideo = true + private var isUsingPhoneFallbackProfile = false + private var stablePhoneSenderWindows = 0 /// Saved room code for reconnecting after app backgrounding. private var savedRoomCode: String? private var foregroundObserver: Any? 
- func startSession() async { + var isSupportMode: Bool { + roomMode == .support + } + + func startSession( + captureMode: StreamingMode = .glasses, + roomMode: WebRTCRoomMode = .support + ) async { guard !isActive else { return } guard WebRTCConfig.isConfigured else { errorMessage = "WebRTC signaling URL not configured." @@ -41,17 +175,36 @@ class WebRTCSessionViewModel: ObservableObject { isActive = true connectionState = .connecting savedRoomCode = nil + currentCaptureMode = captureMode + self.roomMode = roomMode + wantsIncomingRemoteVideo = roomMode == .support && captureMode != .iPhone + incomingRemoteVideoEnabled = wantsIncomingRemoteVideo + isUsingPhoneFallbackProfile = false + stablePhoneSenderWindows = 0 + Task { + await WorkerTelemetry.shared.record( + "webrtc_session_start", + source: "webrtc", + stage: "connecting", + payload: [ + "capture_mode": captureMode == .iPhone ? "iphone" : "glasses", + "room_mode": roomMode.rawValue, + "audio_enabled": roomMode.usesAudio + ] + ) + } // Fetch TURN credentials for NAT traversal across networks let iceServers = await WebRTCConfig.fetchIceServers() - setupWebRTCClient(iceServers: iceServers) + setupWebRTCClient(iceServers: iceServers, captureMode: captureMode) connectSignaling(rejoinCode: nil) observeForeground() } func stopSession() { removeForegroundObserver() + realtimeVideoForwarder.updateHandlers(imageHandler: nil, pixelBufferHandler: nil) webRTCClient?.close() webRTCClient = nil delegateAdapter = nil @@ -59,11 +212,26 @@ class WebRTCSessionViewModel: ObservableObject { signalingClient = nil isActive = false connectionState = .disconnected + isUnderLiveVideoPressure = false roomCode = "" savedRoomCode = nil isMuted = false remoteVideoTrack = nil + remoteAudioTrack = nil hasRemoteVideo = false + hasRemoteAudio = false + incomingRemoteVideoEnabled = true + wantsIncomingRemoteVideo = true + roomMode = .observation + isUsingPhoneFallbackProfile = false + stablePhoneSenderWindows = 0 + Task { + await 
WorkerTelemetry.shared.record( + "webrtc_session_stop", + source: "webrtc", + stage: "disconnected" + ) + } } func toggleMute() { @@ -71,21 +239,61 @@ class WebRTCSessionViewModel: ObservableObject { webRTCClient?.muteAudio(isMuted) } + @discardableResult + func refreshSupportAudioRoute(captureMode: StreamingMode) -> String? { + guard roomMode.usesAudio else { return nil } + currentCaptureMode = captureMode + return webRTCClient?.configureSupportAudioRoute(captureMode: captureMode) + } + /// Called by StreamSessionViewModel on each video frame. func pushVideoFrame(_ image: UIImage) { guard isActive, connectionState == .connected else { return } webRTCClient?.pushVideoFrame(image) } + func pushVideoPixelBuffer(_ pixelBuffer: CVPixelBuffer, timeStampNs: Int64) { + guard isActive, connectionState == .connected else { return } + webRTCClient?.pushPixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } + // MARK: - WebRTC + Signaling Setup - private func setupWebRTCClient(iceServers: [RTCIceServer]?) { + private func setupWebRTCClient( + iceServers: [RTCIceServer]?, + captureMode: StreamingMode + ) { let client = WebRTCClient() let adapter = WebRTCDelegateAdapter(viewModel: self) delegateAdapter = adapter client.delegate = adapter - client.setup(iceServers: iceServers) + let profile: WebRTCStreamProfile + switch captureMode { + case .iPhone: + profile = isUsingPhoneFallbackProfile + ? WebRTCConfig.supportModePhoneFallbackProfile + : WebRTCConfig.supportModePhoneProfile + case .glasses: + profile = isUsingPhoneFallbackProfile + ? 
WebRTCConfig.supportModeGlassesFallbackProfile + : WebRTCConfig.supportModeGlassesProfile + } + client.setup( + iceServers: iceServers, + profile: profile, + receiveRemoteVideo: wantsIncomingRemoteVideo, + captureMode: captureMode, + roomMode: roomMode + ) webRTCClient = client + realtimeVideoForwarder.updateHandlers( + imageHandler: { [weak client] image in + client?.pushVideoFrame(image) + }, + pixelBufferHandler: { [weak client] pixelBuffer, timeStampNs in + client?.pushPixelBuffer(pixelBuffer, timeStampNs: timeStampNs) + } + ) } private func connectSignaling(rejoinCode: String?) { @@ -96,6 +304,13 @@ class WebRTCSessionViewModel: ObservableObject { signaling.onConnected = { [weak self] in Task { @MainActor in + Task { + await WorkerTelemetry.shared.record( + "webrtc_signaling_connected", + source: "webrtc", + stage: "signaling" + ) + } if let code = rejoinCode { NSLog("[WebRTC] Reconnected, rejoining room: %@", code) self?.signalingClient?.rejoinRoom(code: code) @@ -114,6 +329,14 @@ class WebRTCSessionViewModel: ObservableObject { signaling.onDisconnected = { [weak self] reason in Task { @MainActor in guard let self, self.isActive else { return } + Task { + await WorkerTelemetry.shared.record( + "webrtc_signaling_disconnected", + source: "webrtc", + stage: self.savedRoomCode != nil ? "backgrounded" : "failed", + payload: ["reason": reason ?? 
NSNull()] + ) + } // Don't fully stop -- mark as backgrounded so we can reconnect if self.savedRoomCode != nil { self.connectionState = .backgrounded @@ -159,6 +382,28 @@ class WebRTCSessionViewModel: ObservableObject { private func handleReturnToForeground() { guard isActive, let code = savedRoomCode else { return } NSLog("[WebRTC] App returned to foreground, reconnecting to room: %@", code) + reconnectCurrentRoom(reason: "app_foreground", roomCode: code) + } + + func setIncomingRemoteVideoEnabled(_ enabled: Bool) { + guard currentCaptureMode == .iPhone else { + incomingRemoteVideoEnabled = true + return + } + guard enabled != wantsIncomingRemoteVideo else { return } + wantsIncomingRemoteVideo = enabled + incomingRemoteVideoEnabled = enabled + remoteVideoTrack = nil + hasRemoteVideo = false + guard let code = savedRoomCode, isActive else { return } + NSLog( + "[WebRTC] Reconfiguring incoming supervisor video: %@", + enabled ? "enabled" : "disabled" + ) + reconnectCurrentRoom(reason: enabled ? 
"enable_remote_video" : "disable_remote_video", roomCode: code) + } + + private func reconnectCurrentRoom(reason: String, roomCode code: String) { connectionState = .connecting // Tear down old peer connection, set up fresh one @@ -168,9 +413,10 @@ class WebRTCSessionViewModel: ObservableObject { Task { let iceServers = await WebRTCConfig.fetchIceServers() - setupWebRTCClient(iceServers: iceServers) + setupWebRTCClient(iceServers: iceServers, captureMode: currentCaptureMode) connectSignaling(rejoinCode: code) } + NSLog("[WebRTC] Reconnecting room %@ (%@)", code, reason) } // MARK: - Signaling Message Handling @@ -182,15 +428,38 @@ class WebRTCSessionViewModel: ObservableObject { savedRoomCode = code connectionState = .waitingForPeer NSLog("[WebRTC] Room created: %@", code) + Task { + await WorkerTelemetry.shared.record( + "webrtc_room_created", + source: "webrtc", + stage: "room", + payload: ["room_code_present": true] + ) + } case .roomRejoined(let code): roomCode = code savedRoomCode = code connectionState = .waitingForPeer NSLog("[WebRTC] Room rejoined: %@", code) + Task { + await WorkerTelemetry.shared.record( + "webrtc_room_rejoined", + source: "webrtc", + stage: "room", + payload: ["room_code_present": true] + ) + } case .peerJoined: NSLog("[WebRTC] Peer joined, creating offer") + Task { + await WorkerTelemetry.shared.record( + "webrtc_peer_joined", + source: "webrtc", + stage: "peer" + ) + } webRTCClient?.createOffer { [weak self] sdp in self?.signalingClient?.send(sdp: sdp) } @@ -212,8 +481,23 @@ class WebRTCSessionViewModel: ObservableObject { case .peerLeft: NSLog("[WebRTC] Peer left") connectionState = .waitingForPeer + Task { + await WorkerTelemetry.shared.record( + "webrtc_peer_left", + source: "webrtc", + stage: "peer" + ) + } case .error(let msg): + Task { + await WorkerTelemetry.shared.record( + "webrtc_signaling_error", + source: "webrtc", + stage: "failed", + payload: ["error": msg] + ) + } // If rejoin fails (room expired), fall back to creating 
a new room if savedRoomCode != nil && msg == "Room not found" { NSLog("[WebRTC] Rejoin failed (room expired), creating new room") @@ -235,10 +519,31 @@ class WebRTCSessionViewModel: ObservableObject { case .connected, .completed: connectionState = .connected NSLog("[WebRTC] Peer connected") + Task { + await WorkerTelemetry.shared.record( + "webrtc_ice_connected", + source: "webrtc", + stage: "connected" + ) + } case .disconnected: connectionState = .waitingForPeer + Task { + await WorkerTelemetry.shared.record( + "webrtc_ice_disconnected", + source: "webrtc", + stage: "disconnected" + ) + } case .failed: connectionState = .error("Connection failed") + Task { + await WorkerTelemetry.shared.record( + "webrtc_ice_failed", + source: "webrtc", + stage: "failed" + ) + } case .closed: connectionState = .disconnected default: @@ -261,6 +566,126 @@ class WebRTCSessionViewModel: ObservableObject { hasRemoteVideo = false NSLog("[WebRTC] Remote video track removed") } + + fileprivate func handleRemoteAudioTrackReceived(_ track: RTCAudioTrack) { + guard roomMode.usesAudio else { + track.isEnabled = false + remoteAudioTrack = nil + hasRemoteAudio = false + NSLog("[WebRTC] Ignoring remote audio track in observation mode") + return + } + track.isEnabled = true + remoteAudioTrack = track + hasRemoteAudio = true + NSLog("[WebRTC] Remote audio track received") + Task { + await WorkerTelemetry.shared.record( + "webrtc_remote_audio_track", + source: "webrtc", + stage: "receiver", + payload: [ + "enabled": track.isEnabled, + "track_id": track.trackId + ] + ) + } + } + + fileprivate func handleSenderStats(_ stats: WebRTCSenderStats) { + let enqueueMs = stats.lastEnqueueDurationMs ?? 0 + let captureModeLabel = currentCaptureMode == .iPhone ? "iphone" : "glasses" + Task { + await WorkerTelemetry.shared.record( + "webrtc_sender_stats", + source: "webrtc", + stage: isUsingPhoneFallbackProfile ? 
"fallback" : "sender", + metricValue: stats.windowFramesPerSecond, + metricUnit: "fps", + payload: [ + "sender_fps": stats.windowFramesPerSecond, + "dropped_frames": stats.windowDroppedFrames, + "enqueue_ms": enqueueMs, + "fallback_profile": isUsingPhoneFallbackProfile, + "capture_mode": captureModeLabel + ] + ) + } + let minimumHealthyFps = currentCaptureMode == .glasses ? 9.0 : 14.0 + let maxHealthyEnqueueMs = currentCaptureMode == .glasses ? 28.0 : 20.0 + let isUnderPressure = enqueueMs > maxHealthyEnqueueMs + || stats.windowDroppedFrames >= 3 + || stats.windowFramesPerSecond < minimumHealthyFps + + if isUnderPressure { + isUnderLiveVideoPressure = true + stablePhoneSenderWindows = 0 + guard !isUsingPhoneFallbackProfile else { return } + isUsingPhoneFallbackProfile = true + webRTCClient?.updateStreamProfile(fallbackProfile(for: currentCaptureMode)) + Task { + await WorkerTelemetry.shared.record( + "webrtc_profile_downgrade", + source: "webrtc", + stage: "fallback", + payload: [ + "sender_fps": stats.windowFramesPerSecond, + "dropped_frames": stats.windowDroppedFrames, + "enqueue_ms": enqueueMs, + "capture_mode": captureModeLabel + ] + ) + } + NSLog( + "[WebRTC] %@ sender downgraded to fallback profile (fps=%.1f dropped=%lld enqueue=%@ms)", + captureModeLabel, + stats.windowFramesPerSecond, + stats.windowDroppedFrames, + stats.lastEnqueueDurationMs.map { String(format: "%.1f", $0) } ?? 
"direct" + ) + return + } + + isUnderLiveVideoPressure = false + guard isUsingPhoneFallbackProfile else { return } + stablePhoneSenderWindows += 1 + + if stablePhoneSenderWindows >= 6 { + stablePhoneSenderWindows = 0 + isUsingPhoneFallbackProfile = false + webRTCClient?.updateStreamProfile(defaultProfile(for: currentCaptureMode)) + Task { + await WorkerTelemetry.shared.record( + "webrtc_profile_restore", + source: "webrtc", + stage: "sender", + payload: [ + "stable_windows": stablePhoneSenderWindows, + "capture_mode": captureModeLabel + ] + ) + } + NSLog("[WebRTC] %@ sender restored to default support profile", captureModeLabel) + } + } + + private func defaultProfile(for mode: StreamingMode) -> WebRTCStreamProfile { + switch mode { + case .iPhone: + return WebRTCConfig.supportModePhoneProfile + case .glasses: + return WebRTCConfig.supportModeGlassesProfile + } + } + + private func fallbackProfile(for mode: StreamingMode) -> WebRTCStreamProfile { + switch mode { + case .iPhone: + return WebRTCConfig.supportModePhoneFallbackProfile + case .glasses: + return WebRTCConfig.supportModeGlassesFallbackProfile + } + } } // MARK: - Delegate Adapter (bridges nonisolated delegate to @MainActor ViewModel) @@ -295,4 +720,16 @@ private class WebRTCDelegateAdapter: WebRTCClientDelegate { self?.viewModel?.handleRemoteVideoTrackRemoved(track) } } + + func webRTCClient(_ client: WebRTCClient, didReceiveRemoteAudioTrack track: RTCAudioTrack) { + Task { @MainActor [weak self] in + self?.viewModel?.handleRemoteAudioTrackReceived(track) + } + } + + func webRTCClient(_ client: WebRTCClient, didUpdateSenderStats stats: WebRTCSenderStats) { + Task { @MainActor [weak self] in + self?.viewModel?.handleSenderStats(stats) + } + } } diff --git a/samples/CameraAccess/CameraAccess/iPhone/IPhoneCameraManager.swift b/samples/CameraAccess/CameraAccess/iPhone/IPhoneCameraManager.swift index 5587de8f..6d713b84 100644 --- a/samples/CameraAccess/CameraAccess/iPhone/IPhoneCameraManager.swift +++ 
b/samples/CameraAccess/CameraAccess/iPhone/IPhoneCameraManager.swift @@ -1,35 +1,163 @@ import AVFoundation import UIKit -class IPhoneCameraManager: NSObject { +private struct SendablePixelBuffer: @unchecked Sendable { + let pixelBuffer: CVPixelBuffer +} + +class IPhoneCameraManager: NSObject, @unchecked Sendable { private let captureSession = AVCaptureSession() private let videoOutput = AVCaptureVideoDataOutput() + private let movieOutput = AVCaptureMovieFileOutput() private let sessionQueue = DispatchQueue(label: "iphone-camera-session") + private let analysisQueue = DispatchQueue(label: "iphone-camera-analysis", qos: .userInitiated) private let context = CIContext() private var isRunning = false + private var isConfigured = false + private var isAudioInputConfigured = false + private var recordingCompletion: ((URL?) -> Void)? + private var currentRecordingURL: URL? + private var lastAnalysisEmissionAt: CFTimeInterval = 0 + private var isAnalysisConversionInFlight = false + private var sampleFrameCount: Int64 = 0 + private var analysisFrameCount: Int64 = 0 + private var statsWindowStart = CACurrentMediaTime() + private var hasDeliveredFirstPreviewFrame = false var onFrameCaptured: ((UIImage) -> Void)? + var onSampleBufferCaptured: ((CMSampleBuffer) -> Void)? + var onFirstPreviewFrame: (() -> Void)? 
+ var analysisFrameInterval: CFTimeInterval = 0.2 + var analysisMaxDimension: CGFloat = 720 + var previewSession: AVCaptureSession { captureSession } func start() { guard !isRunning else { return } sessionQueue.async { [weak self] in + NSLog("[iPhoneCamera] start() requested") + self?.sampleFrameCount = 0 + self?.analysisFrameCount = 0 + self?.statsWindowStart = CACurrentMediaTime() + self?.lastAnalysisEmissionAt = 0 + self?.isAnalysisConversionInFlight = false + self?.hasDeliveredFirstPreviewFrame = false self?.configureSession() self?.captureSession.startRunning() self?.isRunning = true + let sessionRunning = self?.captureSession.isRunning == true + Task { + await WorkerTelemetry.shared.record( + "iphone_camera_start", + source: "ios_app", + stage: "camera", + payload: ["session_running": sessionRunning] + ) + } + NSLog("[iPhoneCamera] captureSession.startRunning() complete (isRunning=%@, sessionRunning=%@)", + self?.isRunning == true ? "true" : "false", + self?.captureSession.isRunning == true ? "true" : "false") + } + } + + func startRecording(sessionID: String) { + sessionQueue.async { [weak self] in + guard let self else { return } + self.configureSession() + + NSLog( + "[iPhoneCamera] startRecording requested (sessionConfigured=%@, sessionRunning=%@, alreadyRecording=%@)", + self.isConfigured ? "true" : "false", + self.captureSession.isRunning ? "true" : "false", + self.movieOutput.isRecording ? "true" : "false") + + if let attributes = try? FileManager.default.attributesOfFileSystem(forPath: FileManager.default.temporaryDirectory.path), + let freeSize = attributes[.systemFreeSize] as? 
NSNumber { + NSLog("[iPhoneCamera] Free disk space before recording: %@ bytes", freeSize) + } + + guard self.captureSession.isRunning else { + NSLog("[iPhoneCamera] Cannot start recording because capture session is not running") + return + } + guard !self.movieOutput.isRecording else { + NSLog("[iPhoneCamera] Ignoring startRecording because movie output is already recording") + return + } + + let fileURL = FileManager.default.temporaryDirectory + .appendingPathComponent("sop_\(sessionID)") + .appendingPathExtension("mp4") + try? FileManager.default.removeItem(at: fileURL) + self.currentRecordingURL = fileURL + + self.movieOutput.startRecording(to: fileURL, recordingDelegate: self) + Task { + await WorkerTelemetry.shared.record( + "iphone_recording_start", + source: "ios_app", + stage: "recording", + sessionID: sessionID, + payload: [ + "file_extension": fileURL.pathExtension, + "disk_free_checked": true + ] + ) + } + NSLog("[iPhoneCamera] Started recording SOP video: %@", fileURL.path) + } + } + /* Stops the movie recording on sessionQueue and resumes with the URL passed to recordingCompletion; resumes immediately with currentRecordingURL when nothing is recording, or with nil when the manager has been deallocated. */ + func stopRecording() async -> URL? { + await withCheckedContinuation { continuation in + sessionQueue.async { [weak self] in + guard let self else { + continuation.resume(returning: nil) + return + } + NSLog( + "[iPhoneCamera] stopRecording requested (isRecording=%@, currentRecordingURL=%@)", + self.movieOutput.isRecording ? "true" : "false", + self.currentRecordingURL?.path ?? "nil") + + guard self.movieOutput.isRecording else { + NSLog("[iPhoneCamera] stopRecording returning currentRecordingURL immediately because movieOutput.isRecording=false") + continuation.resume(returning: self.currentRecordingURL) + return + } + /* Chain any completion still waiting from an earlier stopRecording call; overwriting it would leave that caller's continuation unresumed. */ let pendingCompletion = self.recordingCompletion + self.recordingCompletion = { url in + NSLog("[iPhoneCamera] stopRecording completion fired with URL=%@", url?.path ??
"nil") + pendingCompletion?(url) + continuation.resume(returning: url) + } + self.movieOutput.stopRecording() + NSLog("[iPhoneCamera] movieOutput.stopRecording() called") + } } } func stop() { guard isRunning else { return } sessionQueue.async { [weak self] in + NSLog("[iPhoneCamera] stop() requested") self?.captureSession.stopRunning() self?.isRunning = false + Task { + await WorkerTelemetry.shared.record( + "iphone_camera_stop", + source: "ios_app", + stage: "camera" + ) + } + NSLog("[iPhoneCamera] captureSession.stopRunning() complete") } } private func configureSession() { + guard !isConfigured else { return } + captureSession.beginConfiguration() - captureSession.sessionPreset = .medium + captureSession.sessionPreset = .iFrame960x540 // Add back camera input guard let camera = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back), @@ -43,6 +171,16 @@ class IPhoneCameraManager: NSObject { captureSession.addInput(input) } + if let microphone = AVCaptureDevice.default(for: .audio), + let audioInput = try? AVCaptureDeviceInput(device: microphone), + captureSession.canAddInput(audioInput) { + captureSession.addInput(audioInput) + isAudioInputConfigured = true + } else { + isAudioInputConfigured = false + NSLog("[iPhoneCamera] Microphone input unavailable for recording") + } + // Add video output videoOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA @@ -54,6 +192,11 @@ class IPhoneCameraManager: NSObject { captureSession.addOutput(videoOutput) } + // Add movie output for full-session recording.
+ if captureSession.canAddOutput(movieOutput) { + captureSession.addOutput(movieOutput) + } + // Force portrait-oriented frames from the sensor if let connection = videoOutput.connection(with: .video) { if connection.isVideoRotationAngleSupported(90) { @@ -61,8 +204,39 @@ class IPhoneCameraManager: NSObject { } } + if let movieConnection = movieOutput.connection(with: .video) { + if movieConnection.isVideoRotationAngleSupported(90) { + movieConnection.videoRotationAngle = 90 + } + } + captureSession.commitConfiguration() - NSLog("[iPhoneCamera] Session configured") + isConfigured = true + NSLog("[iPhoneCamera] Session configured successfully") + } + + func waitUntilRunningAndAudioConfigured(timeout: TimeInterval) async -> Bool { + let deadline = CACurrentMediaTime() + max(0, timeout) + while !Task.isCancelled { + let ready = await withCheckedContinuation { continuation in + sessionQueue.async { [weak self] in + guard let self else { + continuation.resume(returning: false) + return + } + continuation.resume( + returning: self.isConfigured && + self.isRunning && + self.captureSession.isRunning && + self.isAudioInputConfigured + ) + } + } + if ready { return true } + if CACurrentMediaTime() >= deadline { return false } + try? 
await Task.sleep(nanoseconds: 100_000_000) + } + return false } static func requestPermission() async -> Bool { @@ -86,12 +260,156 @@ extension IPhoneCameraManager: AVCaptureVideoDataOutputSampleBufferDelegate { didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection ) { - guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return } + sampleFrameCount += 1 + onSampleBufferCaptured?(sampleBuffer) - let ciImage = CIImage(cvPixelBuffer: pixelBuffer) - guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return } - let image = UIImage(cgImage: cgImage) + if !hasDeliveredFirstPreviewFrame { + hasDeliveredFirstPreviewFrame = true + let onFirstPreviewFrame = onFirstPreviewFrame + DispatchQueue.main.async { + onFirstPreviewFrame?() + } + } + + logCaptureStatsIfNeeded() + + guard onFrameCaptured != nil else { return } + + let now = CACurrentMediaTime() + guard now - lastAnalysisEmissionAt >= analysisFrameInterval else { return } + guard !isAnalysisConversionInFlight else { return } + lastAnalysisEmissionAt = now + isAnalysisConversionInFlight = true + + guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { + isAnalysisConversionInFlight = false + return + } + analysisFrameCount += 1 + let frameHandler = onFrameCaptured + let maxDimension = analysisMaxDimension + let analysisPixelBuffer = SendablePixelBuffer(pixelBuffer: pixelBuffer) + analysisQueue.async { [weak self] in + let image = self?.makeUIImage(from: analysisPixelBuffer.pixelBuffer, maxDimension: maxDimension) + self?.sessionQueue.async { [weak self] in + self?.isAnalysisConversionInFlight = false + } + guard let image else { return } + frameHandler?(image) + } + } +} + +extension IPhoneCameraManager: AVCaptureFileOutputRecordingDelegate { + func fileOutput( + _ output: AVCaptureFileOutput, + didStartRecordingTo fileURL: URL, + from connections: [AVCaptureConnection] + ) { + NSLog("[iPhoneCamera] didStartRecordingTo fired for 
%@", fileURL.path) + } + /* Recording-finished delegate callback: logs the outcome, reports the file size to WorkerTelemetry, stores the output URL, and passes it (or nil when the recording is unusable) to the stored recordingCompletion handler. */ + func fileOutput( + _ output: AVCaptureFileOutput, + didFinishRecordingTo outputFileURL: URL, + from connections: [AVCaptureConnection], + error: Error? + ) { + if let error { + NSLog("[iPhoneCamera] Recording failed: %@", error.localizedDescription) + } else { + NSLog("[iPhoneCamera] Recording finished: %@", outputFileURL.path) + } + /* AVFoundation can report an error even though the movie file was written completely; AVErrorRecordingSuccessfullyFinishedKey in the error's userInfo says whether the file is usable. */ let finishedSuccessfully = error == nil || ((error as NSError?)?.userInfo[AVErrorRecordingSuccessfullyFinishedKey] as? Bool) == true + if let attributes = try? FileManager.default.attributesOfItem(atPath: outputFileURL.path), + let fileSize = attributes[.size] as? NSNumber { + NSLog("[iPhoneCamera] Recorded file size: %@ bytes", fileSize) + let stage = finishedSuccessfully ? "recorded" : "failed" + let errorMessage = error?.localizedDescription + let byteCount = fileSize.intValue + let metricValue = fileSize.doubleValue + Task { + await WorkerTelemetry.shared.record( + "iphone_recording_finish", + source: "ios_app", + stage: stage, + metricValue: metricValue, + metricUnit: "bytes", + payload: [ + "bytes": byteCount, + "error": errorMessage ?? NSNull() + ] + ) + } + } else { + NSLog("[iPhoneCamera] Could not read recorded file size at %@", outputFileURL.path) + let errorMessage = error?.localizedDescription + Task { + await WorkerTelemetry.shared.record( + "iphone_recording_finish", + source: "ios_app", + stage: "unknown_size", + payload: ["error": errorMessage ?? NSNull()] + ) + } + } + + let completion = recordingCompletion + recordingCompletion = nil + currentRecordingURL = outputFileURL + completion?(finishedSuccessfully ? outputFileURL : nil) + } +} + +extension IPhoneCameraManager { + fileprivate func makeUIImage(from pixelBuffer: CVPixelBuffer, maxDimension: CGFloat? = nil) -> UIImage?
{ + var ciImage = CIImage(cvPixelBuffer: pixelBuffer) + let extent = ciImage.extent + + if let maxDimension, + maxDimension > 0 { + let largestDimension = max(extent.width, extent.height) + if largestDimension > maxDimension { + let scale = maxDimension / largestDimension + ciImage = ciImage.transformed(by: CGAffineTransform(scaleX: scale, y: scale)) + } + } + + guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return nil } + return UIImage(cgImage: cgImage) + } + /* Logs the preview and analysis frame rates and records them with WorkerTelemetry once per 120-sample window, then restarts the window by zeroing the counters. */ + private func logCaptureStatsIfNeeded() { + /* Gate on a full window only: the counters are zeroed at the end, so also passing on `sampleFrameCount == 1` re-fired the log and telemetry on every single frame. */ guard sampleFrameCount >= 120 else { return } + + let now = CACurrentMediaTime() + let elapsed = max(now - statsWindowStart, 0.001) + let previewFPS = Double(sampleFrameCount) / elapsed + let analysisFPS = Double(analysisFrameCount) / elapsed + /* Read the recording flag once so the log line and the telemetry payload report the same value. */ let isRecording = movieOutput.isRecording + NSLog( + "[iPhoneCamera] Preview stats preview=%.1ffps analysis=%.1ffps recording=%@", + previewFPS, + analysisFPS, + isRecording ? "true" : "false" + ) + Task { + await WorkerTelemetry.shared.record( + "iphone_camera_fps", + source: "ios_app", + stage: "camera", + metricValue: previewFPS, + metricUnit: "fps", + payload: [ + "preview_fps": previewFPS, + "analysis_fps": analysisFPS, + "recording": isRecording + ] + ) + } - onFrameCaptured?(image) + statsWindowStart = now + sampleFrameCount = 0 + analysisFrameCount = 0 } } diff --git a/samples/CameraAccess/CameraAccessTests/CameraAccessTests.swift b/samples/CameraAccess/CameraAccessTests/CameraAccessTests.swift index 5e9a28d5..ab190be9 100644 --- a/samples/CameraAccess/CameraAccessTests/CameraAccessTests.swift +++ b/samples/CameraAccess/CameraAccessTests/CameraAccessTests.swift @@ -8,12 +8,14 @@ import Foundation import MWDATCore -import MWDATMockDevice import SwiftUI import XCTest @testable import CameraAccess +#if canImport(MWDATMockDevice) +import MWDATMockDevice + @MainActor class ViewModelIntegrationTests: XCTestCase { @@ -156,3 +158,1251 @@ class ViewModelIntegrationTests: XCTestCase
{ XCTAssertTrue([.stopped, .waiting].contains(viewModel.streamingStatus)) } } +#endif + +final class BackendBootstrapDecodingTests: XCTestCase { + func testBootstrapPayloadAcceptsCloudSQLNumericStringsAndNumericStepDurations() throws { + let json = """ + { + "worker": { + "id": "11111111-1111-1111-1111-111111111111", + "login_code": "EMBC-0001", + "display_name": "Lucas Pereira", + "role": "Kitchen Staff", + "active": true + }, + "device": { + "id": "22222222-2222-4222-8222-222222222222", + "worker_id": "11111111-1111-1111-1111-111111111111", + "platform": "ios", + "device_label": "iPhone" + }, + "queue": [ + { + "shift_assignment_id": "33333333-3333-4333-8333-333333333333", + "worker_id": "11111111-1111-1111-1111-111111111111", + "package_id": "44444444-4444-4444-8444-444444444444", + "package_title": "Meal Prep", + "package_version": "2", + "package_run_id": "55555555-5555-4555-8555-555555555555", + "sop_id": "66666666-6666-4666-8666-666666666666", + "sop_title": "Burger Assembly", + "sop_version": "1", + "sort_order": "1", + "required": "true", + "active": true, + "source_type": "package", + "steps": [ + { + "id": "step-1", + "title": "Prepare the bun", + "duration": 15, + "validation": "visual", + "allowManualComplete": false + }, + { + "index": 1, + "title": "Record temperature log", + "instruction": "Read the temperature logger display.", + "requires_photo": false + } + ] + } + ], + "assigned_packages": [], + "worker_session_token": "worker-token", + "worker_session_expires_at": "2026-05-31T14:43:20.929Z" + } + """ + + let payload = try JSONDecoder().decode(BootstrapPayload.self, from: Data(json.utf8)) + + XCTAssertEqual(payload.worker.loginCode, "EMBC-0001") + XCTAssertEqual(payload.queue.first?.sortOrder, 1) + XCTAssertEqual(payload.queue.first?.packageVersion, 2) + XCTAssertEqual(payload.queue.first?.steps.first?.duration, "15") + XCTAssertEqual(payload.queue.first?.steps.last?.description, "Read the temperature logger display.") + 
XCTAssertEqual(payload.workerSessionToken, "worker-token") + } + + func testExecutionEventDecodesCloudSQLUuidResponse() throws { + let json = """ + { + "id": "bb81da90-9e5e-491e-ba02-0c91730b35b9", + "session_id": "0089c81d-e79f-407f-a235-a1b84a535c9c", + "event_type": "step_complete", + "payload": { + "source": "vision", + "checked": true, + "step_index": 0 + }, + "created_at": "2026-05-31T02:57:53.933Z", + "workspace_id": "00000000-0000-4000-8000-000000000001" + } + """ + + let event = try JSONDecoder().decode(BackendExecutionEvent.self, from: Data(json.utf8)) + + XCTAssertEqual(event.id, "bb81da90-9e5e-491e-ba02-0c91730b35b9") + XCTAssertEqual(event.sessionID, "0089c81d-e79f-407f-a235-a1b84a535c9c") + XCTAssertEqual(event.eventType, "step_complete") + } +} + +private final class RequestCaptureURLProtocol: URLProtocol { + static var handler: ((URLRequest) throws -> (HTTPURLResponse, Data))? + + override class func canInit(with request: URLRequest) -> Bool { + true + } + + override class func canonicalRequest(for request: URLRequest) -> URLRequest { + request + } + + override func startLoading() { + guard let handler = Self.handler else { + client?.urlProtocol(self, didFailWithError: URLError(.badServerResponse)) + return + } + + do { + let (response, data) = try handler(request) + client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed) + client?.urlProtocol(self, didLoad: data) + client?.urlProtocolDidFinishLoading(self) + } catch { + client?.urlProtocol(self, didFailWithError: error) + } + } + + override func stopLoading() {} +} + +private struct WorkerUploadTargetRequestCapture: Equatable { + let sessionID: String + let assetType: String + let filename: String + let contentType: String + let byteSize: Int + let source: String? 
+} + +private struct WorkerAdminAPISnapshot { + let heartbeats: [WorkerLiveHeartbeatRequest] + let uploadTargetRequests: [WorkerUploadTargetRequestCapture] + let uploadCalls: [(assetID: String, byteSize: Int, contentType: String)] + let finalizeRequests: [WorkerMediaFinalizeRequest] + let telemetryBatches: [WorkerTelemetryBatch] + let liveTokenRequests: [(model: String?, sessionID: String?)] + let spotterRequests: [GeminiSpotterRequest] +} + /* Returns the request body: `httpBody` when set, otherwise the bytes drained from `httpBodyStream`; nil when the request has neither or a stream read reports an error. */ +private func requestBodyData(from request: URLRequest) -> Data? { + if let body = request.httpBody { + return body + } + + guard let stream = request.httpBodyStream else { return nil } + stream.open() + defer { stream.close() } + + let bufferSize = 1024 + /* The pointee type must be spelled out: it cannot be inferred from this statement alone, and InputStream.read(_:maxLength:) expects a UInt8 buffer. */ let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize) + defer { buffer.deallocate() } + + var data = Data() + while stream.hasBytesAvailable { + let bytesRead = stream.read(buffer, maxLength: bufferSize) + if bytesRead < 0 { + return nil + } + if bytesRead == 0 { + break + } + data.append(buffer, count: bytesRead) + } + + return data +} + +private final class WorkerAdminAPIMock: WorkerAdminAPI, @unchecked Sendable { + private let lock = NSLock() + + var heartbeatErrors: [Error] = [] + var uploadTargetErrors: [Error] = [] + var uploadErrors: [Error] = [] + var finalizeErrors: [Error] = [] + var telemetryErrors: [Error] = [] + var liveTokenErrors: [Error] = [] + var spotterErrors: [Error] = [] + var uploadTargetResponses: [WorkerMediaUploadTarget] = [] + var liveTokenResponses: [GeminiLiveTokenResponse] = [] + var spotterResponses: [GeminiSpotterResponse] = [] + var onFinalizeAttempt: ((WorkerMediaFinalizeRequest) -> Void)?
+ + private var recordedHeartbeats: [WorkerLiveHeartbeatRequest] = [] + private var recordedUploadTargetRequests: [WorkerUploadTargetRequestCapture] = [] + private var recordedUploadCalls: [(assetID: String, byteSize: Int, contentType: String)] = [] + private var recordedFinalizeRequests: [WorkerMediaFinalizeRequest] = [] + private var recordedTelemetryBatches: [WorkerTelemetryBatch] = [] + private var recordedLiveTokenRequests: [(model: String?, sessionID: String?)] = [] + private var recordedSpotterRequests: [GeminiSpotterRequest] = [] + + func sendWorkerLiveHeartbeat(_ heartbeat: WorkerLiveHeartbeatRequest) async throws -> WorkerLiveHeartbeatResponse { + let queuedError = lock.withLock { () -> Error? in + recordedHeartbeats.append(heartbeat) + return heartbeatErrors.isEmpty ? nil : heartbeatErrors.removeFirst() + } + + if let queuedError { + throw queuedError + } + + return WorkerLiveHeartbeatResponse( + sessionID: heartbeat.sessionID, + updatedAt: "2026-05-30T18:31:00.000Z", + isFreshLiveSession: true, + webrtcRoomCode: heartbeat.webrtcRoomCode, + supportMode: heartbeat.helpRequested ? "handoff_requested" : "ai", + aiSessionStatus: heartbeat.helpRequested ? "paused" : "active", + humanSupportStatus: heartbeat.helpRequested ? "ringing" : "none", + shouldOpenLiveRoom: false + ) + } + + func requestWorkerMediaUploadTarget( + sessionID: String, + assetType: String, + filename: String, + contentType: String, + byteSize: Int, + source: String? + ) async throws -> WorkerMediaUploadTarget { + let (queuedError, response) = lock.withLock { () -> (Error?, WorkerMediaUploadTarget) in + recordedUploadTargetRequests.append( + WorkerUploadTargetRequestCapture( + sessionID: sessionID, + assetType: assetType, + filename: filename, + contentType: contentType, + byteSize: byteSize, + source: source + ) + ) + let queuedError = uploadTargetErrors.isEmpty ? 
nil : uploadTargetErrors.removeFirst() + let response: WorkerMediaUploadTarget + if uploadTargetResponses.isEmpty { + let index = recordedUploadTargetRequests.count + response = WorkerMediaUploadTarget( + assetID: "\(assetType)-asset-\(index)", + bucket: "\(assetType)-bucket", + path: "sessions/\(sessionID)/\(filename)", + uploadURL: "https://upload.example/\(assetType)-\(index)" + ) + } else { + response = uploadTargetResponses.removeFirst() + } + return (queuedError, response) + } + + if let queuedError { + throw queuedError + } + return response + } + + func finalizeWorkerMediaUpload(_ finalize: WorkerMediaFinalizeRequest) async throws { + let (queuedError, finalizeHandler) = lock.withLock { () -> (Error?, ((WorkerMediaFinalizeRequest) -> Void)?) in + recordedFinalizeRequests.append(finalize) + let queuedError = finalizeErrors.isEmpty ? nil : finalizeErrors.removeFirst() + return (queuedError, onFinalizeAttempt) + } + + finalizeHandler?(finalize) + + if let queuedError { + throw queuedError + } + } + + func uploadBinary( + to target: WorkerMediaUploadTarget, + data: Data, + contentType: String + ) async throws { + let queuedError = lock.withLock { () -> Error? in + recordedUploadCalls.append((assetID: target.assetID, byteSize: data.count, contentType: contentType)) + return uploadErrors.isEmpty ? nil : uploadErrors.removeFirst() + } + + if let queuedError { + throw queuedError + } + } + + func sendWorkerTelemetryBatch(_ batch: WorkerTelemetryBatch) async throws { + let queuedError = lock.withLock { () -> Error? in + recordedTelemetryBatches.append(batch) + return telemetryErrors.isEmpty ? nil : telemetryErrors.removeFirst() + } + + if let queuedError { + throw queuedError + } + } + + func requestGeminiLiveToken( + model: String?, + sessionID: String? 
+ ) async throws -> GeminiLiveTokenResponse { + let (queuedError, response) = lock.withLock { () -> (Error?, GeminiLiveTokenResponse) in + recordedLiveTokenRequests.append((model: model, sessionID: sessionID)) + let queuedError = liveTokenErrors.isEmpty ? nil : liveTokenErrors.removeFirst() + let response = liveTokenResponses.isEmpty + ? GeminiLiveTokenResponse( + token: "ephemeral-token", + expiresAt: "2026-05-30T19:00:00.000Z", + newSessionExpiresAt: "2026-05-30T18:31:00.000Z", + model: model ?? GeminiConfig.model, + websocketBaseURL: GeminiConfig.ephemeralTokenWebsocketBaseURL, + queryParameterName: "access_token", + systemInstruction: "Server-built checklist instruction.", + runtimeContext: nil, + diagnosticsID: "test-diagnostics", + provider: "gemini" + ) + : liveTokenResponses.removeFirst() + return (queuedError, response) + } + + if let queuedError { + throw queuedError + } + return response + } + + func requestGeminiSpotter(_ request: GeminiSpotterRequest) async throws -> GeminiSpotterResponse { + let (queuedError, response) = lock.withLock { () -> (Error?, GeminiSpotterResponse) in + recordedSpotterRequests.append(request) + let queuedError = spotterErrors.isEmpty ? nil : spotterErrors.removeFirst() + let response = spotterResponses.isEmpty + ? 
GeminiSpotterResponse( + matched: true, + confidence: 0.93, + reason: "Clear visual evidence.", + evidenceTimestamp: request.capturedAt, + threshold: 0.88, + model: "gemini-3.5-flash", + autoComplete: true, + modelAutoComplete: nil, + evidenceWindowSatisfied: nil, + activeDurationSatisfied: nil, + elapsedActiveMs: nil, + minActiveSeconds: nil, + stableObservations: nil, + stableObservationsRequired: nil, + advancedToStepIndex: nil, + completedSop: nil, + packageProgressWarning: nil + ) + : spotterResponses.removeFirst() + return (queuedError, response) + } + + if let queuedError { + throw queuedError + } + return response + } + + func snapshot() -> WorkerAdminAPISnapshot { + lock.lock() + defer { lock.unlock() } + return WorkerAdminAPISnapshot( + heartbeats: recordedHeartbeats, + uploadTargetRequests: recordedUploadTargetRequests, + uploadCalls: recordedUploadCalls, + finalizeRequests: recordedFinalizeRequests, + telemetryBatches: recordedTelemetryBatches, + liveTokenRequests: recordedLiveTokenRequests, + spotterRequests: recordedSpotterRequests + ) + } +} + +private actor SleepRecorder { + private(set) var values: [UInt64] = [] + + func record(_ value: UInt64) { + values.append(value) + } + + func snapshot() -> [UInt64] { + values + } +} + +private final class CallOrderRecorder: @unchecked Sendable { + private let lock = NSLock() + private var storedValues: [String] = [] + + func append(_ value: String) { + lock.lock() + storedValues.append(value) + lock.unlock() + } + + var values: [String] { + lock.lock() + defer { lock.unlock() } + return storedValues + } +} + +final class WorkerAdminLiveSessionCoordinatorTests: XCTestCase { + private func makeTempFile(data: Data, suffix: String = UUID().uuidString) throws -> URL { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("worker-admin-\(suffix)") + .appendingPathExtension("mp4") + try? 
FileManager.default.removeItem(at: url) + try data.write(to: url) + return url + } + + func testHeartbeatKeepsStickyRoomCodeAndLastFrameLocation() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "frame-asset-1", + bucket: "live-frames", + path: "sessions/session-1/last-frame.jpg", + uploadURL: "https://upload.example/frame-1" + ) + ] + + let frameFinalized = expectation(description: "frame finalized") + api.onFinalizeAttempt = { finalize in + if finalize.assetID == "frame-asset-1", finalize.status == "uploaded" { + frameFinalized.fulfill() + } + } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + await coordinator.start(sessionID: "session-1", currentStepIndex: 0, helpRequested: false) + await coordinator.updateRoomCode("ROOM42") + await coordinator.enqueueFrameUpload(data: Data([0x01, 0x02, 0x03])) + + await fulfillment(of: [frameFinalized], timeout: 1.0) + + await coordinator.updateRoomCode("") + await coordinator.updateHelpRequested(true) + + let snapshot = api.snapshot() + XCTAssertEqual(snapshot.heartbeats.first?.webrtcRoomCode, nil) + XCTAssertEqual(snapshot.heartbeats.last?.webrtcRoomCode, "ROOM42") + XCTAssertEqual(snapshot.heartbeats.last?.lastFrameBucket, "live-frames") + XCTAssertEqual(snapshot.heartbeats.last?.lastFramePath, "sessions/session-1/last-frame.jpg") + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "uploaded") + } + + func testFrameUploadFinalizesFailedWhenUploadFailsAfterRetries() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "frame-asset-2", + bucket: "live-frames", + path: "sessions/session-2/last-frame.jpg", + uploadURL: "https://upload.example/frame-2" + ) + ] + api.uploadErrors = [ + URLError(.networkConnectionLost), + URLError(.networkConnectionLost), + URLError(.networkConnectionLost), + 
URLError(.networkConnectionLost), + ] + + let sleepRecorder = SleepRecorder() + let frameFailed = expectation(description: "frame failed finalize") + api.onFinalizeAttempt = { finalize in + if finalize.assetID == "frame-asset-2", finalize.status == "failed" { + frameFailed.fulfill() + } + } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + heartbeatIntervalNanoseconds: 0, + sleeper: { value in + await sleepRecorder.record(value) + } + ) + + await coordinator.start(sessionID: "session-2", currentStepIndex: 0, helpRequested: false) + await coordinator.enqueueFrameUpload(data: Data([0x0A, 0x0B])) + + await fulfillment(of: [frameFailed], timeout: 1.0) + + let snapshot = api.snapshot() + let recordedSleeps = await sleepRecorder.snapshot() + XCTAssertEqual(snapshot.uploadCalls.count, 4) + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "failed") + XCTAssertEqual(recordedSleeps, [750_000_000, 1_500_000_000, 3_000_000_000]) + } + + func testVideoUploadFinalizesFailedWhenRecordingIsMissing() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-1", + bucket: "execution-videos", + path: "sessions/session-3/recording.mp4", + uploadURL: "https://upload.example/video-1" + ) + ] + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-3", + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + let result = await coordinator.uploadVideoRecording(from: nil) + let snapshot = api.snapshot() + + XCTAssertEqual(result.uploadState, "failed") + XCTAssertEqual(snapshot.uploadTargetRequests.last?.byteSize, 0) + XCTAssertEqual(snapshot.uploadTargetRequests.last?.source, "session-recording") + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "failed") + XCTAssertTrue(snapshot.uploadCalls.isEmpty) + } + + func testPrepareVideoRecordingUploadReturnsPendingWithoutUploading() async throws { + let api = WorkerAdminAPIMock() + 
api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-2", + bucket: "execution-videos", + path: "sessions/session-4/recording.mp4", + uploadURL: "https://upload.example/video-2" + ) + ] + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-4", + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + let preparedResult = await coordinator.prepareVideoRecordingUpload(source: "stream-capture") + let prepared = try XCTUnwrap(preparedResult) + let snapshot = api.snapshot() + + XCTAssertEqual(prepared.result.uploadState, "pending") + XCTAssertEqual(prepared.result.assetID, "video-asset-2") + XCTAssertEqual(prepared.result.byteSize, 0) + XCTAssertEqual(snapshot.uploadTargetRequests.last?.byteSize, 0) + XCTAssertEqual(snapshot.uploadTargetRequests.last?.source, "stream-capture") + XCTAssertTrue(snapshot.uploadCalls.isEmpty) + XCTAssertTrue(snapshot.finalizeRequests.isEmpty) + } + + func testCompleteSessionEndsBeforePreparedVideoUpload() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-2", + bucket: "execution-videos", + path: "sessions/session-4/recording.mp4", + uploadURL: "https://upload.example/video-2" + ) + ] + + let callOrder = CallOrderRecorder() + api.onFinalizeAttempt = { finalize in + if finalize.assetID == "video-asset-2", finalize.status == "uploaded" { + callOrder.append("finalize") + } + } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + await coordinator.start(sessionID: "session-4", currentStepIndex: 0, helpRequested: false) + let preparedResult = await coordinator.prepareVideoRecordingUpload(source: "stream-capture") + let prepared = try XCTUnwrap(preparedResult) + let result = await coordinator.completeSession(pendingVideoUpload: prepared) { + callOrder.append("end") + } + let completionSnapshot = api.snapshot() + + 
XCTAssertEqual(result.uploadState, "pending") + XCTAssertEqual(callOrder.values, ["end"]) + XCTAssertTrue(completionSnapshot.uploadCalls.isEmpty) + XCTAssertTrue(completionSnapshot.finalizeRequests.isEmpty) + } + + func testPreparedVideoUploadReusesReservedAssetID() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-2", + bucket: "execution-videos", + path: "sessions/session-4/recording.mp4", + uploadURL: "https://upload.example/video-2" + ) + ] + + let fileURL = try makeTempFile(data: Data([0x01, 0x02, 0x03]), suffix: "prepared-reuse") + defer { try? FileManager.default.removeItem(at: fileURL) } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-4", + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + let preparedResult = await coordinator.prepareVideoRecordingUpload(source: "stream-capture") + let prepared = try XCTUnwrap(preparedResult) + let result = await coordinator.uploadPreparedVideoRecording( + from: fileURL, + preparedUpload: prepared + ) + let snapshot = api.snapshot() + + XCTAssertTrue(result.succeeded) + XCTAssertEqual(snapshot.uploadCalls.last?.assetID, "video-asset-2") + XCTAssertEqual(snapshot.finalizeRequests.last?.assetID, "video-asset-2") + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "uploaded") + } + + func testPreparedVideoUploadFinalizesFailedWhenRecordingIsMissing() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-missing", + bucket: "execution-videos", + path: "sessions/session-missing/recording.mp4", + uploadURL: "https://upload.example/video-missing" + ) + ] + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-missing", + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + let preparedResult = await coordinator.prepareVideoRecordingUpload(source: "phone-recording") + let 
prepared = try XCTUnwrap(preparedResult) + let result = await coordinator.uploadPreparedVideoRecording( + from: nil, + preparedUpload: prepared + ) + let snapshot = api.snapshot() + + XCTAssertEqual(result.uploadState, "failed") + XCTAssertEqual(result.assetID, "video-asset-missing") + XCTAssertTrue(snapshot.uploadCalls.isEmpty) + XCTAssertEqual(snapshot.finalizeRequests.last?.assetID, "video-asset-missing") + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "failed") + } + + func testVideoFinalizeRetriesTransientErrorsThenSucceeds() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-3", + bucket: "execution-videos", + path: "sessions/session-5/recording.mp4", + uploadURL: "https://upload.example/video-3" + ) + ] + api.finalizeErrors = [ + URLError(.timedOut), + URLError(.networkConnectionLost), + ] + + let sleepRecorder = SleepRecorder() + let fileURL = try makeTempFile(data: Data([0xAB, 0xCD, 0xEF]), suffix: "retry") + defer { try? 
FileManager.default.removeItem(at: fileURL) } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-5", + heartbeatIntervalNanoseconds: 0, + sleeper: { value in + await sleepRecorder.record(value) + } + ) + + let result = await coordinator.uploadVideoRecording(from: fileURL) + let snapshot = api.snapshot() + let recordedSleeps = await sleepRecorder.snapshot() + + XCTAssertTrue(result.succeeded) + XCTAssertEqual(snapshot.uploadTargetRequests.last?.source, "session-recording") + XCTAssertEqual( + snapshot.finalizeRequests.filter { $0.assetID == "video-asset-3" && $0.status == "uploaded" }.count, + 3 + ) + XCTAssertEqual(recordedSleeps, [750_000_000, 1_500_000_000]) + } + + func testVideoUploadIncludesExplicitRecordingSource() async throws { + let api = WorkerAdminAPIMock() + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-4", + bucket: "execution-videos", + path: "sessions/session-6/recording.mp4", + uploadURL: "https://upload.example/video-4" + ) + ] + + let fileURL = try makeTempFile(data: Data([0x11, 0x22, 0x33]), suffix: "phone-source") + defer { try? 
FileManager.default.removeItem(at: fileURL) } + + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "session-6", + heartbeatIntervalNanoseconds: 0, + sleeper: { _ in } + ) + + let result = await coordinator.uploadVideoRecording(from: fileURL, source: "phone-recording") + let snapshot = api.snapshot() + + XCTAssertTrue(result.succeeded) + XCTAssertEqual(snapshot.uploadTargetRequests.last?.source, "phone-recording") + } + + func testTelemetryBatchFlushesSanitizedPayload() async throws { + let api = WorkerAdminAPIMock() + let telemetry = WorkerTelemetry( + api: api, + sessionID: "11111111-1111-1111-1111-111111111111", + deviceID: "iphone-test", + appBuild: "test-build", + flushIntervalNanoseconds: 0, + maxBatchSize: 20 + ) + + await telemetry.record( + "video_upload_success", + source: "media_upload", + stage: "uploaded", + durationMs: 42, + metricValue: 1024, + metricUnit: "bytes", + payload: [ + "token": "secret-token", + "uploadUrl": "https://signed.example/upload?token=secret", + "image_data": "data:image/jpeg;base64,abcd", + "asset_type": "video" + ] + ) + await telemetry.flush() + + let snapshot = api.snapshot() + let batch = try XCTUnwrap(snapshot.telemetryBatches.first) + XCTAssertEqual(batch.sessionID, "11111111-1111-1111-1111-111111111111") + XCTAssertEqual(batch.deviceID, "iphone-test") + XCTAssertEqual(batch.appBuild, "test-build") + XCTAssertEqual(batch.events.first?.name, "video_upload_success") + XCTAssertEqual(batch.events.first?.payload["token"] as? String, "[redacted]") + XCTAssertEqual(batch.events.first?.payload["uploadUrl"] as? String, "[redacted-url]") + XCTAssertEqual(batch.events.first?.payload["image_data"] as? String, "[redacted-raw-payload]") + XCTAssertEqual(batch.events.first?.payload["asset_type"] as? 
String, "video") + } + + func testTelemetryFailureDoesNotBlockCoordinatorUpload() async throws { + let api = WorkerAdminAPIMock() + api.telemetryErrors = [URLError(.timedOut)] + api.uploadTargetResponses = [ + WorkerMediaUploadTarget( + assetID: "video-asset-telemetry", + bucket: "execution-videos", + path: "sessions/session-telemetry/recording.mp4", + uploadURL: "https://upload.example/video-telemetry" + ) + ] + let telemetry = WorkerTelemetry( + api: api, + sessionID: "22222222-2222-2222-2222-222222222222", + flushIntervalNanoseconds: 0, + maxBatchSize: 100 + ) + let fileURL = try makeTempFile(data: Data([0x41, 0x42, 0x43]), suffix: "telemetry") + defer { try? FileManager.default.removeItem(at: fileURL) } + let coordinator = WorkerAdminLiveSessionCoordinator( + api: api, + sessionID: "22222222-2222-2222-2222-222222222222", + heartbeatIntervalNanoseconds: 0, + telemetry: telemetry, + sleeper: { _ in } + ) + + let result = await coordinator.uploadVideoRecording(from: fileURL) + await telemetry.flush() + + let snapshot = api.snapshot() + XCTAssertTrue(result.succeeded) + XCTAssertEqual(snapshot.finalizeRequests.last?.status, "uploaded") + XCTAssertEqual(snapshot.telemetryBatches.count, 1) + } +} + +@MainActor +final class GeminiInstructionSyncTests: XCTestCase { + override func setUp() async throws { + try await super.setUp() + try? 
Wearables.configure() + } + + func testGeminiInstructionPreviewIncludesActiveStepContext() async throws { + let viewModel = StreamSessionViewModel(wearables: Wearables.shared) + let sop = SOPTemplate( + name: "Cold Chain Verification SOP", + steps: [ + SOPStepTemplate( + id: "inspect_packaging_seal", + order: 1, + title: "Inspect packaging seal", + description: "Check the package seal before accepting the delivery.", + aiPrompt: "Confirm the seal is intact before intake.", + expectedObjects: ["seal", "package"] + ), + SOPStepTemplate( + id: "record_temperature_log", + order: 2, + title: "Record temperature log", + description: "Read the thermometer and capture the result in the log.", + aiPrompt: "Verify that the worker recorded the temperature reading.", + expectedObjects: ["thermometer", "clipboard"] + ), + ] + ) + + viewModel.checklistItems = [ + ChecklistItemState( + itemID: "inspect_packaging_seal", + name: "Inspect packaging seal", + description: "Check the package seal before accepting the delivery.", + aiPrompt: "Confirm the seal is intact before intake.", + expectedObjects: ["seal", "package"], + isChecked: true, + completionSource: .manual + ), + ChecklistItemState( + itemID: "record_temperature_log", + name: "Record temperature log", + description: "Read the thermometer and capture the result in the log.", + aiPrompt: "Verify that the worker recorded the temperature reading.", + expectedObjects: ["thermometer", "clipboard"] + ), + ] + + let instruction = try XCTUnwrap(viewModel.debugGeminiInstructionPreview(for: sop)) + + XCTAssertTrue(instruction.contains("SOP title: Cold Chain Verification SOP")) + XCTAssertTrue(instruction.contains("Step title: Record temperature log")) + XCTAssertTrue(instruction.contains("Step description: Read the thermometer and capture the result in the log.")) + XCTAssertTrue(instruction.contains("Vision completion prompt: Verify that the worker recorded the temperature reading.")) + XCTAssertTrue(instruction.contains("Expected 
objects to look for: thermometer, clipboard")) + XCTAssertTrue(instruction.contains("Direct next action: Guide the worker through this step now: Record temperature log.")) + XCTAssertTrue(viewModel.geminiInstructionSyncStatus.contains("Record temperature log")) + } + + func testSpotterTargetsOnlyCurrentIncompleteStep() async throws { + let viewModel = StreamSessionViewModel(wearables: Wearables.shared) + + viewModel.checklistItems = [ + ChecklistItemState( + itemID: "inspect_packaging_seal", + name: "Inspect packaging seal", + aiPrompt: "Confirm the seal is intact before intake.", + expectedObjects: ["seal", "package"], + isChecked: true, + completionSource: .manual + ), + ChecklistItemState( + itemID: "record_temperature_log", + name: "Record temperature log", + critical: true, + aiPrompt: "Verify that the worker recorded the temperature reading.", + expectedObjects: ["thermometer", "clipboard"] + ), + ChecklistItemState( + itemID: "stage_delivery", + name: "Stage delivery", + aiPrompt: "Confirm the package is staged for pickup.", + expectedObjects: ["package", "pickup shelf"] + ), + ] + + XCTAssertEqual(viewModel.debugSpotterTargetIDs(), ["record_temperature_log"]) + } +} + +@MainActor +final class AssignmentDrivenFlowTests: XCTestCase { + override func setUp() async throws { + try await super.setUp() + try? 
Wearables.configure() + } + + func testExplicitHomeSelectionStartsFirstPendingSOP() async throws { + let viewModel = StreamSessionViewModel(wearables: Wearables.shared) + let secondAssignment = SOPTemplate( + name: "Second assigned SOP", + items: ["Inspect secondary station"], + packageTitle: "Line A", + sortOrder: 2 + ) + let firstAssignment = SOPTemplate( + name: "First assigned SOP", + items: ["Inspect primary station"], + packageTitle: "Line A", + sortOrder: 1 + ) + + viewModel.availableSOPs = [secondAssignment, firstAssignment] + viewModel.startCurrentAssignmentFromHome() + + XCTAssertEqual(viewModel.currentAssignedSOP?.name, "First assigned SOP") + XCTAssertEqual(viewModel.selectedSOP?.name, "First assigned SOP") + XCTAssertEqual(viewModel.activeCaptureSOP?.name, "First assigned SOP") + XCTAssertEqual(viewModel.preferredCaptureMode, .iPhone) + } + + func testExplicitHomeSelectionPrefersGlassesWhenAvailable() async throws { + let viewModel = StreamSessionViewModel(wearables: Wearables.shared) + viewModel.hasActiveDevice = true + viewModel.availableSOPs = [ + SOPTemplate( + name: "Assigned SOP", + items: ["Inspect station"], + packageTitle: "Line A", + sortOrder: 1 + ) + ] + + viewModel.startCurrentAssignmentFromHome() + + XCTAssertEqual(viewModel.activeCaptureSOP?.name, "Assigned SOP") + XCTAssertEqual(viewModel.preferredCaptureMode, .glasses) + } +} + +final class SpotterEvidenceWindowTests: XCTestCase { + func testRequiresMultiplePositiveSamplesBeforeAutoComplete() { + var window = SpotterEvidenceWindow() + + let first = window.record( + stepID: "seal-check", + matched: true, + autoComplete: true, + confidence: 0.92, + threshold: 0.88 + ) + let second = window.record( + stepID: "seal-check", + matched: true, + autoComplete: true, + confidence: 0.91, + threshold: 0.88 + ) + let third = window.record( + stepID: "seal-check", + matched: true, + autoComplete: true, + confidence: 0.9, + threshold: 0.88 + ) + + XCTAssertFalse(first.shouldAutoComplete) + 
XCTAssertFalse(second.shouldAutoComplete) + XCTAssertTrue(third.shouldAutoComplete) + XCTAssertEqual(third.positiveCount, 3) + } + + func testNegativeSamplesPreventStableAutoComplete() { + var window = SpotterEvidenceWindow() + + _ = window.record(stepID: "seal-check", matched: true, autoComplete: true, confidence: 0.92, threshold: 0.88) + _ = window.record(stepID: "seal-check", matched: false, autoComplete: false, confidence: 0.2, threshold: 0.88) + let decision = window.record( + stepID: "seal-check", + matched: true, + autoComplete: true, + confidence: 0.91, + threshold: 0.88 + ) + + XCTAssertFalse(decision.shouldAutoComplete) + XCTAssertEqual(decision.sampleCount, 3) + XCTAssertEqual(decision.positiveCount, 2) + } +} + +final class OpsAPIClientRoutingTests: XCTestCase { + override func tearDown() { + RequestCaptureURLProtocol.handler = nil + super.tearDown() + } + + func testWorkerRoutesUseAdminBaseURL() async throws { + let settings = SettingsManager.shared + let originalOpsBaseURL = settings.opsBaseURL + let originalAdminBaseURL = settings.adminBaseURL + let originalBearerToken = settings.workerAPIBearerToken + + settings.opsBaseURL = "https://ops.example.test" + settings.adminBaseURL = "http://admin.example.test:3001" + settings.workerAPIBearerToken = "worker-bearer-token" + defer { + settings.opsBaseURL = originalOpsBaseURL + settings.adminBaseURL = originalAdminBaseURL + settings.workerAPIBearerToken = originalBearerToken + } + + let lock = NSLock() + var capturedRequests: [URLRequest] = [] + RequestCaptureURLProtocol.handler = { request in + lock.withLock { + capturedRequests.append(request) + } + + let body: Data + switch request.url?.path { + case "/health": + body = Data(#"{"status":"ok","service":"ops"}"#.utf8) + case "/api/worker/live/heartbeat": + body = Data( + 
#"{"sessionId":"11111111-1111-1111-1111-111111111111","updatedAt":"2026-05-30T18:31:00.000Z","isFreshLiveSession":true,"webrtcRoomCode":"ROOM42","supportMode":"handoff_requested","aiSessionStatus":"paused","humanSupportStatus":"ringing","supportUpdatedAt":"2026-05-30T18:31:00.000Z","shouldOpenLiveRoom":false}"# + .utf8 + ) + case "/api/worker/media/upload-target": + body = Data( + #"{"assetId":"video-asset-1","bucket":"execution-videos","path":"sessions/session-1/recording.mp4","uploadUrl":"https://upload.example/video-1"}"# + .utf8 + ) + case "/api/worker/gemini/live-token": + body = Data( + #"{"token":"ephemeral-token","expiresAt":"2026-05-30T19:00:00.000Z","newSessionExpiresAt":"2026-05-30T18:31:00.000Z","model":"gemini-live-2.5-flash-native-audio","websocketBaseURL":"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained","queryParameterName":"access_token"}"# + .utf8 + ) + case "/api/worker/gemini/spotter": + body = Data( + #"{"matched":true,"confidence":0.93,"reason":"Clear visual evidence.","evidenceTimestamp":"2026-05-30T18:31:00.000Z","threshold":0.88,"model":"gemini-3.5-flash","autoComplete":true}"# + .utf8 + ) + default: + body = Data("{}".utf8) + } + + let response = HTTPURLResponse( + url: try XCTUnwrap(request.url), + statusCode: 200, + httpVersion: nil, + headerFields: ["Content-Type": "application/json"] + ) + return (try XCTUnwrap(response), body) + } + + let configuration = URLSessionConfiguration.ephemeral + configuration.protocolClasses = [RequestCaptureURLProtocol.self] + let client = OpsAPIClient(session: URLSession(configuration: configuration)) + + let health = try await client.health() + XCTAssertEqual(health, "ok:ops") + + try await client.sendWorkerLiveHeartbeat( + WorkerLiveHeartbeatRequest( + sessionID: "11111111-1111-1111-1111-111111111111", + webrtcRoomCode: "ROOM42", + currentStepIndex: 2, + helpRequested: true, + status: "active", + lastFrameBucket: 
"live-frames", + lastFramePath: "sessions/session-1/last-frame.jpg" + ) + ) + + _ = try await client.requestWorkerMediaUploadTarget( + sessionID: "11111111-1111-1111-1111-111111111111", + assetType: "video", + filename: "recording.mp4", + contentType: "video/mp4", + byteSize: 256, + source: "phone-recording" + ) + + _ = try await client.requestGeminiLiveToken( + model: "models/gemini-live-2.5-flash-native-audio", + sessionID: "11111111-1111-1111-1111-111111111111" + ) + + _ = try await client.requestGeminiSpotter( + GeminiSpotterRequest( + sessionID: "11111111-1111-1111-1111-111111111111", + stepID: "step-1", + stepTitle: "Check seal", + aiPrompt: "Confirm the seal is visible.", + expectedObjects: ["seal"], + preconditions: ["Package is in view"], + postconditions: ["Seal is visible"], + skipRisk: "medium", + evidenceRequired: true, + imageBase64: "ZmFrZQ==", + imageMimeType: "image/jpeg", + capturedAt: "2026-05-30T18:31:00.000Z", + critical: false, + allowAIComplete: true, + elapsedActiveMs: nil + ) + ) + + let requests = lock.withLock { capturedRequests } + XCTAssertEqual(requests.map { $0.url?.host }, ["ops.example.test", "admin.example.test", "admin.example.test", "admin.example.test", "admin.example.test"]) + XCTAssertEqual(requests.map { $0.url?.path }, ["/health", "/api/worker/live/heartbeat", "/api/worker/media/upload-target", "/api/worker/gemini/live-token", "/api/worker/gemini/spotter"]) + XCTAssertNil(requests.first?.value(forHTTPHeaderField: "Authorization")) + XCTAssertEqual(requests.dropFirst().first?.value(forHTTPHeaderField: "Authorization"), "Bearer worker-bearer-token") + let uploadPayload = try XCTUnwrap( + JSONSerialization.jsonObject(with: try XCTUnwrap(requestBodyData(from: requests[2]))) as? [String: Any] + ) + XCTAssertEqual(uploadPayload["source"] as? String, "phone-recording") + let tokenPayload = try XCTUnwrap( + JSONSerialization.jsonObject(with: try XCTUnwrap(requestBodyData(from: requests[3]))) as? 
[String: Any] + ) + XCTAssertEqual(tokenPayload["model"] as? String, "models/gemini-live-2.5-flash-native-audio") + XCTAssertEqual(tokenPayload["sessionId"] as? String, "11111111-1111-1111-1111-111111111111") + let spotterPayload = try XCTUnwrap( + JSONSerialization.jsonObject(with: try XCTUnwrap(requests.last.flatMap(requestBodyData(from:)))) as? [String: Any] + ) + XCTAssertEqual(spotterPayload["stepId"] as? String, "step-1") + XCTAssertEqual(spotterPayload["allowAIComplete"] as? Bool, true) + XCTAssertEqual(spotterPayload["imageBase64"] as? String, "ZmFrZQ==") + } + + func testWorkerRouteErrorsMentionAdminIngest() async throws { + let settings = SettingsManager.shared + let originalAdminBaseURL = settings.adminBaseURL + let originalBearerToken = settings.workerAPIBearerToken + + settings.adminBaseURL = "http://admin.example.test:3001" + settings.workerAPIBearerToken = "worker-bearer-token" + defer { + settings.adminBaseURL = originalAdminBaseURL + settings.workerAPIBearerToken = originalBearerToken + } + + RequestCaptureURLProtocol.handler = { request in + let response = HTTPURLResponse( + url: try XCTUnwrap(request.url), + statusCode: 404, + httpVersion: nil, + headerFields: ["Content-Type": "text/html"] + ) + let body = Data("
Cannot POST /api/worker/media/upload-target
".utf8) + return (try XCTUnwrap(response), body) + } + + let configuration = URLSessionConfiguration.ephemeral + configuration.protocolClasses = [RequestCaptureURLProtocol.self] + let client = OpsAPIClient(session: URLSession(configuration: configuration)) + + do { + _ = try await client.requestWorkerMediaUploadTarget( + sessionID: "11111111-1111-1111-1111-111111111111", + assetType: "video", + filename: "recording.mp4", + contentType: "video/mp4", + byteSize: 256, + source: "stream-capture" + ) + XCTFail("Expected admin ingest request to fail") + } catch { + XCTAssertTrue(error.localizedDescription.contains("Admin ingest returned HTTP 404")) + XCTAssertFalse(error.localizedDescription.localizedCaseInsensitiveContains("ops-api")) + XCTAssertTrue(error.localizedDescription.contains("/api/worker/media/upload-target")) + } + } + + func testTelemetryRouteUsesAdminBaseURL() async throws { + let settings = SettingsManager.shared + let originalAdminBaseURL = settings.adminBaseURL + let originalBearerToken = settings.workerAPIBearerToken + + settings.adminBaseURL = "http://admin.example.test:3001" + settings.workerAPIBearerToken = "worker-bearer-token" + defer { + settings.adminBaseURL = originalAdminBaseURL + settings.workerAPIBearerToken = originalBearerToken + } + + let lock = NSLock() + var capturedRequests: [URLRequest] = [] + RequestCaptureURLProtocol.handler = { request in + lock.withLock { + capturedRequests.append(request) + } + + let response = HTTPURLResponse( + url: try XCTUnwrap(request.url), + statusCode: 202, + httpVersion: nil, + headerFields: ["Content-Type": "application/json"] + ) + return (try XCTUnwrap(response), Data(#"{"accepted":1,"inserted":1}"#.utf8)) + } + + let configuration = URLSessionConfiguration.ephemeral + configuration.protocolClasses = [RequestCaptureURLProtocol.self] + let client = OpsAPIClient(session: URLSession(configuration: configuration)) + + try await client.sendWorkerTelemetryBatch( + WorkerTelemetryBatch( + sessionID: 
"11111111-1111-1111-1111-111111111111", + deviceID: "iphone-test", + workerID: nil, + platform: "ios", + appBuild: "test-build", + events: [ + WorkerTelemetryEvent( + name: "heartbeat_result", + source: "ios_app", + stage: "heartbeat", + payload: ["status": "active"] + ) + ] + ) + ) + + let request = try XCTUnwrap(lock.withLock { capturedRequests.first }) + XCTAssertEqual(request.url?.host, "admin.example.test") + XCTAssertEqual(request.url?.path, "/api/worker/telemetry") + XCTAssertEqual(request.value(forHTTPHeaderField: "Authorization"), "Bearer worker-bearer-token") + let payload = try XCTUnwrap( + JSONSerialization.jsonObject(with: try XCTUnwrap(requestBodyData(from: request))) as? [String: Any] + ) + XCTAssertEqual(payload["sessionId"] as? String, "11111111-1111-1111-1111-111111111111") + XCTAssertEqual(payload["deviceId"] as? String, "iphone-test") + XCTAssertEqual(payload["platform"] as? String, "ios") + XCTAssertEqual((payload["events"] as? [[String: Any]])?.first?["name"] as? String, "heartbeat_result") + } +} diff --git a/samples/CameraAccess/server/Dockerfile b/samples/CameraAccess/server/Dockerfile index 0b977be1..54eed006 100644 --- a/samples/CameraAccess/server/Dockerfile +++ b/samples/CameraAccess/server/Dockerfile @@ -1,7 +1,7 @@ FROM node:20-alpine WORKDIR /app COPY package*.json ./ -RUN npm ci --production +RUN npm install --omit=dev COPY . . 
EXPOSE 8080 CMD ["node", "index.js"] diff --git a/samples/CameraAccess/server/index.js b/samples/CameraAccess/server/index.js index dbb9149d..91b5a06d 100644 --- a/samples/CameraAccess/server/index.js +++ b/samples/CameraAccess/server/index.js @@ -1,43 +1,88 @@ +const crypto = require("crypto"); const http = require("http"); const fs = require("fs"); const path = require("path"); const { WebSocketServer } = require("ws"); const PORT = process.env.PORT || 8080; +const NODE_ENV = process.env.NODE_ENV || "development"; +const IS_PRODUCTION = NODE_ENV === "production"; const rooms = new Map(); // roomCode -> { creator: ws, viewer: ws, destroyTimer: timeout|null } // Grace period (ms) before destroying a room when creator disconnects. // Allows the iOS user to switch apps (e.g. copy room code, send via WhatsApp) and come back. const ROOM_GRACE_PERIOD_MS = 60_000; -// TURN: ExpressTURN (1000 GB/month free, reliable) -// Ports 3478 (standard), 80, 443 (firewall bypass) -const EXPRESSTURN_SERVER = process.env.EXPRESSTURN_SERVER || "free.expressturn.com"; -const EXPRESSTURN_USER = process.env.EXPRESSTURN_USER || "efPU52K4SLOQ34W2QY"; -const EXPRESSTURN_PASS = process.env.EXPRESSTURN_PASS || "1TJPNFxHKXrZfelz"; +const STUN_SERVER = process.env.STUN_SERVER || ""; +const TURN_HOST = (process.env.TURN_HOST || "").trim(); +const TURN_PORT = Number(process.env.TURN_PORT || 3478); +const TURN_TLS_PORT = Number(process.env.TURN_TLS_PORT || 5349); +const TURN_TTL_SECONDS = Number(process.env.TURN_TTL_SECONDS || 86400); +const TURN_SHARED_SECRET = (process.env.TURN_SHARED_SECRET || process.env.TURN_SECRET || "").trim(); +const HAS_TURN_CREDENTIALS = Boolean(TURN_HOST && TURN_SHARED_SECRET); + +if (!HAS_TURN_CREDENTIALS) { + console.warn("[TURN] No TURN credentials configured; /api/turn will return 503."); +} function getTurnCredentials() { + const iceServers = []; + + if (STUN_SERVER) { + iceServers.push({ urls: [STUN_SERVER] }); + } + + if (HAS_TURN_CREDENTIALS) { + const username = 
`${Math.floor(Date.now() / 1000) + TURN_TTL_SECONDS}:support`; + const credential = crypto + .createHmac("sha1", TURN_SHARED_SECRET) + .update(username) + .digest("base64"); + + iceServers.push({ + urls: [ + `stun:${TURN_HOST}:${TURN_PORT}`, + `turn:${TURN_HOST}:${TURN_PORT}?transport=udp`, + `turn:${TURN_HOST}:${TURN_PORT}?transport=tcp`, + `turns:${TURN_HOST}:${TURN_TLS_PORT}?transport=tcp`, + ], + username, + credential, + }); + } + return { - iceServers: [ - { - urls: [ - `turn:${EXPRESSTURN_SERVER}:3478`, - `turn:${EXPRESSTURN_SERVER}:3478?transport=tcp`, - `turn:${EXPRESSTURN_SERVER}:80`, - `turn:${EXPRESSTURN_SERVER}:80?transport=tcp`, - `turns:${EXPRESSTURN_SERVER}:443?transport=tcp`, - ], - username: EXPRESSTURN_USER, - credential: EXPRESSTURN_PASS, - }, - ], + iceServers, }; } // HTTP server for serving the web viewer const httpServer = http.createServer((req, res) => { + if (req.url === "/health") { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + ok: true, + status: "ok", + environment: NODE_ENV, + roomCount: rooms.size, + turnConfigured: HAS_TURN_CREDENTIALS, + }) + ); + return; + } + // TURN credentials API endpoint if (req.url === "/api/turn") { + if (!HAS_TURN_CREDENTIALS) { + res.writeHead(503, { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }); + res.end(JSON.stringify({ error: "TURN credentials are not configured." })); + return; + } + const creds = getTurnCredentials(); res.writeHead(200, { "Content-Type": "application/json", @@ -102,18 +147,26 @@ wss.on("connection", (ws, req) => { switch (msg.type) { case "create": { - const code = generateRoomCode(); + const requested = + typeof msg.room_code === "string" && msg.room_code.trim() + ? msg.room_code.trim().toUpperCase() + : null; + const code = requested && !rooms.has(requested) ? 
requested : generateRoomCode(); rooms.set(code, { creator: ws, viewer: null, destroyTimer: null }); currentRoom = code; role = "creator"; - ws.send(JSON.stringify({ type: "room_created", room: code })); + ws.send(JSON.stringify({ type: "room_created", room: code, room_code: code })); console.log(`[Room] Created: ${code}`); break; } case "rejoin": { // Creator reconnects to an existing room (after app backgrounding) - const room = rooms.get(msg.room); + const code = + typeof msg.room_code === "string" && msg.room_code.trim() + ? msg.room_code.trim().toUpperCase() + : String(msg.room || "").trim().toUpperCase(); + const room = rooms.get(code); if (!room) { ws.send( JSON.stringify({ type: "error", message: "Room not found" }) @@ -124,23 +177,27 @@ wss.on("connection", (ws, req) => { if (room.destroyTimer) { clearTimeout(room.destroyTimer); room.destroyTimer = null; - console.log(`[Room] Creator rejoined, cancelled destroy timer: ${msg.room}`); + console.log(`[Room] Creator rejoined, cancelled destroy timer: ${code}`); } room.creator = ws; - currentRoom = msg.room; + currentRoom = code; role = "creator"; - ws.send(JSON.stringify({ type: "room_rejoined", room: msg.room })); + ws.send(JSON.stringify({ type: "room_rejoined", room: code, room_code: code })); // If viewer is already waiting, trigger a new offer if (room.viewer && room.viewer.readyState === 1) { - ws.send(JSON.stringify({ type: "peer_joined" })); - console.log(`[Room] Viewer already present, notifying rejoined creator: ${msg.room}`); + ws.send(JSON.stringify({ type: "peer_joined", room: code, room_code: code })); + console.log(`[Room] Viewer already present, notifying rejoined creator: ${code}`); } - console.log(`[Room] Creator rejoined: ${msg.room}`); + console.log(`[Room] Creator rejoined: ${code}`); break; } case "join": { - const room = rooms.get(msg.room); + const code = + typeof msg.room_code === "string" && msg.room_code.trim() + ? 
msg.room_code.trim().toUpperCase() + : String(msg.room || "").trim().toUpperCase(); + const room = rooms.get(code); if (!room) { ws.send( JSON.stringify({ type: "error", message: "Room not found" }) @@ -152,14 +209,14 @@ wss.on("connection", (ws, req) => { return; } room.viewer = ws; - currentRoom = msg.room; + currentRoom = code; role = "viewer"; - ws.send(JSON.stringify({ type: "room_joined" })); + ws.send(JSON.stringify({ type: "room_joined", room: code, room_code: code })); // Notify creator that viewer joined (only if creator is connected) if (room.creator && room.creator.readyState === 1) { - room.creator.send(JSON.stringify({ type: "peer_joined" })); + room.creator.send(JSON.stringify({ type: "peer_joined", room: code, room_code: code })); } - console.log(`[Room] Viewer joined: ${msg.room}`); + console.log(`[Room] Viewer joined: ${code}`); break; } @@ -195,7 +252,7 @@ wss.on("connection", (ws, req) => { const room = rooms.get(currentRoom); const otherPeer = role === "creator" ? room.viewer : room.creator; if (otherPeer && otherPeer.readyState === 1) { - otherPeer.send(JSON.stringify({ type: "peer_left" })); + otherPeer.send(JSON.stringify({ type: "peer_left", room: currentRoom, room_code: currentRoom })); } if (role === "creator") { // Don't destroy immediately -- give the creator a grace period to reconnect diff --git a/samples/CameraAccessAndroid/app/build.gradle.kts b/samples/CameraAccessAndroid/app/build.gradle.kts index c20937c8..27d6dbbb 100644 --- a/samples/CameraAccessAndroid/app/build.gradle.kts +++ b/samples/CameraAccessAndroid/app/build.gradle.kts @@ -12,6 +12,27 @@ plugins { alias(libs.plugins.compose.compiler) } +fun loadEnvFile(projectRoot: File): Map { + val envFile = projectRoot.resolve(".env") + if (!envFile.exists()) return emptyMap() + + return envFile.readLines() + .map { it.trim() } + .filter { it.isNotEmpty() && !it.startsWith("#") && it.contains("=") } + .associate { + val splitIndex = it.indexOf('=') + val key = it.substring(0, 
splitIndex).trim() + val value = it.substring(splitIndex + 1).trim().removeSurrounding("\"") + key to value + } +} + +fun envValue(envMap: Map, key: String, defaultValue: String): String { + return envMap[key] ?: System.getenv(key) ?: defaultValue +} + +val repoEnv = loadEnvFile(rootProject.projectDir) + android { namespace = "com.meta.wearable.dat.externalsampleapps.cameraaccess" compileSdk = 35 @@ -27,6 +48,10 @@ android { testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" vectorDrawables { useSupportLibrary = true } + + val openClawTailscaleIp = envValue(repoEnv, "OPENCLAW_TAILSCALE_IP", "") + + buildConfigField("String", "OPENCLAW_TAILSCALE_IP", "\"${openClawTailscaleIp}\"") } buildTypes { @@ -44,14 +69,6 @@ android { buildFeatures { compose = true } composeOptions { kotlinCompilerExtensionVersion = "1.5.1" } packaging { resources { excludes += "/META-INF/{AL2.0,LGPL2.1}" } } - signingConfigs { - getByName("debug") { - storeFile = file("sample.keystore") - storePassword = "sample" - keyAlias = "sample" - keyPassword = "sample" - } - } } dependencies { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/Secrets.kt.example b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/Secrets.kt.example index c231b5e5..80ed3a4a 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/Secrets.kt.example +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/Secrets.kt.example @@ -4,6 +4,12 @@ package com.meta.wearable.dat.externalsampleapps.cameraaccess object Secrets { + // OPTIONAL: Stable device ID for SOP/heartbeat telemetry + const val deviceId = "YOUR_DEVICE_UUID" + + // Prototype worker identity used during ops-api bootstrap + const val workerLoginCode = "EMBC-0001" + // REQUIRED: Get your key at https://aistudio.google.com/apikey 
const val geminiAPIKey = "YOUR_GEMINI_API_KEY" @@ -11,9 +17,17 @@ object Secrets { // Use your Mac's Bonjour hostname (run: scutil --get LocalHostName) const val openClawHost = "http://YOUR_MAC_HOSTNAME.local" const val openClawPort = 18789 + const val openClawTailscaleIP = "srv1338555" + const val openClawBearerToken = "" const val openClawHookToken = "YOUR_OPENCLAW_HOOK_TOKEN" const val openClawGatewayToken = "YOUR_OPENCLAW_GATEWAY_TOKEN" - // OPTIONAL: WebRTC signaling server URL (for live POV streaming) - const val webrtcSignalingURL = "wss://YOUR_SIGNALING_SERVER" + // Operations API URL for worker bootstrap, sessions, events, and media registration. + const val opsBaseURL = "https://ops.embarcaderolabs.cloud" + + // Signaling service URL for live supervisor jump-in. + const val signalBaseURL = "https://signal.embarcaderolabs.cloud" + + // Backward-compatible legacy signaling URL used by older code paths. + const val webrtcSignalingURL = "wss://signal.embarcaderolabs.cloud" } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt index 10ba908e..17431608 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt @@ -21,6 +21,15 @@ object GeminiConfig { val apiKey: String get() = SettingsManager.geminiAPIKey + val workerLoginCode: String + get() = SettingsManager.workerLoginCode + + val opsBaseURL: String + get() = SettingsManager.opsBaseURL + + val signalBaseURL: String + get() = SettingsManager.signalBaseURL + val openClawHost: String get() = SettingsManager.openClawHost @@ -33,6 +42,15 @@ object GeminiConfig { val openClawGatewayToken: String get() = 
SettingsManager.openClawGatewayToken + val openClawTailscaleIP: String + get() = SettingsManager.openClawTailscaleIP + + val openClawBearerToken: String + get() = SettingsManager.openClawBearerToken + + val deviceId: String + get() = SettingsManager.deviceId + fun websocketURL(): String? { if (apiKey == "YOUR_GEMINI_API_KEY" || apiKey.isEmpty()) return null return "$WEBSOCKET_BASE_URL?key=$apiKey" diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt index d046d306..6643a894 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt @@ -50,6 +50,8 @@ class GeminiLiveService { var onOutputTranscription: ((String) -> Unit)? = null var onToolCall: ((GeminiToolCall) -> Unit)? = null var onToolCallCancellation: ((GeminiToolCallCancellation) -> Unit)? = null + var onSocketOpened: (() -> Unit)? = null + var onSocketClosed: ((String?) -> Unit)? = null // Latency tracking private var lastUserSpeechEnd: Long = 0 @@ -59,6 +61,10 @@ class GeminiLiveService { private val sendExecutor = Executors.newSingleThreadExecutor() private var connectCallback: ((Boolean) -> Unit)? = null private var timeoutTimer: Timer? = null + @Volatile private var latestVideoFrameBase64: String? = null + + val lastVideoFrameBase64: String? 
+ get() = latestVideoFrameBase64 private val client = OkHttpClient.Builder() .readTimeout(0, TimeUnit.MILLISECONDS) @@ -80,6 +86,7 @@ class GeminiLiveService { webSocket = client.newWebSocket(request, object : WebSocketListener() { override fun onOpen(webSocket: WebSocket, response: Response) { Log.d(TAG, "WebSocket opened") + onSocketOpened?.invoke() _connectionState.value = GeminiConnectionState.SettingUp sendSetupMessage() } @@ -98,6 +105,7 @@ class GeminiLiveService { _connectionState.value = GeminiConnectionState.Error(msg) _isModelSpeaking.value = false resolveConnect(false) + onSocketClosed?.invoke(msg) onDisconnected?.invoke(msg) } @@ -106,6 +114,7 @@ class GeminiLiveService { _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false resolveConnect(false) + onSocketClosed?.invoke("Connection closing (code $code: $reason)") onDisconnected?.invoke("Connection closed (code $code: $reason)") } @@ -113,6 +122,7 @@ class GeminiLiveService { Log.d(TAG, "WebSocket closed: $code $reason") _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false + onSocketClosed?.invoke("Connection closed (code $code: $reason)") } }) @@ -134,10 +144,13 @@ class GeminiLiveService { fun disconnect() { timeoutTimer?.cancel() timeoutTimer = null + onSocketClosed?.invoke("Disconnected") webSocket?.close(1000, null) webSocket = null onToolCall = null onToolCallCancellation = null + onSocketOpened = null + onSocketClosed = null _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false resolveConnect(false) @@ -165,6 +178,7 @@ class GeminiLiveService { val baos = ByteArrayOutputStream() bitmap.compress(Bitmap.CompressFormat.JPEG, GeminiConfig.VIDEO_JPEG_QUALITY, baos) val base64 = Base64.encodeToString(baos.toByteArray(), Base64.NO_WRAP) + latestVideoFrameBase64 = base64 val json = JSONObject().apply { put("realtimeInput", JSONObject().apply { put("video", JSONObject().apply { @@ -269,6 +283,7 
@@ class GeminiLiveService { val seconds = goAway.optJSONObject("timeLeft")?.optInt("seconds", 0) ?: 0 _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false + onSocketClosed?.invoke("Server closing (time left: ${seconds}s)") onDisconnected?.invoke("Server closing (time left: ${seconds}s)") return } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 31567442..03e2cffd 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -12,12 +12,18 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallRo import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallStatus import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMode import kotlinx.coroutines.Job +import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.isActive import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import org.json.JSONArray +import org.json.JSONObject +import java.time.Instant +import java.util.UUID data class GeminiUiState( val isGeminiActive: Boolean = false, @@ -39,12 +45,18 @@ class GeminiSessionViewModel : ViewModel() { val uiState: StateFlow = _uiState.asStateFlow() private val geminiService = GeminiLiveService() + private val sopRelayClient = SopRelayClient() private val openClawBridge = OpenClawBridge() private var toolCallRouter: 
ToolCallRouter? = null private val audioManager = AudioManager() private val eventClient = OpenClawEventClient() private var lastVideoFrameTime: Long = 0 private var stateObservationJob: Job? = null + private var heartbeatJob: Job? = null + private var heartbeatTimeoutJob: Job? = null + private var currentSopSessionId: String? = null + private var isSopSessionTerminated = true + private var isFinalizingSession = false var streamingMode: StreamingMode = StreamingMode.GLASSES @@ -93,14 +105,23 @@ class GeminiSessionViewModel : ViewModel() { } geminiService.onDisconnected = { reason -> - if (_uiState.value.isGeminiActive) { - stopSession() + if (_uiState.value.isGeminiActive && !isFinalizingSession) { + resetToIdle("Connection lost: ${reason ?: "Unknown error"}") _uiState.value = _uiState.value.copy( errorMessage = "Connection lost: ${reason ?: "Unknown error"}" ) } } + geminiService.onSocketOpened = { + startSopHeartbeatSession() + } + + geminiService.onSocketClosed = { + if (!_uiState.value.isGeminiActive || isFinalizingSession) return@onSocketClosed + finalizeSessionWithReceipt(status = "terminated") + } + // Check OpenClaw and start session viewModelScope.launch { openClawBridge.checkConnection() @@ -111,6 +132,11 @@ class GeminiSessionViewModel : ViewModel() { geminiService.onToolCall = { toolCall -> for (call in toolCall.functionCalls) { + if (call.name == "log_sop_step") { + handleSopLogToolCall(call) + continue + } + toolCallRouter?.handleToolCall(call) { response -> geminiService.sendToolResponse(response) } @@ -182,13 +208,7 @@ class GeminiSessionViewModel : ViewModel() { fun stopSession() { eventClient.disconnect() - toolCallRouter?.cancelAll() - toolCallRouter = null - audioManager.stopCapture() - geminiService.disconnect() - stateObservationJob?.cancel() - stateObservationJob = null - _uiState.value = GeminiUiState() + finalizeSessionWithReceipt(status = "terminated") } fun sendVideoFrameIfThrottled(bitmap: Bitmap) { @@ -209,4 +229,141 @@ class 
GeminiSessionViewModel : ViewModel() { super.onCleared() stopSession() }
+
+    /**
+     * Handles a `log_sop_step` tool call from Gemini.
+     *
+     * Rejects the call when `step_name` is missing or blank. Otherwise the step is relayed to the
+     * SOP endpoint on [Dispatchers.IO] and the call is acknowledged right away, without waiting
+     * for the relay request to complete.
+     */
+    private fun handleSopLogToolCall(call: com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.GeminiFunctionCall) {
+        val stepName = call.args["step_name"]?.toString()?.trim().orEmpty()
+        if (stepName.isEmpty()) {
+            geminiService.sendToolResponse(buildToolResponse(
+                callId = call.id,
+                name = call.name,
+                result = com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult.Failure("Missing required argument: step_name")
+            ))
+            return
+        }
+
+        // Reuse the current SOP session id, or lazily create one if none exists yet.
+        val sessionId = currentSopSessionId ?: UUID.randomUUID().toString().also {
+            currentSopSessionId = it
+            isSopSessionTerminated = false
+        }
+
+        // Image precedence: `frame_data` argument, then `image_base64`, then the service's last frame.
+        val imageBase64 = call.args["frame_data"]?.toString()?.takeIf { it.isNotBlank() }
+            ?: call.args["image_base64"]?.toString()?.takeIf { it.isNotBlank() }
+            ?: geminiService.lastVideoFrameBase64.orEmpty()
+
+        viewModelScope.launch(Dispatchers.IO) {
+            sopRelayClient.postSopLog(
+                tailscaleIp = GeminiConfig.openClawTailscaleIP,
+                sessionId = sessionId,
+                stepName = stepName,
+                timestampIso8601 = Instant.now().toString(),
+                imageBase64 = imageBase64
+            )
+        }
+
+        geminiService.sendToolResponse(buildToolResponse(
+            callId = call.id,
+            name = call.name,
+            result = com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult.Success("SOP step forwarded")
+        ))
+    }
+
+    /**
+     * Starts a new SOP session unless one is already active: posts an "active" heartbeat every
+     * 3 seconds and schedules [finalizeSessionWithReceipt] to run after 60 seconds.
+     */
+    private fun startSopHeartbeatSession() {
+        if (currentSopSessionId != null && !isSopSessionTerminated) return
+
+        val sessionId = UUID.randomUUID().toString()
+        currentSopSessionId = sessionId
+        isSopSessionTerminated = false
+
+        heartbeatJob?.cancel()
+        heartbeatTimeoutJob?.cancel()
+
+        heartbeatJob = viewModelScope.launch(Dispatchers.IO) {
+            // The first iteration posts immediately, so no separate up-front heartbeat is sent
+            // (sending one as well would deliver two identical heartbeats back-to-back).
+            while (isActive && !isSopSessionTerminated) {
+                sopRelayClient.postHeartbeat(
+                    tailscaleIp = GeminiConfig.openClawTailscaleIP,
+                    sessionId = sessionId,
+                    status = "active"
+                )
+                delay(3_000)
+            }
+        }
+
+        heartbeatTimeoutJob = viewModelScope.launch {
+            delay(60_000)
+            finalizeSessionWithReceipt(status = "terminated")
+        }
+    }
+
+    /**
+     * Ends the current SOP session: stops the heartbeat jobs, posts a final heartbeat carrying
+     * [status], and resets the view model to idle with the relay's receipt as the UI message.
+     *
+     * When there is no active SOP session this only resets to idle. Re-entrant calls made while a
+     * finalization is in flight are ignored.
+     */
+    private fun finalizeSessionWithReceipt(status: String) {
+        if (isFinalizingSession) return
+
+        val sessionId = currentSopSessionId
+        if (sessionId == null || isSopSessionTerminated) {
+            resetToIdle(null)
+            return
+        }
+
+        isFinalizingSession = true
+        isSopSessionTerminated = true
+        heartbeatJob?.cancel()
+        heartbeatJob = null
+        heartbeatTimeoutJob?.cancel()
+        heartbeatTimeoutJob = null
+
+        // When this is reached from onCleared() the scope has already been cancelled and a
+        // coroutine launched in it would never run. Tear down synchronously instead of leaking
+        // audio capture and the websocket; the final heartbeat cannot be sent in that case.
+        if (!viewModelScope.isActive) {
+            resetToIdle(null)
+            return
+        }
+
+        viewModelScope.launch(Dispatchers.IO) {
+            var receiptMessage: String? = null
+            try {
+                receiptMessage = sopRelayClient.postHeartbeatForReceipt(
+                    tailscaleIp = GeminiConfig.openClawTailscaleIP,
+                    sessionId = sessionId,
+                    status = status
+                )
+            } finally {
+                // Teardown must happen even if the scope is cancelled while the request is in flight.
+                withContext(Dispatchers.Main + kotlinx.coroutines.NonCancellable) {
+                    resetToIdle(receiptMessage)
+                }
+            }
+        }
+    }
+
+    /**
+     * Tears down the session (event client, service callbacks, tool router, audio capture, Gemini
+     * socket, background jobs) and publishes a fresh [GeminiUiState] whose `errorMessage` is
+     * [receiptMessage].
+     */
+    private fun resetToIdle(receiptMessage: String?) {
+        // Stop SOP background work here too, so callers that bypass finalizeSessionWithReceipt
+        // do not leave the heartbeat loop or the timeout job running against a dead session.
+        isSopSessionTerminated = true
+        heartbeatJob?.cancel()
+        heartbeatJob = null
+        heartbeatTimeoutJob?.cancel()
+        heartbeatTimeoutJob = null
+
+        eventClient.disconnect()
+        // Detach callbacks first so the disconnect below cannot re-enter the session logic.
+        geminiService.onDisconnected = null
+        geminiService.onSocketClosed = null
+        geminiService.onSocketOpened = null
+
+        toolCallRouter?.cancelAll()
+        toolCallRouter = null
+        audioManager.stopCapture()
+        geminiService.disconnect()
+        stateObservationJob?.cancel()
+        stateObservationJob = null
+
+        _uiState.value = GeminiUiState(errorMessage = receiptMessage)
+        isFinalizingSession = false
+        currentSopSessionId = null
+    }
+
+    /** Wraps [result] in the `toolResponse.functionResponses[]` envelope for the given call. */
+    private fun buildToolResponse(
+        callId: String,
+        name: String,
+        result: com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult
+    ): JSONObject {
+        return JSONObject().apply {
+            put("toolResponse", JSONObject().apply {
+                put("functionResponses", JSONArray().put(JSONObject().apply {
+                    put("id", callId)
+                    put("name", name)
+                    put("response", result.toJSON())
+                }))
+            })
+        }
+    }
} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/SopRelayClient.kt
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/SopRelayClient.kt new file mode 100644 index 00000000..d309a55c --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/SopRelayClient.kt @@ -0,0 +1,144 @@
+package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini
+
+import android.util.Log
+import okhttp3.MediaType.Companion.toMediaType
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody.Companion.toRequestBody
+import java.util.concurrent.TimeUnit
+import org.json.JSONObject
+
+/**
+ * Blocking HTTP client that relays SOP step logs and session heartbeats to the external
+ * state-machine endpoint at `http://<tailscaleIp>:8000`.
+ *
+ * Every call performs synchronous network I/O and may sleep between retries, so it must be
+ * invoked off the main thread. Request failures are logged rather than thrown.
+ */
+class SopRelayClient {
+    companion object {
+        private const val TAG = "SopRelayClient"
+        private const val JSON_MEDIA_TYPE = "application/json; charset=utf-8"
+        private const val MAX_RETRIES = 3
+        private const val INITIAL_BACKOFF_MS = 500L
+        private const val JPEG_DATA_URI_PREFIX = "data:image/jpeg;base64,"
+    }
+
+    private val client = OkHttpClient.Builder()
+        .connectTimeout(5, TimeUnit.SECONDS)
+        .readTimeout(10, TimeUnit.SECONDS)
+        .writeTimeout(10, TimeUnit.SECONDS)
+        .build()
+
+    /**
+     * Posts one SOP step to `/api/v1/sop-log`.
+     *
+     * Only logs a warning when [tailscaleIp] is blank. A JPEG data-URI prefix on [imageBase64]
+     * is removed before sending.
+     */
+    fun postSopLog(
+        tailscaleIp: String,
+        sessionId: String,
+        stepName: String,
+        timestampIso8601: String,
+        imageBase64: String
+    ) {
+        if (tailscaleIp.isBlank()) {
+            Log.w(TAG, "OPENCLAW_TAILSCALE_IP is empty, skipping SOP log relay")
+            return
+        }
+
+        val payload = JSONObject().apply {
+            put("session_id", sessionId)
+            put("step_name", stepName)
+            put("timestamp", timestampIso8601)
+            put("image_base64", stripDataUriPrefix(imageBase64))
+        }
+
+        postJsonWithResponse("http://$tailscaleIp:8000/api/v1/sop-log", payload)
+    }
+
+    /** Posts a session heartbeat to `/api/v1/heartbeat`, ignoring the response. */
+    fun postHeartbeat(
+        tailscaleIp: String,
+        sessionId: String,
+        status: String
+    ) {
+        postHeartbeatForReceipt(tailscaleIp, sessionId, status)
+    }
+
+    /**
+     * Posts a session heartbeat and returns a human-readable receipt.
+     *
+     * @return the response's `message` field when the body is a JSON object with a non-blank
+     * `message`; otherwise the raw body. `null` when [tailscaleIp] is blank, no attempt produced
+     * a response, or the response had no body.
+     */
+    fun postHeartbeatForReceipt(
+        tailscaleIp: String,
+        sessionId: String,
+        status: String
+    ): String? {
+        if (tailscaleIp.isBlank()) {
+            Log.w(TAG, "OPENCLAW_TAILSCALE_IP is empty, skipping heartbeat relay")
+            return null
+        }
+
+        val payload = JSONObject().apply {
+            put("session_id", sessionId)
+            put("status", status)
+        }
+
+        val rawBody = postJsonWithResponse("http://$tailscaleIp:8000/api/v1/heartbeat", payload) ?: return null
+
+        return try {
+            JSONObject(rawBody).optString("message").ifBlank { rawBody }
+        } catch (_: Exception) {
+            // Not a JSON object: surface the body as-is.
+            rawBody
+        }
+    }
+
+    /**
+     * POSTs [payload] as JSON to [url], making up to [MAX_RETRIES] attempts with exponential
+     * backoff after exceptions and HTTP 5xx responses.
+     *
+     * @return the body of the response that ended the attempts (returned for non-2xx statuses as
+     * well), or `null` if the last attempt threw or the thread was interrupted while waiting to
+     * retry.
+     */
+    private fun postJsonWithResponse(url: String, payload: JSONObject): String? {
+        var backoffMs = INITIAL_BACKOFF_MS
+
+        for (attempt in 1..MAX_RETRIES) {
+            val isLastAttempt = attempt == MAX_RETRIES
+
+            try {
+                val request = Request.Builder()
+                    .url(url)
+                    .post(payload.toString().toRequestBody(JSON_MEDIA_TYPE.toMediaType()))
+                    .build()
+
+                // Read status and body inside `use`, then decide about retrying outside of it:
+                // jumping with `continue` out of the lambda does not compile before Kotlin 2.2.
+                val (code, body) = client.newCall(request).execute().use { response ->
+                    response.code to response.body?.string()
+                }
+
+                if (code in 200..299) return body
+                if (code !in 500..599 || isLastAttempt) {
+                    Log.w(TAG, "POST $url failed with HTTP $code")
+                    return body
+                }
+            } catch (e: Exception) {
+                // Deliberately broad: the relay is best-effort and must not crash the caller.
+                if (isLastAttempt) {
+                    Log.e(TAG, "POST failed for $url after $attempt attempts: ${e.message}")
+                    return null
+                }
+            }
+
+            if (!sleepBeforeRetry(backoffMs)) return null
+            backoffMs *= 2
+        }
+
+        return null
+    }
+
+    /**
+     * Sleeps for [delayMs] before the next attempt.
+     *
+     * @return `false` if the thread was interrupted while sleeping; the interrupt flag is
+     * restored before returning.
+     */
+    private fun sleepBeforeRetry(delayMs: Long): Boolean {
+        return try {
+            Thread.sleep(delayMs)
+            true
+        } catch (_: InterruptedException) {
+            Thread.currentThread().interrupt()
+            false
+        }
+    }
+
+    /** Removes a leading `data:image/jpeg;base64,` prefix, if present. */
+    private fun stripDataUriPrefix(value: String): String = value.removePrefix(JPEG_DATA_URI_PREFIX)
+}
diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index 696a0c8a..4ae0b322 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -103,7 +105,9 @@ sealed class OpenClawConnectionState { object ToolDeclarations { fun allDeclarationsJSON(): JSONArray { - return JSONArray().put(executeJSON()) + return JSONArray() + .put(executeJSON()) + .put(logSopStepJSON()) } private fun executeJSON(): JSONObject { @@ -123,4 +125,42 @@ object ToolDeclarations { put("behavior", "BLOCKING") } }
+
+    /**
+     * Builds the function declaration for `log_sop_step`, the tool used to forward an SOP step
+     * to the external SOP processor. Only `step_name` is required.
+     */
+    private fun logSopStepJSON(): JSONObject {
+        // Every parameter is described by the same two keys.
+        fun field(type: String, description: String): JSONObject =
+            JSONObject().put("type", type).put("description", description)
+
+        val properties = JSONObject()
+            .put("step_number", field("integer", "Current SOP step number (1-based)."))
+            .put("step_name", field("string", "The SOP step label that should be logged."))
+            .put("action", field("string", "Step action state: started, completed, failed, or skipped."))
+            .put("total_steps", field("integer", "Total number of SOP steps."))
+            .put(
+                "frame_data",
+                field("string", "Optional current frame image in base64. If omitted, the app uses the latest captured frame."),
+            )
+            .put("notes", field("string", "Optional operator/model notes for this step."))
+
+        val parameters = JSONObject()
+            .put("type", "object")
+            .put("properties", properties)
+            .put("required", JSONArray().put("step_name"))
+
+        return JSONObject()
+            .put("name", "log_sop_step")
+            .put("description", "Log a standard operating procedure step to the external SOP processor.")
+            .put("parameters", parameters)
+            .put("behavior", "NON_BLOCKING")
+    }
} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 35337e14..1e0d05f6 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -33,7 +33,7 @@ class ToolCallRouter( Log.d(TAG, "Circuit breaker open ($consecutiveFailures consecutive failures), rejecting $callId") val errorResult = ToolResult.Failure( "Tool execution is temporarily unavailable after $consecutiveFailures consecutive failures. " + - "Please tell the user you cannot complete this action right now and suggest they check their OpenClaw gateway connection." + "Please tell the user you cannot complete this action right now and suggest they check their Video AI Analyst gateway connection."
) sendResponse(buildToolResponse(callId, callName, errorResult)) return diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index dd8d2d26..11ce560f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -2,25 +2,67 @@ package com.meta.wearable.dat.externalsampleapps.cameraaccess.settings import android.content.Context import android.content.SharedPreferences +import android.security.keystore.KeyGenParameterSpec +import android.security.keystore.KeyProperties +import android.util.Base64 +import com.meta.wearable.dat.externalsampleapps.cameraaccess.BuildConfig import com.meta.wearable.dat.externalsampleapps.cameraaccess.Secrets +import java.security.KeyStore +import java.util.UUID +import javax.crypto.Cipher +import javax.crypto.KeyGenerator +import javax.crypto.SecretKey +import javax.crypto.spec.GCMParameterSpec object SettingsManager { private const val PREFS_NAME = "visionclaw_settings" + private const val SECURE_PREFS_NAME = "visionclaw_secure_settings" + private const val SECURE_KEY_ALIAS = "visionclaw.secure.settings" private lateinit var prefs: SharedPreferences + private lateinit var secureStore: AndroidSecureStringStore fun init(context: Context) { prefs = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + secureStore = AndroidSecureStringStore( + context.getSharedPreferences(SECURE_PREFS_NAME, Context.MODE_PRIVATE), + SECURE_KEY_ALIAS + ) } var geminiAPIKey: String - get() = prefs.getString("geminiAPIKey", null) ?: Secrets.geminiAPIKey - set(value) = prefs.edit().putString("geminiAPIKey", value).apply() + 
get() { + val stored = getSecureString("geminiAPIKey") + if (!stored.isNullOrBlank()) return stored + + return Secrets.geminiAPIKey + } + set(value) = putSecureString("geminiAPIKey", value) var geminiSystemPrompt: String get() = prefs.getString("geminiSystemPrompt", null) ?: DEFAULT_SYSTEM_PROMPT set(value) = prefs.edit().putString("geminiSystemPrompt", value).apply() + var workerLoginCode: String + get() = prefs.getString("workerLoginCode", null) ?: Secrets.workerLoginCode + set(value) = prefs.edit().putString("workerLoginCode", value).apply() + + var opsBaseURL: String + get() = prefs.getString("opsBaseURL", null) ?: Secrets.opsBaseURL + set(value) = prefs.edit().putString("opsBaseURL", value).apply() + + var signalBaseURL: String + get() { + val stored = prefs.getString("signalBaseURL", null) + if (!stored.isNullOrBlank()) return stored + + val legacy = prefs.getString("webrtcSignalingURL", null) + if (!legacy.isNullOrBlank()) return normalizeSignalBaseURL(legacy) + + return Secrets.signalBaseURL + } + set(value) = prefs.edit().putString("signalBaseURL", normalizeSignalBaseURL(value)).apply() + var openClawHost: String get() = prefs.getString("openClawHost", null) ?: Secrets.openClawHost set(value) = prefs.edit().putString("openClawHost", value).apply() @@ -33,16 +75,60 @@ object SettingsManager { set(value) = prefs.edit().putInt("openClawPort", value).apply() var openClawHookToken: String - get() = prefs.getString("openClawHookToken", null) ?: Secrets.openClawHookToken - set(value) = prefs.edit().putString("openClawHookToken", value).apply() + get() = getSecureString("openClawHookToken") ?: Secrets.openClawHookToken + set(value) = putSecureString("openClawHookToken", value) var openClawGatewayToken: String - get() = prefs.getString("openClawGatewayToken", null) ?: Secrets.openClawGatewayToken - set(value) = prefs.edit().putString("openClawGatewayToken", value).apply() + get() = getSecureString("openClawGatewayToken") ?: Secrets.openClawGatewayToken + set(value) 
= putSecureString("openClawGatewayToken", value) var webrtcSignalingURL: String - get() = prefs.getString("webrtcSignalingURL", null) ?: Secrets.webrtcSignalingURL - set(value) = prefs.edit().putString("webrtcSignalingURL", value).apply() + get() = normalizeWebsocketURL(signalBaseURL) + set(value) { + val normalized = normalizeSignalBaseURL(value) + prefs.edit() + .putString("signalBaseURL", normalized) + .putString("webrtcSignalingURL", normalized) + .apply() + } + + var openClawTailscaleIP: String + get() { + val stored = prefs.getString("openClawTailscaleIP", null) + if (!stored.isNullOrBlank()) return stored + + val buildValue = BuildConfig.OPENCLAW_TAILSCALE_IP + if (buildValue.isNotBlank()) return buildValue + + return Secrets.openClawTailscaleIP + } + set(value) = prefs.edit().putString("openClawTailscaleIP", value).apply() + + var openClawBearerToken: String + get() { + val stored = getSecureString("openClawBearerToken") + if (!stored.isNullOrBlank()) return stored + + return Secrets.openClawBearerToken + } + set(value) = putSecureString("openClawBearerToken", value) + + var deviceId: String + get() { + val stored = prefs.getString("deviceId", null) + if (!stored.isNullOrBlank()) return stored + + val secret = Secrets.deviceId + if (secret.isNotBlank() && secret != "YOUR_DEVICE_UUID") { + prefs.edit().putString("deviceId", secret).apply() + return secret + } + + val generated = UUID.randomUUID().toString() + prefs.edit().putString("deviceId", generated).apply() + return generated + } + set(value) = prefs.edit().putString("deviceId", value).apply() var videoStreamingEnabled: Boolean get() = prefs.getBoolean("videoStreamingEnabled", true) @@ -54,6 +140,54 @@ object SettingsManager { fun resetAll() { prefs.edit().clear().apply() + secureStore.clear() + } + + private fun normalizeSignalBaseURL(raw: String): String { + val trimmed = raw.trim() + if (trimmed.isEmpty()) return trimmed + return when { + trimmed.startsWith("wss://") ->
"https://${trimmed.removePrefix("wss://")}" + trimmed.startsWith("ws://") -> "http://${trimmed.removePrefix("ws://")}" + trimmed.startsWith("https://") || trimmed.startsWith("http://") -> trimmed + else -> "https://$trimmed" + } + } + + private fun normalizeWebsocketURL(raw: String): String { + val trimmed = raw.trim() + if (trimmed.isEmpty()) return trimmed + return when { + trimmed.startsWith("wss://") || trimmed.startsWith("ws://") -> trimmed + trimmed.startsWith("https://") -> "wss://${trimmed.removePrefix("https://")}" + trimmed.startsWith("http://") -> "ws://${trimmed.removePrefix("http://")}" + else -> "wss://$trimmed" + } + } + + private fun getSecureString(key: String): String? { + val stored = secureStore.getString(key) + if (!stored.isNullOrBlank()) return stored + + val legacy = prefs.getString(key, null) + if (!legacy.isNullOrBlank()) { + secureStore.putString(key, legacy) + prefs.edit().remove(key).apply() + return legacy + } + + return null + } + + private fun putSecureString(key: String, value: String) { + val trimmed = value.trim() + prefs.edit().remove(key).apply() + + if (trimmed.isEmpty()) { + secureStore.remove(key) + } else { + secureStore.putString(key, trimmed) + } } const val DEFAULT_SYSTEM_PROMPT = """You are an AI assistant for someone wearing Meta Ray-Ban smart glasses. You can see through their camera and have a voice conversation. Keep responses concise and natural. @@ -82,3 +216,77 @@ Never call execute silently -- the user needs verbal confirmation that you heard For messages, confirm recipient and content before delegating unless clearly urgent.""" }
+
+/**
+ * String store that encrypts each value with an AES/GCM key kept in the Android Keystore under
+ * [keyAlias] and persists it in [prefs] as `base64(iv):base64(ciphertext)`.
+ */
+private class AndroidSecureStringStore(
+    private val prefs: SharedPreferences,
+    private val keyAlias: String,
+) {
+    /** Returns the decrypted value for [key], or `null` when it is absent or cannot be decrypted. */
+    fun getString(key: String): String? =
+        prefs.getString(key, null)?.let { stored -> runCatching { decrypt(stored) }.getOrNull() }
+
+    /** Encrypts [value] and stores it under [key]. */
+    fun putString(key: String, value: String) {
+        val editor = prefs.edit()
+        editor.putString(key, encrypt(value))
+        editor.apply()
+    }
+
+    /** Deletes the entry stored under [key]. */
+    fun remove(key: String) = prefs.edit().remove(key).apply()
+
+    /** Deletes every entry in the backing preferences. */
+    fun clear() = prefs.edit().clear().apply()
+
+    // Output layout: "<base64 iv>:<base64 ciphertext>"; the IV is the one the cipher generated.
+    private fun encrypt(value: String): String {
+        val cipher = Cipher.getInstance(TRANSFORMATION).apply {
+            init(Cipher.ENCRYPT_MODE, getOrCreateSecretKey())
+        }
+        val ciphertext = cipher.doFinal(value.toByteArray(Charsets.UTF_8))
+        return encode(cipher.iv) + ":" + encode(ciphertext)
+    }
+
+    // Inverse of encrypt; throws when the stored value is malformed or fails authentication.
+    private fun decrypt(value: String): String {
+        val segments = value.split(":", limit = 2)
+        require(segments.size == 2) { "Invalid secure value format" }
+        val (ivPart, dataPart) = segments
+
+        val cipher = Cipher.getInstance(TRANSFORMATION)
+        val key = getOrCreateSecretKey()
+        val iv = decode(ivPart)
+        cipher.init(Cipher.DECRYPT_MODE, key, GCMParameterSpec(TAG_LENGTH_BITS, iv))
+        return cipher.doFinal(decode(dataPart)).toString(Charsets.UTF_8)
+    }
+
+    // Looks the key up by alias and generates a 256-bit AES/GCM key on first use.
+    private fun getOrCreateSecretKey(): SecretKey {
+        val keyStore = KeyStore.getInstance(ANDROID_KEYSTORE)
+        keyStore.load(null)
+        (keyStore.getKey(keyAlias, null) as? SecretKey)?.let { return it }
+
+        val generator = KeyGenerator.getInstance(KeyProperties.KEY_ALGORITHM_AES, ANDROID_KEYSTORE)
+        val purposes = KeyProperties.PURPOSE_ENCRYPT or KeyProperties.PURPOSE_DECRYPT
+        val spec = KeyGenParameterSpec.Builder(keyAlias, purposes)
+            .setBlockModes(KeyProperties.BLOCK_MODE_GCM)
+            .setEncryptionPaddings(KeyProperties.ENCRYPTION_PADDING_NONE)
+            .setKeySize(256)
+            .setRandomizedEncryptionRequired(true)
+            .build()
+        generator.init(spec)
+        return generator.generateKey()
+    }
+
+    private fun encode(value: ByteArray): String {
+        return Base64.encodeToString(value, Base64.NO_WRAP)
+    }
+
+    private fun decode(value: String): ByteArray {
+        return Base64.decode(value, Base64.NO_WRAP)
+    }
+
+    private companion object {
+        const val ANDROID_KEYSTORE = "AndroidKeyStore"
+        const val TRANSFORMATION = "AES/GCM/NoPadding"
+        const val TAG_LENGTH_BITS = 128
+    }
+}
diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GeminiOverlayView.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GeminiOverlayView.kt index 8cfa09cf..ec3c55f2 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GeminiOverlayView.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GeminiOverlayView.kt @@ -98,7 +98,7 @@ fun GeminiStatusBar( if (openClawState !is OpenClawConnectionState.NotConfigured) { StatusPill( - label = "OpenClaw", + label = "Video AI Analyst", color = when (openClawState) { is OpenClawConnectionState.Connected -> Color(0xFF4CAF50) is OpenClawConnectionState.Checking -> Color(0xFFFF9800) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index dd913363..bdf8aa41 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -33,6 +33,8 @@ import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.Color import androidx.compose.ui.text.font.FontFamily import androidx.compose.ui.text.input.KeyboardType +import androidx.compose.ui.text.input.PasswordVisualTransformation +import androidx.compose.ui.text.input.VisualTransformation import androidx.compose.ui.unit.dp import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager @@ -42,8 +44,11 @@ fun SettingsScreen( onBack: () -> Unit, modifier: Modifier = Modifier, ) { + var workerLoginCode by remember { mutableStateOf(SettingsManager.workerLoginCode) } var geminiAPIKey by remember { mutableStateOf(SettingsManager.geminiAPIKey) } var systemPrompt by remember { mutableStateOf(SettingsManager.geminiSystemPrompt) } + var opsBaseURL by remember { mutableStateOf(SettingsManager.opsBaseURL) } + var signalBaseURL by remember { mutableStateOf(SettingsManager.signalBaseURL) } var openClawHost by remember { mutableStateOf(SettingsManager.openClawHost) } var openClawPort by remember { mutableStateOf(SettingsManager.openClawPort.toString()) } var openClawHookToken by remember { mutableStateOf(SettingsManager.openClawHookToken) } @@ -54,8 +59,11 @@ fun SettingsScreen( var showResetDialog by remember { mutableStateOf(false) } fun save() { + SettingsManager.workerLoginCode = workerLoginCode.trim() SettingsManager.geminiAPIKey = geminiAPIKey.trim() SettingsManager.geminiSystemPrompt = systemPrompt.trim() + SettingsManager.opsBaseURL = opsBaseURL.trim() + SettingsManager.signalBaseURL = signalBaseURL.trim() 
SettingsManager.openClawHost = openClawHost.trim() openClawPort.trim().toIntOrNull()?.let { SettingsManager.openClawPort = it } SettingsManager.openClawHookToken = openClawHookToken.trim() @@ -66,8 +74,11 @@ fun SettingsScreen( } fun reload() { + workerLoginCode = SettingsManager.workerLoginCode geminiAPIKey = SettingsManager.geminiAPIKey systemPrompt = SettingsManager.geminiSystemPrompt + opsBaseURL = SettingsManager.opsBaseURL + signalBaseURL = SettingsManager.signalBaseURL openClawHost = SettingsManager.openClawHost openClawPort = SettingsManager.openClawPort.toString() openClawHookToken = SettingsManager.openClawHookToken @@ -98,6 +109,30 @@ fun SettingsScreen( .navigationBarsPadding(), verticalArrangement = Arrangement.spacedBy(16.dp), ) { + SectionHeader("Worker") + MonoTextField( + value = workerLoginCode, + onValueChange = { workerLoginCode = it }, + label = "Login Code", + placeholder = "EMBC-0001", + ) + + SectionHeader("Operations Backend") + MonoTextField( + value = opsBaseURL, + onValueChange = { opsBaseURL = it }, + label = "Ops Base URL", + placeholder = "https://ops.embarcaderolabs.cloud", + keyboardType = KeyboardType.Uri, + ) + MonoTextField( + value = signalBaseURL, + onValueChange = { signalBaseURL = it }, + label = "Signal Base URL", + placeholder = "https://signal.embarcaderolabs.cloud", + keyboardType = KeyboardType.Uri, + ) + // Gemini section SectionHeader("Gemini API") MonoTextField( @@ -105,6 +140,7 @@ fun SettingsScreen( onValueChange = { geminiAPIKey = it }, label = "API Key", placeholder = "Enter Gemini API key", + sensitive = true, ) SectionHeader("System Prompt") @@ -117,7 +153,7 @@ fun SettingsScreen( ) // OpenClaw section - SectionHeader("OpenClaw") + SectionHeader("Video AI Analyst") MonoTextField( value = openClawHost, onValueChange = { openClawHost = it }, @@ -137,22 +173,14 @@ fun SettingsScreen( onValueChange = { openClawHookToken = it }, label = "Hook Token", placeholder = "Hook token", + sensitive = true, ) MonoTextField( 
value = openClawGatewayToken, onValueChange = { openClawGatewayToken = it }, label = "Gateway Token", placeholder = "Gateway auth token", - ) - - // WebRTC section - SectionHeader("WebRTC") - MonoTextField( - value = webrtcSignalingURL, - onValueChange = { webrtcSignalingURL = it }, - label = "Signaling URL", - placeholder = "wss://your-server.example.com", - keyboardType = KeyboardType.Uri, + sensitive = true, ) // Video @@ -186,7 +214,7 @@ fun SettingsScreen( Column { Text("Proactive Notifications", style = MaterialTheme.typography.bodyLarge) Text( - "Receive updates from OpenClaw spoken through glasses.", + "Receive updates from Video AI Analyst spoken through glasses.", style = MaterialTheme.typography.bodySmall, color = MaterialTheme.colorScheme.onSurfaceVariant, ) @@ -245,6 +273,7 @@ private fun MonoTextField( label: String, placeholder: String, keyboardType: KeyboardType = KeyboardType.Text, + sensitive: Boolean = false, ) { OutlinedTextField( value = value, @@ -254,6 +283,9 @@ private fun MonoTextField( modifier = Modifier.fillMaxWidth(), textStyle = MaterialTheme.typography.bodyMedium.copy(fontFamily = FontFamily.Monospace), singleLine = true, - keyboardOptions = KeyboardOptions(keyboardType = keyboardType), + keyboardOptions = KeyboardOptions( + keyboardType = if (sensitive) KeyboardType.Password else keyboardType + ), + visualTransformation = if (sensitive) PasswordVisualTransformation() else VisualTransformation.None, ) } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/webrtc/SignalingClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/webrtc/SignalingClient.kt index 6dedb61b..7bdee461 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/webrtc/SignalingClient.kt +++ 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/webrtc/SignalingClient.kt @@ -79,11 +79,11 @@ class SignalingClient { } fun joinRoom(code: String) { - sendJSON(JSONObject().put("type", "join").put("room", code)) + sendJSON(JSONObject().put("type", "join").put("room", code).put("room_code", code)) } fun rejoinRoom(code: String) { - sendJSON(JSONObject().put("type", "rejoin").put("room", code)) + sendJSON(JSONObject().put("type", "rejoin").put("room", code).put("room_code", code)) } fun sendSdp(sdp: SessionDescription) { @@ -120,7 +120,7 @@ class SignalingClient { when (type) { "room_created" -> { - val room = json.optString("room", "") + val room = readRoomCode(json) if (room.isNotEmpty()) { onMessageReceived?.invoke(SignalingMessage.RoomCreated(room)) } @@ -129,15 +129,15 @@ class SignalingClient { onMessageReceived?.invoke(SignalingMessage.RoomJoined) } "room_rejoined" -> { - val room = json.optString("room", "") + val room = readRoomCode(json) if (room.isNotEmpty()) { onMessageReceived?.invoke(SignalingMessage.RoomRejoined(room)) } } - "peer_joined" -> { + "peer_joined", "viewer_joined" -> { onMessageReceived?.invoke(SignalingMessage.PeerJoined) } - "peer_left" -> { + "peer_left", "viewer_left" -> { onMessageReceived?.invoke(SignalingMessage.PeerLeft) } "offer" -> { @@ -184,4 +184,8 @@ class SignalingClient { Log.e(TAG, "Failed to parse signaling message: ${e.message}") } } + + private fun readRoomCode(json: JSONObject): String { + return json.optString("room", json.optString("room_code", "")) + } } diff --git a/test_connection.sh b/test_connection.sh new file mode 100755 index 00000000..48605dcc --- /dev/null +++ b/test_connection.sh @@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Smoke test for the SOP relay endpoints: start a session, log one step, terminate the session.
+# The target defaults to the sample Tailscale address; override it with the first argument or
+# with the BASE_URL environment variable.
+set -euo pipefail
+
+BASE_URL="${1:-${BASE_URL:-http://100.64.30.99:8000}}"
+
+# POST a JSON body to a relay path and print the response followed by a newline.
+# --fail turns HTTP 4xx/5xx into a non-zero exit status, so `set -e` aborts the run instead of
+# letting the script reach "Done." when the server rejected a request.
+post_json() {
+  local path="$1"
+  local body="$2"
+  curl -sS --fail -X POST "${BASE_URL}${path}" \
+    -H "Content-Type: application/json" \
+    -d "${body}"
+  echo
+}
+
+echo "[1/3] Start session heartbeat..."
+post_json "/api/v1/heartbeat" '{"session_id":"test-e2e-001","status":"active"}'
+sleep 1
+
+echo "[2/3] Send SOP log..."
+post_json "/api/v1/sop-log" '{"session_id":"test-e2e-001","step_name":"network_validation","timestamp":"2026-03-02T12:00:00Z","image_base64":"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="}'
+sleep 1
+
+echo "[3/3] Terminate session heartbeat..."
+post_json "/api/v1/heartbeat" '{"session_id":"test-e2e-001","status":"terminated"}'
+
+echo "Done."