diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index 92a6a3ef..352cab64 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -1972,7 +1972,7 @@ struct ContentView: View { // If this was a rewrite recording, process the rewrite instead of typing if wasRewriteMode { - DebugLogger.shared.info("Processing rewrite with instruction: \(transcribedText)", source: "ContentView") + DebugLogger.shared.info("Processing rewrite (instruction chars: \(transcribedText.count))", source: "ContentView") let appInfo = self.recordingAppInfo ?? self.getCurrentAppInfo() await self.processRewriteWithVoiceInstruction(transcribedText, appInfo: appInfo) AnalyticsService.shared.capture( @@ -1988,7 +1988,7 @@ struct ContentView: View { // If this was a command recording, process the command if wasCommandMode { - DebugLogger.shared.info("Processing command: \(transcribedText)", source: "ContentView") + DebugLogger.shared.info("Processing command (chars: \(transcribedText.count))", source: "ContentView") await self.processCommandWithVoice(transcribedText) AnalyticsService.shared.capture( .transcriptionCompleted, @@ -2517,7 +2517,7 @@ struct ContentView: View { ) async { self.rewriteModeService.setPromptAppBundleID(appInfo.bundleId) let hasOriginalText = !self.rewriteModeService.originalText.isEmpty - DebugLogger.shared.info("Processing \(hasOriginalText ? "rewrite" : "write/improve") - instruction: '\(instruction)', originalText length: \(self.rewriteModeService.originalText.count)", source: "ContentView") + DebugLogger.shared.info("Processing \(hasOriginalText ? "rewrite" : "write/improve") - instruction chars: \(instruction.count), originalText length: \(self.rewriteModeService.originalText.count)", source: "ContentView") // Show processing animation self.menuBarManager.setProcessing(true) @@ -2666,7 +2666,7 @@ struct ContentView: View { // MARK: - Command Mode Voice Processing private func processCommandWithVoice(_ command: String) async { - DebugLogger.shared.info("Processing voice command: '\(command)'", source: "ContentView") + DebugLogger.shared.info("Processing voice command (chars: \(command.count))", source: "ContentView") // Show processing animation self.menuBarManager.setProcessing(true) @@ -3185,11 +3185,17 @@ extension ContentView { } private func logDictationPromptTrace(_ title: String, value: String) { - let line = "[PromptTrace][Dictate] \(title):\n\(value)" + // Privacy: prompt-trace values are raw user content (the dictated transcript, the + // folded prompt, the model's answer and thinking). DebugLogger persists every line to + // a plaintext disk log (~/Library/Logs/Fluid/Fluid.log), and this trace is enabled + // whenever EnableDebugLogs is on (the default), so the raw value must never be routed + // there. The full trace remains available on the console for live debugging, gated + // behind the explicit FLUID_PROMPT_TRACE=1 env var; only a redacted metadata line is + // persisted to the log. if self.forcePromptTraceToConsole { - print(line) + print("[PromptTrace][Dictate] \(title):\n\(value)") } - DebugLogger.shared.debug(line, source: "ContentView") + DebugLogger.shared.debug("[PromptTrace][Dictate] \(title) (\(value.count) chars) [REDACTED]", source: "ContentView") } private func customPromptAnalyticsProperties(promptSource: String, overrideEmpty: Bool?) -> [String: Any] { diff --git a/Sources/Fluid/Networking/AIProvider.swift b/Sources/Fluid/Networking/AIProvider.swift index 1541d9d8..c1ff09cd 100644 --- a/Sources/Fluid/Networking/AIProvider.swift +++ b/Sources/Fluid/Networking/AIProvider.swift @@ -131,7 +131,9 @@ final class OpenAICompatibleProvider: AIProvider { if let http = response as? HTTPURLResponse, http.statusCode >= 400 { let errText = String(data: data, encoding: .utf8) ?? "Unknown error" - DebugLogger.shared.error("AI API error HTTP \(http.statusCode): \(errText)", source: "AIProvider") + // Privacy: do not persist the error body (it can echo the request/prompt) — log + // status and body size only. The full text is still returned to the caller. + DebugLogger.shared.error("AI API error HTTP \(http.statusCode): \(data.count) bytes", source: "AIProvider") return "Error: HTTP \(http.statusCode): \(errText)" } let decoded = try JSONDecoder().decode(ChatResponse.self, from: data) diff --git a/Sources/Fluid/Networking/FunctionCallingProvider.swift b/Sources/Fluid/Networking/FunctionCallingProvider.swift index 0dfcefb8..634c7a02 100644 --- a/Sources/Fluid/Networking/FunctionCallingProvider.swift +++ b/Sources/Fluid/Networking/FunctionCallingProvider.swift @@ -235,10 +235,10 @@ final class FunctionCallingProvider { return .error("Failed to encode request") } - // Debug: Log request payload - if let requestString = String(data: jsonData, encoding: .utf8) { - DebugLogger.shared.debug("Request JSON: \(requestString)", source: "FunctionCallingProvider") - } + // Debug: Log request metadata. Privacy: the request body embeds the user's dictated + // transcript / prompt, and DebugLogger persists to a plaintext disk log, so log only + // the body size, never its contents. + DebugLogger.shared.debug("Request JSON: \(jsonData.count) bytes", source: "FunctionCallingProvider") var request = URLRequest(url: url) request.httpMethod = "POST" @@ -254,14 +254,16 @@ final class FunctionCallingProvider { do { let (data, response) = try await URLSession.shared.data(for: request) - // Log response - if let responseString = String(data: data, encoding: .utf8) { - DebugLogger.shared.debug("📥 LLM Response: \(responseString)", source: "FunctionCallingProvider") - } + // Log response metadata. Privacy: the response body is the model's reply derived + // from the user's dictation, and DebugLogger persists to a plaintext disk log, so + // log only its size, never the contents. + DebugLogger.shared.debug("📥 LLM Response: \(data.count) bytes", source: "FunctionCallingProvider") if let http = response as? HTTPURLResponse, http.statusCode >= 400 { let errText = String(data: data, encoding: .utf8) ?? "Unknown error" - DebugLogger.shared.error("HTTP \(http.statusCode): \(errText)", source: "FunctionCallingProvider") + // Privacy: do not persist the error body (it can echo the request/prompt) — log + // status and body size only. The full text is still returned to the caller. + DebugLogger.shared.error("HTTP \(http.statusCode): \(data.count) bytes", source: "FunctionCallingProvider") return .error("HTTP \(http.statusCode): \(errText)") } @@ -283,8 +285,10 @@ final class FunctionCallingProvider { var parsedCalls: [(name: String, arguments: [String: Any], callId: String)] = [] for toolCall in toolCalls { + // Privacy: tool-call arguments are model-generated from the user's dictation + // and may contain user content; log the tool name and argument size only. DebugLogger.shared.info( - " → \(toolCall.function.name)(\(toolCall.function.arguments))", + " → \(toolCall.function.name)(\(toolCall.function.arguments.count) chars)", source: "FunctionCallingProvider" ) // Parse arguments JSON string @@ -381,8 +385,10 @@ final class FunctionCallingProvider { if let http = response as? HTTPURLResponse, http.statusCode >= 400 { let errText = String(data: data, encoding: .utf8) ?? "Unknown error" + // Privacy: do not persist the error body (it can echo the request/prompt) — log + // status and body size only. The full text is still returned to the caller. DebugLogger.shared.error( - "HTTP \(http.statusCode) in continueWithToolResults: \(errText)", + "HTTP \(http.statusCode) in continueWithToolResults: \(data.count) bytes", source: "FunctionCallingProvider" ) return .error("HTTP \(http.statusCode): \(errText)") diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 6b21b482..c559988c 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -671,7 +671,9 @@ final class ASRService: ObservableObject { guard let hit = hits.first else { return } if hit != self.lastBoostHitTerm { self.lastBoostHitTerm = hit - DebugLogger.shared.info("BOOST_HIT: '\(hit)'", source: "ASRService") + // Privacy: hit is a word from the user's transcript; log only that a boosted term + // was detected (with its length), never the term itself. + DebugLogger.shared.info("BOOST_HIT detected (\(hit.count) chars)", source: "ASRService") } self.refreshWordBoostStatus() } @@ -1092,8 +1094,10 @@ final class ASRService: ObservableObject { let finalAudioSeconds = Double(pcm.count) / 16_000.0 let finalRTF = finalAudioSeconds > 0 ? (Double(finalElapsedMs) / 1000.0) / finalAudioSeconds : 0 DebugLogger.shared.debug("stop(): final transcription finished source=\(finalSource)", source: "ASRService") + // Privacy: result.text is the user's dictated transcript; DebugLogger persists to a + // plaintext disk log, so log only its length and confidence, never the text itself. DebugLogger.shared.debug( - "Transcription completed: '\(result.text)' (confidence: \(result.confidence))", + "Transcription completed: \(result.text.count) chars (confidence: \(result.confidence))", source: "ASRService" ) DebugLogger.shared.info( @@ -1117,7 +1121,8 @@ final class ASRService: ObservableObject { // Do not update self.finalText here to avoid instant binding insert in playground let cleanedText = ASRService.applyCustomDictionary(ASRService.removeFillerWords(result.text)) self.recordWordBoostHitIfAny(transcribedText: cleanedText) - DebugLogger.shared.debug("After post-processing: '\(cleanedText)'", source: "ASRService") + // Privacy: cleanedText is the post-processed dictated transcript; log length only. + DebugLogger.shared.debug("After post-processing: \(cleanedText.count) chars", source: "ASRService") self.benchmarkLog("stop_end result=success totalMs=\(self.elapsedMilliseconds(since: stopStartedAt)) recordingAgeMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) cleanedChars=\(cleanedText.count)") if SettingsStore.shared.saveTranscriptionHistory, SettingsStore.shared.saveAudioWithTranscriptionHistory, @@ -2728,7 +2733,8 @@ final class ASRService: ObservableObject { self.partialTranscription = updatedText self.previousFullTranscription = newText - DebugLogger.shared.debug("✅ Streaming: '\(updatedText)' (\(String(format: "%.2f", duration))s)", source: "ASRService") + // Privacy: updatedText is the live partial transcript; log length only. + DebugLogger.shared.debug("✅ Streaming: \(updatedText.count) chars (\(String(format: "%.2f", duration))s)", source: "ASRService") } let rtf = chunk.isEmpty ? 0 : duration / (Double(chunk.count) / 16_000.0) let chunkDoneAgeMs = self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt) diff --git a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift index 7f0a2a60..989c3c83 100644 --- a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift @@ -196,7 +196,9 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { DebugLogger.shared.debug("AppleSpeechAnalyzer: Results task started, waiting for results...", source: "AppleSpeechAnalyzerProvider") for try await case let result in freshTranscriber.results { let text = String(result.text.characters) - DebugLogger.shared.debug("AppleSpeechAnalyzer: Got result - isFinal: \(result.isFinal), text: '\(text)'", source: "AppleSpeechAnalyzerProvider") + // Privacy: text is recognized speech; DebugLogger persists to a plaintext disk + // log, so log only its length, never the recognized content. + DebugLogger.shared.debug("AppleSpeechAnalyzer: Got result - isFinal: \(result.isFinal), text: \(text.count) chars", source: "AppleSpeechAnalyzerProvider") if result.isFinal { // ACCUMULATE results (per Apple's pattern) - don't break! if !finalText.isEmpty && !text.isEmpty { @@ -206,7 +208,7 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { } // Continue iterating until stream ends (after finalizeAndFinish) } - DebugLogger.shared.debug("AppleSpeechAnalyzer: Results iteration complete, accumulated: '\(finalText)'", source: "AppleSpeechAnalyzerProvider") + DebugLogger.shared.debug("AppleSpeechAnalyzer: Results iteration complete, accumulated: \(finalText.count) chars", source: "AppleSpeechAnalyzerProvider") } // 7. Start the analyzer (this kicks off processing) @@ -235,7 +237,7 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { DebugLogger.shared.warning("Speech recognition error: \(error.localizedDescription)", source: "AppleSpeechAnalyzerProvider") } - DebugLogger.shared.debug("AppleSpeechAnalyzer: Transcription complete - result: '\(finalText)'", source: "AppleSpeechAnalyzerProvider") + DebugLogger.shared.debug("AppleSpeechAnalyzer: Transcription complete - result: \(finalText.count) chars", source: "AppleSpeechAnalyzerProvider") return ASRTranscriptionResult(text: finalText, confidence: 1.0) } diff --git a/Sources/Fluid/Services/AppleSpeechProvider.swift b/Sources/Fluid/Services/AppleSpeechProvider.swift index 1538c287..07e8bd71 100644 --- a/Sources/Fluid/Services/AppleSpeechProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechProvider.swift @@ -104,7 +104,8 @@ final class AppleSpeechProvider: TranscriptionProvider { if let result = result, result.isFinal { hasResumed = true let transcription = result.bestTranscription.formattedString - DebugLogger.shared.debug("AppleSpeechProvider: Got final result: '\(transcription)'", source: "AppleSpeechProvider") + // Privacy: transcription is recognized speech; log length only. + DebugLogger.shared.debug("AppleSpeechProvider: Got final result: \(transcription.count) chars", source: "AppleSpeechProvider") continuation.resume(returning: ASRTranscriptionResult(text: transcription, confidence: 1.0)) } // Partial results ignored as we requested final only diff --git a/Sources/Fluid/Services/LLMClient.swift b/Sources/Fluid/Services/LLMClient.swift index ae907b29..70987540 100644 --- a/Sources/Fluid/Services/LLMClient.swift +++ b/Sources/Fluid/Services/LLMClient.swift @@ -18,7 +18,13 @@ enum LLMError: Error, LocalizedError { case .invalidResponse: return "Invalid response from LLM" case let .httpError(code, message): - return "HTTP \(code): \(message.trimmingCharacters(in: .whitespacesAndNewlines))" + // Privacy: `message` is the raw provider error body, which can echo the request + // (and therefore the user's dictated transcript / prompt). This description is + // surfaced to callers that persist `error.localizedDescription` to the plaintext + // disk log, so it must not embed the body — expose the status code and body size + // only. The raw body remains available on the associated value for non-logging use. + let bodyChars = message.trimmingCharacters(in: .whitespacesAndNewlines).count + return "HTTP \(code) (error body: \(bodyChars) chars)" case let .networkError(error): return Self.userFacingNetworkMessage(from: error) case .encodingError: @@ -244,12 +250,11 @@ final class LLMClient { throw LLMError.encodingError } - // Log the request for debugging + // Log the request for debugging. Privacy: the serialized body embeds the user's + // dictated transcript / prompt, and DebugLogger persists to a plaintext disk log, so + // log only metadata (message count, model, body size) — never the body contents. let messageCount = config.messages.count - if let bodyStr = String(data: jsonData, encoding: .utf8) { - let truncated = bodyStr.count > 500 ? String(bodyStr.prefix(500)) + "..." : bodyStr - DebugLogger.shared.debug("LLMClient: Request (\(messageCount) messages, model=\(config.model), streaming=\(config.streaming)): \(truncated)", source: "LLMClient") - } + DebugLogger.shared.debug("LLMClient: Request (\(messageCount) messages, model=\(config.model), streaming=\(config.streaming), bodyBytes=\(jsonData.count))", source: "LLMClient") // Build URLRequest var request = URLRequest(url: url) @@ -472,7 +477,9 @@ final class LLMClient { if let http = response as? HTTPURLResponse, http.statusCode >= 400 { let errText = String(data: data, encoding: .utf8) ?? "Unknown error" - DebugLogger.shared.error("LLMClient: HTTP error \(http.statusCode): \(errText.prefix(200))", source: "LLMClient") + // Privacy: do not persist the error body (it can echo the request/prompt) — log + // status and body size only. The full text is still thrown to the caller. + DebugLogger.shared.error("LLMClient: HTTP error \(http.statusCode) (\(data.count) bytes)", source: "LLMClient") throw LLMError.httpError(http.statusCode, errText) } @@ -649,12 +656,12 @@ final class LLMClient { continue } - // DEBUG LOG: Show full delta to see all fields (e.g., 'reasoning', 'thought', 'delta_reasoning', etc.) - if let deltaData = try? JSONSerialization.data(withJSONObject: delta, options: [.fragmentsAllowed]), - let deltaString = String(data: deltaData, encoding: .utf8) - { - DebugLogger.shared.debug("LLMClient: Full Delta: \(deltaString)", source: "LLMClient") - } + // DEBUG LOG: Show which delta fields are present (e.g., 'reasoning', 'thought', + // 'delta_reasoning', etc.) to diagnose provider response shapes. Privacy: the + // delta values are the model's response derived from the user's dictation, and + // DebugLogger persists to a plaintext disk log — log field names only, not values. + let deltaFields = delta.keys.sorted().joined(separator: ", ") + DebugLogger.shared.debug("LLMClient: Delta fields: [\(deltaFields)]", source: "LLMClient") // Handle separate reasoning fields (OpenAI 'reasoning', 'reasoning_content', DeepSeek, etc.) let reasoningField = delta["reasoning_content"] as? String ?? @@ -693,12 +700,14 @@ final class LLMClient { // For safety with tag-based parsers, we let the parser decide unless it's a known separate-field model. } - // Debug: Log first few chunks and any chunk containing think tags + // Debug: Log the shape of the first few chunks and any chunk containing think + // tags. Privacy: chunk content is the model's response derived from the user's + // dictation, and DebugLogger persists to a plaintext disk log — log only the + // chunk length and whether a think tag was detected, never the content itself. let containsThinkTag = content.contains(" 50 ? "..." : "")\"") + // Privacy: do not log the text content (the dictation result / AI output); the length + // above is the diagnostic — DebugLogger persists every line to a plaintext disk log. if self.textInsertionMode == .reliablePaste { self.log("[TypingService] Reliable Paste mode enabled") diff --git a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift index 9d441a7f..256b47fa 100644 --- a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift +++ b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift @@ -60,6 +60,31 @@ final class DictationE2ETests: XCTestCase { } } + // MARK: - Privacy: disk-log redaction + + // LLMError.httpError carries the raw provider error body, which can echo the request and + // therefore the user's dictated transcript / prompt. Callers persist error.localizedDescription + // to the plaintext disk log (~/Library/Logs/Fluid/Fluid.log), so the description must surface the + // body size only, never the body itself. Regression guard for the disk-log privacy fix: this + // fails if the raw error body is ever re-embedded in the description. + func testHTTPErrorDescriptionDoesNotEmbedProviderBody() { + let secretBody = "echoed-prompt: the user dictated their account password hunter2" + let description = LLMError.httpError(503, secretBody).errorDescription ?? "" + + XCTAssertFalse( + description.contains(secretBody), + "LLMError.httpError description must not embed the raw provider error body. Got: \(description)" + ) + XCTAssertTrue( + description.contains("503"), + "LLMError.httpError description should still surface the status code. Got: \(description)" + ) + XCTAssertTrue( + description.contains("\(secretBody.count) chars"), + "LLMError.httpError description should report the body size for diagnostics. Got: \(description)" + ) + } + func testDictationEndToEnd_whisperTiny_transcribesFixture() async throws { // Arrange SettingsStore.shared.shareAnonymousAnalytics = false