From 7d647c116cf89a322d69657e36315effdbb37f87 Mon Sep 17 00:00:00 2001 From: Nitzan Bar-Ness Date: Tue, 23 Jun 2026 17:17:01 -0700 Subject: [PATCH 1/2] Add ivrit.ai Hebrew model (Whisper Large v3 Turbo) Adds whisperIvritV3Turbo, a Hebrew-specialized Whisper model from ivrit-ai/whisper-large-v3-turbo-ggml, as a first-class speech model. - Routes through the existing SwiftWhisper path (ggml format, no new engine) - Surfaces as the recommended engine when Hebrew is selected - Forces Hebrew decode language so whisper.cpp does not auto-detect on short or accented audio - Per-model download URL so the model loads from the ivrit.ai HF repo, decoupled from the local cache filename Co-Authored-By: Claude Opus 4.8 (1M context) --- Sources/Fluid/Persistence/SettingsStore.swift | 46 +++++++++++- .../VoiceEngineLanguageCatalog.swift | 7 ++ Sources/Fluid/Services/WhisperProvider.swift | 24 +++++-- Sources/Fluid/UI/WelcomeView.swift | 2 + .../IvritHebrewModelTests.swift | 72 +++++++++++++++++++ 5 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 Tests/FluidDictationIntegrationTests/IvritHebrewModelTests.swift diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index e90be710..ccb45eba 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -3536,6 +3536,7 @@ final class SettingsStore: ObservableObject { case whisperMedium = "whisper-medium" case whisperLargeTurbo = "whisper-large-turbo" // temporarily disabled in UI case whisperLarge = "whisper-large" + case whisperIvritV3Turbo = "whisper-ivrit-v3-turbo" // Hebrew-specialized Whisper (ivrit.ai) var id: String { rawValue @@ -3561,6 +3562,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return "Whisper Medium" case .whisperLargeTurbo: return "Whisper Large Turbo (Disabled)" case .whisperLarge: return "Whisper Large" + case .whisperIvritV3Turbo: return "Whisper Hebrew (ivrit.ai 
Turbo)" } } @@ -3577,6 +3579,8 @@ final class SettingsStore: ObservableObject { case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return "99 Languages" + case .whisperIvritV3Turbo: + return "Hebrew" } } @@ -3598,6 +3602,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return "~1.5 GB" case .whisperLargeTurbo: return "~1.6 GB" case .whisperLarge: return "~2.9 GB" + case .whisperIvritV3Turbo: return "~1.6 GB" } } @@ -3624,6 +3629,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return "ggml-medium.bin" case .whisperLargeTurbo: return "ggml-large-v3-turbo.bin" case .whisperLarge: return "ggml-large-v3.bin" + case .whisperIvritV3Turbo: return "ggml-ivrit-v3-turbo.bin" default: return nil } } @@ -3637,6 +3643,29 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return "medium" case .whisperLargeTurbo: return "large-v3-turbo" case .whisperLarge: return "large-v3" + case .whisperIvritV3Turbo: return "ivrit-v3-turbo" + default: return nil + } + } + + /// Remote URL to download the ggml model from. Standard Whisper models live in + /// ggerganov/whisper.cpp; the ivrit.ai Hebrew model lives in its own HF repo under a + /// generic filename, so the remote path is decoupled from the local cache filename. + var whisperDownloadURL: URL? { + guard let file = self.whisperModelFile else { return nil } + switch self { + case .whisperIvritV3Turbo: + return URL(string: "https://huggingface.co/ivrit-ai/whisper-large-v3-turbo-ggml/resolve/main/ggml-model.bin") + default: + return URL(string: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(file)") + } + } + + /// Forces the whisper.cpp decode language for language-specialized models. + /// Returns a WhisperLanguage raw value (e.g. "iw" for Hebrew), or nil to auto-detect. + var forcedWhisperLanguageCode: String? 
{ + switch self { + case .whisperIvritV3Turbo: return "iw" // SwiftWhisper's WhisperLanguage.hebrew rawValue default: return nil } } @@ -3717,6 +3746,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return "Medium Quality" case .whisperLargeTurbo: return "Higher Quality but Faster" case .whisperLarge: return "Maximum Accuracy" + case .whisperIvritV3Turbo: return "Hebrew - ivrit.ai" } } @@ -3758,6 +3788,8 @@ final class SettingsStore: ObservableObject { return "Near-maximum accuracy with optimized speed." case .whisperLarge: return "Best possible accuracy. Large download and memory usage." + case .whisperIvritV3Turbo: + return "Hebrew-specialized Whisper Large v3 Turbo, fine-tuned by ivrit.ai on real Israeli speech. Best accuracy for Hebrew and mixed Hebrew-English dictation." } } @@ -3786,6 +3818,8 @@ final class SettingsStore: ObservableObject { return 8.0 case .whisperLarge: return 10.0 // Large model needs ~6-8GB working memory + model size + case .whisperIvritV3Turbo: + return 8.0 // large-v3-turbo class model (~1.6 GB) + working memory } } @@ -3823,6 +3857,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return 2 case .whisperLargeTurbo: return 3 case .whisperLarge: return 1 + case .whisperIvritV3Turbo: return 3 } } @@ -3844,6 +3879,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return 4 case .whisperLargeTurbo: return 5 case .whisperLarge: return 5 + case .whisperIvritV3Turbo: return 5 } } @@ -3865,6 +3901,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return 0.40 case .whisperLargeTurbo: return 0.65 case .whisperLarge: return 0.20 + case .whisperIvritV3Turbo: return 0.65 } } @@ -3886,6 +3923,7 @@ final class SettingsStore: ObservableObject { case .whisperMedium: return 0.80 case .whisperLargeTurbo: return 0.95 case .whisperLarge: return 1.00 + case .whisperIvritV3Turbo: return 0.95 } } @@ -3926,7 +3964,7 @@ final class SettingsStore: ObservableObject { /// Large 
Whisper models are too slow for streaming, so they only do final transcription on stop. var supportsStreaming: Bool { switch self { - case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge: + case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge, .whisperIvritV3Turbo: return false // Too slow for real-time chunk processing default: return true // All other models support streaming @@ -3983,7 +4021,7 @@ final class SettingsStore: ObservableObject { return .qwen case .cohereTranscribeSixBit: return .cohere - case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: + case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge, .whisperIvritV3Turbo: return .openai } } @@ -4082,6 +4120,8 @@ final class SettingsStore: ObservableObject { return "Apple" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return "OpenAI" + case .whisperIvritV3Turbo: + return "ivrit.ai" } } @@ -4106,6 +4146,8 @@ final class SettingsStore: ObservableObject { return "#A2AAAD" // Apple Gray case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return "#10A37F" // OpenAI Teal + case .whisperIvritV3Turbo: + return "#0B6E4F" // ivrit.ai green } } } diff --git a/Sources/Fluid/Persistence/VoiceEngineLanguageCatalog.swift b/Sources/Fluid/Persistence/VoiceEngineLanguageCatalog.swift index 1eedf738..9707a1e2 100644 --- a/Sources/Fluid/Persistence/VoiceEngineLanguageCatalog.swift +++ b/Sources/Fluid/Persistence/VoiceEngineLanguageCatalog.swift @@ -132,6 +132,13 @@ enum VoiceEngineLanguageCatalog { private static func routeCandidates(for language: VoiceEngineLanguage) -> [VoiceEngineLanguageRoute] { var routes: [VoiceEngineLanguageRoute] = [] + // Hebrew-specialized Whisper model (ivrit.ai). Listed first so it is the + // recommended engine when Hebrew is selected. 
The model forces decode language to + // Hebrew internally (see SpeechModel.forcedWhisperLanguageCode). + if language.id == "he" { + routes.append(Self.route(language, .whisperIvritV3Turbo, .whisper(languageCode: "iw"))) + } + if language.id == "en" { routes.append(Self.route(language, .parakeetTDTv2, .automatic)) routes.append(Self.route(language, .parakeetRealtime, .automatic)) diff --git a/Sources/Fluid/Services/WhisperProvider.swift b/Sources/Fluid/Services/WhisperProvider.swift index e3504068..845acf97 100644 --- a/Sources/Fluid/Services/WhisperProvider.swift +++ b/Sources/Fluid/Services/WhisperProvider.swift @@ -71,6 +71,8 @@ final class WhisperProvider: TranscriptionProvider { return 1000 * 1024 * 1024 // case "ggml-large-v3-turbo.bin": // buggy - so removed temporarily // return 1200 * 1024 * 1024 + case "ggml-ivrit-v3-turbo.bin": + return 1200 * 1024 * 1024 case "ggml-large-v3.bin": return 2000 * 1024 * 1024 default: @@ -170,7 +172,17 @@ final class WhisperProvider: TranscriptionProvider { // Load the model DebugLogger.shared.info("WhisperProvider: Loading Whisper model...", source: "WhisperProvider") - self.whisper = Whisper(fromFileURL: self.modelURL) + let whisperInstance = Whisper(fromFileURL: self.modelURL) + + // Language-specialized models (e.g. ivrit.ai Hebrew) force the decode language so + // whisper.cpp does not fall back to auto-detection on short or accented audio. + if let languageCode = targetModel.forcedWhisperLanguageCode, + let language = WhisperLanguage(rawValue: languageCode) + { + whisperInstance.params.language = language + DebugLogger.shared.info("WhisperProvider: Forcing decode language to \(languageCode)", source: "WhisperProvider") + } + self.whisper = whisperInstance self.loadedModelName = currentModelName self.isReady = true @@ -259,16 +271,18 @@ final class WhisperProvider: TranscriptionProvider { // MARK: - Model Download private func downloadModel(progressHandler: ((Double) -> Void)?) 
async throws { - // Whisper models are hosted on Hugging Face - let modelURLString = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(modelName)" - - guard let url = URL(string: modelURLString) else { + // Whisper models are hosted on Hugging Face. Most live in ggerganov/whisper.cpp, but + // language-specialized models (e.g. ivrit.ai Hebrew) ship from their own repo, so the + // remote URL is resolved per-model rather than built from the local filename. + let targetModel = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel + guard let url = targetModel.whisperDownloadURL else { throw NSError( domain: "WhisperProvider", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid model URL"] ) } + let modelURLString = url.absoluteString DebugLogger.shared.info("WhisperProvider: Downloading from \(modelURLString)", source: "WhisperProvider") diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 9edb2cb9..40d4d5d0 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -2313,6 +2313,8 @@ struct OnboardingFlowView: View { return "Nemotron Offline" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLarge: return "Whisper" + case .whisperIvritV3Turbo: + return "ivrit.ai · Whisper v3 Turbo" default: return model.displayName } diff --git a/Tests/FluidDictationIntegrationTests/IvritHebrewModelTests.swift b/Tests/FluidDictationIntegrationTests/IvritHebrewModelTests.swift new file mode 100644 index 00000000..61fa8804 --- /dev/null +++ b/Tests/FluidDictationIntegrationTests/IvritHebrewModelTests.swift @@ -0,0 +1,72 @@ +// ABOUTME: Tests for the ivrit.ai Hebrew Whisper model wiring (metadata + catalog routing). +// ABOUTME: Verifies Hebrew routes to the ivrit model first and forces Hebrew decoding.
+import Foundation +import XCTest + +@testable import FluidVoice_Debug + +final class IvritHebrewModelTests: XCTestCase { + private typealias Model = SettingsStore.SpeechModel + + // MARK: - Model metadata + + func testIvritModelLocalFilenameIsDistinctFromGenericWhisper() { + XCTAssertEqual(Model.whisperIvritV3Turbo.whisperModelFile, "ggml-ivrit-v3-turbo.bin") + } + + func testIvritModelIsTreatedAsWhisper() { + XCTAssertTrue(Model.whisperIvritV3Turbo.isWhisperModel) + } + + func testIvritModelForcesHebrewDecodeLanguage() { + // "iw" is SwiftWhisper's WhisperLanguage.hebrew raw value (legacy ISO 639 code). + XCTAssertEqual(Model.whisperIvritV3Turbo.forcedWhisperLanguageCode, "iw") + } + + func testIvritModelDownloadsFromIvritAIRepoNotGgerganov() { + let url = Model.whisperIvritV3Turbo.whisperDownloadURL + XCTAssertNotNil(url) + XCTAssertEqual(url?.host, "huggingface.co") + XCTAssertTrue(url?.path.contains("ivrit-ai") ?? false, "Expected ivrit.ai HF repo, got \(String(describing: url))") + } + + func testGenericWhisperStillDownloadsFromGgerganov() { + let url = Model.whisperSmall.whisperDownloadURL + XCTAssertTrue(url?.path.contains("ggerganov/whisper.cpp") ?? false) + } + + // MARK: - Catalog routing + + func testHebrewRoutesToIvritModelWhenAvailable() { + let routes = VoiceEngineLanguageCatalog.routes( + forLanguageID: "he", + availableModels: [.whisperIvritV3Turbo] + ) + XCTAssertTrue( + routes.contains { $0.model == .whisperIvritV3Turbo }, + "Hebrew should produce an ivrit.ai route when the model is available" + ) + } + + func testIvritModelIsTheRecommendedHebrewEngine() { + // When several Hebrew-capable engines are available, the ivrit model should rank first. 
+ let routes = VoiceEngineLanguageCatalog.routes( + forLanguageID: "he", + availableModels: [.whisperSmall, .nemotronOffline, .whisperIvritV3Turbo, .appleSpeech] + ) + XCTAssertEqual(routes.first?.model, .whisperIvritV3Turbo) + } + + func testEnglishDoesNotRouteToIvritModel() { + let routes = VoiceEngineLanguageCatalog.routes( + forLanguageID: "en", + availableModels: Model.allCases + ) + XCTAssertFalse(routes.contains { $0.model == .whisperIvritV3Turbo }) + } + + func testHebrewLanguageIsListedWhenIvritModelAvailable() { + let languages = VoiceEngineLanguageCatalog.allLanguages(availableModels: [.whisperIvritV3Turbo]) + XCTAssertTrue(languages.contains { $0.id == "he" }) + } +} From 39232b74a9bdd2ff05c1e2dd06f34e5ff1de0474 Mon Sep 17 00:00:00 2001 From: Nitzan Bar-Ness Date: Tue, 23 Jun 2026 17:33:29 -0700 Subject: [PATCH 2/2] Register IvritHebrewModelTests in the Xcode test target The new test file existed on disk but was not referenced in project.pbxproj, so the manually-enumerated FluidDictationIntegrationTests target never compiled it (no CI coverage). Add the file reference, build file, group membership, and sources build-phase entry, mirroring DictationE2ETests.swift. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Fluid.xcodeproj/project.pbxproj | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj index 30da8f2d..be9e8697 100644 --- a/Fluid.xcodeproj/project.pbxproj +++ b/Fluid.xcodeproj/project.pbxproj @@ -13,6 +13,7 @@ 7C3697892ED70F9C005874CE /* DynamicNotchKit in Frameworks */ = {isa = PBXBuildFile; productRef = 7C3697882ED70F9C005874CE /* DynamicNotchKit */; }; 7C5AF14B2F15041600DE21B0 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = 7C5AF14A2F15041600DE21B0 /* MediaRemoteAdapter */; }; 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; }; + 7CDB0A372F3C4D5600FB7CAD /* IvritHebrewModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */; }; 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; }; 7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; }; 7CDB0A302F3C4D5600FB7CAD /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */; }; @@ -33,6 +34,7 @@ 7C078D8F2E3B339200FB7CAC /* FluidVoice Debug.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FluidVoice Debug.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; 
path = DictationE2ETests.swift; sourceTree = "<group>"; }; + 7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = IvritHebrewModelTests.swift; sourceTree = "<group>"; }; 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = "<group>"; }; 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = "<group>"; }; 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Platforms/MacOSX.platform/Developer/Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; }; @@ -103,6 +105,7 @@ 7CDB0A262F3C4D5600FB7CAD /* Helpers */, 7CDB0A272F3C4D5600FB7CAD /* Resources */, 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */, + 7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */, ); path = FluidDictationIntegrationTests; sourceTree = "<group>"; @@ -258,6 +261,7 @@ files = ( 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */, 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */, + 7CDB0A372F3C4D5600FB7CAD /* IvritHebrewModelTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; };