Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Fluid.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
7C3697892ED70F9C005874CE /* DynamicNotchKit in Frameworks */ = {isa = PBXBuildFile; productRef = 7C3697882ED70F9C005874CE /* DynamicNotchKit */; };
7C5AF14B2F15041600DE21B0 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = 7C5AF14A2F15041600DE21B0 /* MediaRemoteAdapter */; };
7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; };
7CDB0A372F3C4D5600FB7CAD /* IvritHebrewModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */; };
7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; };
7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; };
7CDB0A302F3C4D5600FB7CAD /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */; };
Expand All @@ -33,6 +34,7 @@
7C078D8F2E3B339200FB7CAC /* FluidVoice Debug.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FluidVoice Debug.app"; sourceTree = BUILT_PRODUCTS_DIR; };
7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = "<group>"; };
7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = IvritHebrewModelTests.swift; sourceTree = "<group>"; };
7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = "<group>"; };
7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = "<group>"; };
7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Platforms/MacOSX.platform/Developer/Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; };
Expand Down Expand Up @@ -103,6 +105,7 @@
7CDB0A262F3C4D5600FB7CAD /* Helpers */,
7CDB0A272F3C4D5600FB7CAD /* Resources */,
7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */,
7CDB0A362F3C4D5600FB7CAD /* IvritHebrewModelTests.swift */,
);
path = FluidDictationIntegrationTests;
sourceTree = "<group>";
Expand Down Expand Up @@ -258,6 +261,7 @@
files = (
7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */,
7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */,
7CDB0A372F3C4D5600FB7CAD /* IvritHebrewModelTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
46 changes: 44 additions & 2 deletions Sources/Fluid/Persistence/SettingsStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3536,6 +3536,7 @@ final class SettingsStore: ObservableObject {
case whisperMedium = "whisper-medium"
case whisperLargeTurbo = "whisper-large-turbo" // temporarily disabled in UI
case whisperLarge = "whisper-large"
case whisperIvritV3Turbo = "whisper-ivrit-v3-turbo" // Hebrew-specialized Whisper (ivrit.ai)

var id: String {
rawValue
Expand All @@ -3561,6 +3562,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return "Whisper Medium"
case .whisperLargeTurbo: return "Whisper Large Turbo (Disabled)"
case .whisperLarge: return "Whisper Large"
case .whisperIvritV3Turbo: return "Whisper Hebrew (ivrit.ai Turbo)"
}
}

Expand All @@ -3577,6 +3579,8 @@ final class SettingsStore: ObservableObject {
case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH"
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
return "99 Languages"
case .whisperIvritV3Turbo:
return "Hebrew"
}
}

Expand All @@ -3598,6 +3602,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return "~1.5 GB"
case .whisperLargeTurbo: return "~1.6 GB"
case .whisperLarge: return "~2.9 GB"
case .whisperIvritV3Turbo: return "~1.6 GB"
}
}

Expand All @@ -3624,6 +3629,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return "ggml-medium.bin"
case .whisperLargeTurbo: return "ggml-large-v3-turbo.bin"
case .whisperLarge: return "ggml-large-v3.bin"
case .whisperIvritV3Turbo: return "ggml-ivrit-v3-turbo.bin"
default: return nil
}
}
Expand All @@ -3637,6 +3643,29 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return "medium"
case .whisperLargeTurbo: return "large-v3-turbo"
case .whisperLarge: return "large-v3"
case .whisperIvritV3Turbo: return "ivrit-v3-turbo"
default: return nil
}
}

/// Where the ggml weights for this model are fetched from.
///
/// Returns nil when `whisperModelFile` is nil. Stock Whisper weights are addressed by their
/// local filename inside the ggerganov/whisper.cpp Hugging Face repo. The ivrit.ai Hebrew
/// weights are published in a separate repo as `ggml-model.bin`, so for that model the
/// remote path does not follow the local cache filename.
var whisperDownloadURL: URL? {
    guard let localFilename = self.whisperModelFile else { return nil }

    // The ivrit.ai model is the only one whose remote name differs from its cached name.
    if case .whisperIvritV3Turbo = self {
        return URL(string: "https://huggingface.co/ivrit-ai/whisper-large-v3-turbo-ggml/resolve/main/ggml-model.bin")
    }
    return URL(string: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(localFilename)")
}

/// Decode language that whisper.cpp is pinned to for language-specialized models.
///
/// The value is a WhisperLanguage raw value ("iw" for Hebrew). nil means this model does
/// not force a language, leaving auto-detection in place.
var forcedWhisperLanguageCode: String? {
    if case .whisperIvritV3Turbo = self {
        return "iw" // SwiftWhisper's WhisperLanguage.hebrew rawValue
    }
    return nil
}
Expand Down Expand Up @@ -3717,6 +3746,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return "Medium Quality"
case .whisperLargeTurbo: return "Higher Quality but Faster"
case .whisperLarge: return "Maximum Accuracy"
case .whisperIvritV3Turbo: return "Hebrew - ivrit.ai"
}
}

Expand Down Expand Up @@ -3758,6 +3788,8 @@ final class SettingsStore: ObservableObject {
return "Near-maximum accuracy with optimized speed."
case .whisperLarge:
return "Best possible accuracy. Large download and memory usage."
case .whisperIvritV3Turbo:
return "Hebrew-specialized Whisper Large v3 Turbo, fine-tuned by ivrit.ai on real Israeli speech. Best accuracy for Hebrew and mixed Hebrew-English dictation."
}
}

Expand Down Expand Up @@ -3786,6 +3818,8 @@ final class SettingsStore: ObservableObject {
return 8.0
case .whisperLarge:
return 10.0 // Large model needs ~6-8GB working memory + model size
case .whisperIvritV3Turbo:
return 8.0 // large-v3-turbo class model (~1.6 GB) + working memory
}
}

Expand Down Expand Up @@ -3823,6 +3857,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return 2
case .whisperLargeTurbo: return 3
case .whisperLarge: return 1
case .whisperIvritV3Turbo: return 3
}
}

Expand All @@ -3844,6 +3879,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return 4
case .whisperLargeTurbo: return 5
case .whisperLarge: return 5
case .whisperIvritV3Turbo: return 5
}
}

Expand All @@ -3865,6 +3901,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return 0.40
case .whisperLargeTurbo: return 0.65
case .whisperLarge: return 0.20
case .whisperIvritV3Turbo: return 0.65
}
}

Expand All @@ -3886,6 +3923,7 @@ final class SettingsStore: ObservableObject {
case .whisperMedium: return 0.80
case .whisperLargeTurbo: return 0.95
case .whisperLarge: return 1.00
case .whisperIvritV3Turbo: return 0.95
}
}

Expand Down Expand Up @@ -3926,7 +3964,7 @@ final class SettingsStore: ObservableObject {
/// Large Whisper models are too slow for streaming, so they only do final transcription on stop.
var supportsStreaming: Bool {
switch self {
case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge, .whisperIvritV3Turbo:
return false // Too slow for real-time chunk processing
default:
return true // All other models support streaming
Expand Down Expand Up @@ -3983,7 +4021,7 @@ final class SettingsStore: ObservableObject {
return .qwen
case .cohereTranscribeSixBit:
return .cohere
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge, .whisperIvritV3Turbo:
return .openai
}
}
Expand Down Expand Up @@ -4082,6 +4120,8 @@ final class SettingsStore: ObservableObject {
return "Apple"
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
return "OpenAI"
case .whisperIvritV3Turbo:
return "ivrit.ai"
}
}

Expand All @@ -4106,6 +4146,8 @@ final class SettingsStore: ObservableObject {
return "#A2AAAD" // Apple Gray
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
return "#10A37F" // OpenAI Teal
case .whisperIvritV3Turbo:
return "#0B6E4F" // ivrit.ai green
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions Sources/Fluid/Persistence/VoiceEngineLanguageCatalog.swift
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ enum VoiceEngineLanguageCatalog {
private static func routeCandidates(for language: VoiceEngineLanguage) -> [VoiceEngineLanguageRoute] {
var routes: [VoiceEngineLanguageRoute] = []

// Hebrew-specialized Whisper model (ivrit.ai). Listed first so it is the
// recommended engine when Hebrew is selected. The model forces decode language to
// Hebrew internally (see SpeechModel.forcedWhisperLanguageCode).
if language.id == "he" {
routes.append(Self.route(language, .whisperIvritV3Turbo, .whisper(languageCode: "iw")))
}

if language.id == "en" {
routes.append(Self.route(language, .parakeetTDTv2, .automatic))
routes.append(Self.route(language, .parakeetRealtime, .automatic))
Expand Down
24 changes: 19 additions & 5 deletions Sources/Fluid/Services/WhisperProvider.swift
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ final class WhisperProvider: TranscriptionProvider {
return 1000 * 1024 * 1024
// case "ggml-large-v3-turbo.bin": // buggy - so removed temporarily
// return 1200 * 1024 * 1024
case "ggml-ivrit-v3-turbo.bin":
return 1200 * 1024 * 1024
case "ggml-large-v3.bin":
return 2000 * 1024 * 1024
default:
Expand Down Expand Up @@ -170,7 +172,17 @@ final class WhisperProvider: TranscriptionProvider {

// Load the model
DebugLogger.shared.info("WhisperProvider: Loading Whisper model...", source: "WhisperProvider")
self.whisper = Whisper(fromFileURL: self.modelURL)
let whisperInstance = Whisper(fromFileURL: self.modelURL)

// Language-specialized models (e.g. ivrit.ai Hebrew) force the decode language so
// whisper.cpp does not fall back to auto-detection on short or accented audio.
if let languageCode = targetModel.forcedWhisperLanguageCode,
let language = WhisperLanguage(rawValue: languageCode)
{
whisperInstance.params.language = language
DebugLogger.shared.info("WhisperProvider: Forcing decode language to \(languageCode)", source: "WhisperProvider")
}
self.whisper = whisperInstance

self.loadedModelName = currentModelName
self.isReady = true
Expand Down Expand Up @@ -259,16 +271,18 @@ final class WhisperProvider: TranscriptionProvider {
// MARK: - Model Download

private func downloadModel(progressHandler: ((Double) -> Void)?) async throws {
// Whisper models are hosted on Hugging Face
let modelURLString = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(modelName)"

guard let url = URL(string: modelURLString) else {
// Whisper models are hosted on Hugging Face. Most live in ggerganov/whisper.cpp, but
// language-specialized models (e.g. ivrit.ai Hebrew) ship from their own repo, so the
// remote URL is resolved per-model rather than built from the local filename.
let targetModel = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel
guard let url = targetModel.whisperDownloadURL else {
throw NSError(
domain: "WhisperProvider",
code: -1,
userInfo: [NSLocalizedDescriptionKey: "Invalid model URL"]
)
}
let modelURLString = url.absoluteString

DebugLogger.shared.info("WhisperProvider: Downloading from \(modelURLString)", source: "WhisperProvider")

Expand Down
2 changes: 2 additions & 0 deletions Sources/Fluid/UI/WelcomeView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2313,6 +2313,8 @@ struct OnboardingFlowView: View {
return "Nemotron Offline"
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLarge:
return "Whisper"
case .whisperIvritV3Turbo:
return "ivrit.ai · Whisper v3 Turbo"
default:
return model.displayName
}
Expand Down
72 changes: 72 additions & 0 deletions Tests/FluidDictationIntegrationTests/IvritHebrewModelTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// ABOUTME: Tests for the ivrit.ai Hebrew Whisper model wiring (metadata + catalog routing).
// ABOUTME: Verifies Hebrew routes to the ivrit model first and forces Hebrew decoding.
import Foundation
import XCTest

@testable import FluidVoice_Debug

final class IvritHebrewModelTests: XCTestCase {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Add this test file to the Xcode test target

This new test class is never compiled or run by the configured test scheme: I checked Fluid.xcodeproj/project.pbxproj, and the FluidDictationIntegrationTests PBXSourcesBuildPhase still lists only AudioFixtureLoader.swift and DictationE2ETests.swift, with no file reference/build file for IvritHebrewModelTests.swift. Because this repo's test target is manually enumerated rather than file-system-synchronized, the ivrit routing/metadata checks added here provide no CI coverage until the file is added to the target.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed in 39232b7. Added IvritHebrewModelTests.swift to the FluidDictationIntegrationTests target (file reference + build file + group membership + Sources build phase), mirroring DictationE2ETests.swift, so the routing/metadata checks are now compiled and run by the test scheme.

private typealias Model = SettingsStore.SpeechModel

// MARK: - Model metadata

/// Pins the local cache filename of the ivrit.ai model and checks that it differs from the
/// filenames used by the stock Whisper large models.
func testIvritModelLocalFilenameIsDistinctFromGenericWhisper() {
    let ivritFile = Model.whisperIvritV3Turbo.whisperModelFile
    XCTAssertEqual(ivritFile, "ggml-ivrit-v3-turbo.bin")

    // Cover the "distinct" half of the test name: the literal check alone would still pass
    // if a stock model were later given the same filename.
    XCTAssertNotEqual(ivritFile, Model.whisperLargeTurbo.whisperModelFile)
    XCTAssertNotEqual(ivritFile, Model.whisperLarge.whisperModelFile)
}

/// The ivrit.ai model must report itself as a Whisper model.
func testIvritModelIsTreatedAsWhisper() {
    let ivritModel = Model.whisperIvritV3Turbo
    XCTAssertTrue(ivritModel.isWhisperModel)
}

/// The ivrit.ai model must pin the decode language to Hebrew.
func testIvritModelForcesHebrewDecodeLanguage() {
    let forcedCode = Model.whisperIvritV3Turbo.forcedWhisperLanguageCode
    // SwiftWhisper spells Hebrew with the legacy ISO 639 code "iw" (WhisperLanguage.hebrew).
    XCTAssertEqual(forcedCode, "iw")
}

/// The ivrit.ai model must be downloaded from the ivrit-ai Hugging Face repo and must not
/// fall through to the ggerganov/whisper.cpp repo used by the stock models.
func testIvritModelDownloadsFromIvritAIRepoNotGgerganov() {
    // Unwrap once so a missing URL produces a single clear failure instead of a chain of
    // optional comparisons.
    guard let url = Model.whisperIvritV3Turbo.whisperDownloadURL else {
        return XCTFail("Expected a download URL for the ivrit.ai model")
    }

    XCTAssertEqual(url.host, "huggingface.co")
    XCTAssertTrue(url.path.contains("ivrit-ai"), "Expected ivrit.ai HF repo, got \(url.absoluteString)")
    // Cover the "NotGgerganov" half of the test name explicitly.
    XCTAssertFalse(url.path.contains("ggerganov"), "ivrit.ai model must not use the ggerganov repo, got \(url.absoluteString)")
}

/// Stock Whisper models must keep resolving to the ggerganov/whisper.cpp repo.
func testGenericWhisperStillDownloadsFromGgerganov() {
    let remotePath = Model.whisperSmall.whisperDownloadURL?.path
    // A missing URL counts as a failure, same as a path in the wrong repo.
    let servedFromGgerganov = remotePath?.contains("ggerganov/whisper.cpp") ?? false
    XCTAssertTrue(servedFromGgerganov)
}

// MARK: - Catalog routing

/// With only the ivrit.ai model available, Hebrew must still yield a route to it.
func testHebrewRoutesToIvritModelWhenAvailable() {
    let hebrewRoutes = VoiceEngineLanguageCatalog.routes(
        forLanguageID: "he",
        availableModels: [.whisperIvritV3Turbo]
    )
    let hasIvritRoute = hebrewRoutes.contains { route in
        route.model == .whisperIvritV3Turbo
    }
    XCTAssertTrue(hasIvritRoute, "Hebrew should produce an ivrit.ai route when the model is available")
}

/// Among several available engines, the ivrit.ai model must be the first Hebrew route.
func testIvritModelIsTheRecommendedHebrewEngine() {
    // The ivrit model is deliberately not first in the input, so the assertion checks the
    // catalog's ordering rather than the caller's.
    let hebrewRoutes = VoiceEngineLanguageCatalog.routes(
        forLanguageID: "he",
        availableModels: [.whisperSmall, .nemotronOffline, .whisperIvritV3Turbo, .appleSpeech]
    )
    let topRoute = hebrewRoutes.first
    XCTAssertEqual(topRoute?.model, .whisperIvritV3Turbo)
}

/// Even with every model available, English must never be routed to the Hebrew model.
func testEnglishDoesNotRouteToIvritModel() {
    let englishRoutes = VoiceEngineLanguageCatalog.routes(
        forLanguageID: "en",
        availableModels: Model.allCases
    )
    let routesToIvrit = englishRoutes.contains { route in
        route.model == .whisperIvritV3Turbo
    }
    XCTAssertFalse(routesToIvrit)
}

/// With only the ivrit.ai model available, the catalog must list Hebrew ("he").
func testHebrewLanguageIsListedWhenIvritModelAvailable() {
    let listedLanguages = VoiceEngineLanguageCatalog.allLanguages(availableModels: [.whisperIvritV3Turbo])
    let listsHebrew = listedLanguages.contains { language in
        language.id == "he"
    }
    XCTAssertTrue(listsHebrew)
}
}