diff --git a/Sources/Fluid/Networking/ModelDownloader.swift b/Sources/Fluid/Networking/ModelDownloader.swift
index e7b067a9..8c817e19 100644
--- a/Sources/Fluid/Networking/ModelDownloader.swift
+++ b/Sources/Fluid/Networking/ModelDownloader.swift
@@ -104,13 +104,13 @@ final class HuggingFaceModelDownloader {
                 let files = try await listFilesRecursively(relativePath: item.path)
                 for rel in files {
                     let dest = targetRoot.appendingPathComponent(rel)
-                    if FileManager.default.fileExists(atPath: dest.path) == false {
+                    if self.needsDownload(relativePath: rel, at: dest) {
                         pendingFiles.append(rel)
                     }
                 }
             } else {
                 let dest = targetRoot.appendingPathComponent(item.path)
-                if FileManager.default.fileExists(atPath: dest.path) == false {
+                if self.needsDownload(relativePath: item.path, at: dest) {
                     pendingFiles.append(item.path)
                 }
             }
@@ -170,6 +170,36 @@ final class HuggingFaceModelDownloader {
         self.requiredItemsList
     }
 
+    /// Reports whether `relativePath` still has to be fetched into `destination`.
+    ///
+    /// Returns `true` when nothing exists at `destination`, and also when the file that is
+    /// there sniffs as HTML/markup via `cachedFileIsMarkup` (a bad payload cached before
+    /// download-time validation existed, see #353). In the markup case the file is removed
+    /// here so a clean copy can replace it; a failed removal is only logged and the path is
+    /// still reported as pending. A file `cachedFileIsMarkup` does not flag — including one
+    /// it cannot read — is kept and reported as not needing a download.
+    private func needsDownload(relativePath: String, at destination: URL) -> Bool {
+        let fileManager = FileManager.default
+        let isPresent = fileManager.fileExists(atPath: destination.path)
+        // `isPresent` is tested first, so a missing file is never opened for sniffing.
+        guard isPresent, Self.cachedFileIsMarkup(at: destination) else {
+            return !isPresent
+        }
+        DebugLogger.shared.warning(
+            "[ModelDL] Cached file is an HTML/markup page, not model data; deleting to re-download: \(relativePath)",
+            source: "ModelDownloader"
+        )
+        do {
+            try fileManager.removeItem(at: destination)
+        } catch let removalError {
+            DebugLogger.shared.error(
+                "[ModelDL] Failed to delete corrupt cached file \(relativePath): \(removalError.localizedDescription)",
+                source: "ModelDownloader"
+            )
+        }
+        return true
+    }
+
     private func downloadDirectory(relativePath: String, to destination: URL) async throws {
         try FileManager.default.createDirectory(at: destination, withIntermediateDirectories: true)
 
@@ -196,6 +226,10 @@ final class HuggingFaceModelDownloader {
                         continuation.resume(throwing: NSError(domain: "HF", code: http.statusCode))
                         return
                     }
+                    // Reject HTML error/block pages (e.g. a corporate proxy returning its
+                    // notification page with HTTP 200) before persisting them as a model
+                    // file, otherwise a corrupt payload is cached permanently. See #353.
+                    try Self.validateDownloadedFile(at: tempUrl, response: response, relativePath: relativePath)
                     try FileManager.default.createDirectory(at: destination.deletingLastPathComponent(), withIntermediateDirectories: true)
                     if FileManager.default.fileExists(atPath: destination.path) {
                         try FileManager.default.removeItem(at: destination)
@@ -203,6 +237,8 @@ final class HuggingFaceModelDownloader {
                     try FileManager.default.moveItem(at: tempUrl, to: destination)
                     continuation.resume()
                 } catch {
+                    // Never leave a rejected/partial payload behind.
+                    try? FileManager.default.removeItem(at: tempUrl)
                     continuation.resume(throwing: error)
                 }
             }
@@ -213,6 +249,144 @@ final class HuggingFaceModelDownloader {
         }
     }
 
+    // MARK: - Content Validation
+
+    /// Validates a freshly-downloaded artifact before it is persisted as a model file.
+    ///
+    /// A proxy / secure web gateway can answer HTTP 200 with an HTML (or XML) block page in
+    /// place of the real file (see #353). The payload is rejected when the response carries
+    /// a markup `Content-Type`, or when the first 512 bytes of the file pass `looksLikeHTML`.
+    /// - Parameter response: May be `nil`; then only the file's leading bytes are sniffed.
+    /// - Parameter relativePath: Used only to label the thrown error.
+    /// - Throws: `invalidContentError` for markup, or the error from opening/reading `fileURL`.
+    static func validateDownloadedFile(at fileURL: URL, response: URLResponse?, relativePath: String) throws {
+        if let http = response as? HTTPURLResponse,
+           let contentType = http.value(forHTTPHeaderField: "Content-Type") {
+            let lowered = contentType.lowercased()
+            if lowered.contains("text/html") || lowered.contains("text/xml") || lowered.contains("application/xml") {
+                throw Self.invalidContentError(
+                    relativePath: relativePath,
+                    detail: "the server returned a markup page (Content-Type: \(contentType))"
+                )
+            }
+        }
+        // Sniff a small prefix only (model files can be gigabytes). A read failure is rethrown,
+        // like an open failure, so an unreadable download is never accepted as validated.
+        let handle = try FileHandle(forReadingFrom: fileURL)
+        defer { try? handle.close() }
+        let prefix = try handle.read(upToCount: 512) ?? Data()
+        if Self.looksLikeHTML(prefix) {
+            throw Self.invalidContentError(
+                relativePath: relativePath,
+                detail: "the downloaded file is an HTML/markup document, not the expected model data"
+            )
+        }
+    }
+
+    /// Byte-sniffs a file already on disk and reports whether it is HTML/markup rather than
+    /// model data (a corrupt artifact cached before download-time validation existed, #353).
+    ///
+    /// Only the first 512 bytes are read and handed to `looksLikeHTML`; no `Content-Type` is
+    /// involved. If the file cannot be opened or read the result is `false`, so a caller
+    /// never treats an unreadable file as corrupt.
+    static func cachedFileIsMarkup(at fileURL: URL) -> Bool {
+        do {
+            let reader = try FileHandle(forReadingFrom: fileURL)
+            defer { try? reader.close() }
+            let head = try reader.read(upToCount: 512) ?? Data()
+            return Self.looksLikeHTML(head)
+        } catch {
+            return false
+        }
+    }
+
+    /// Scans cached entries under `root` and reports whether any of them is an HTML/markup
+    /// document instead of model data (see #353).
+    ///
+    /// Meant for a provider preflight that would otherwise trust a cache on file existence
+    /// alone and never reach the downloader's own per-file `needsDownload` check.
+    ///
+    /// Every element of `relativePaths` is resolved against `root` and may name a regular file
+    /// or a directory (e.g. a `.mlpackage` bundle). A file is sniffed with
+    /// `cachedFileIsMarkup`; a directory is walked recursively (hidden files skipped) and each
+    /// regular file inside is sniffed the same way. Entries are visited in order and the scan
+    /// stops at the first markup hit.
+    ///
+    /// The answer is conservative: a missing path, an unreadable file, a directory that cannot
+    /// be enumerated, or an empty directory contributes nothing, so the result is `false`
+    /// unless markup was actually read.
+    static func cachedPayloadContainsMarkup(root: URL, relativePaths: [String]) -> Bool {
+        let manager = FileManager.default
+        // `contains` short-circuits, so later entries are not touched after the first hit.
+        return relativePaths.contains { entry in
+            let entryURL = root.appendingPathComponent(entry)
+            var isDir: ObjCBool = false
+            guard manager.fileExists(atPath: entryURL.path, isDirectory: &isDir) else {
+                return false
+            }
+            guard isDir.boolValue else {
+                return Self.cachedFileIsMarkup(at: entryURL)
+            }
+            guard let walker = manager.enumerator(
+                at: entryURL,
+                includingPropertiesForKeys: [.isRegularFileKey],
+                options: [.skipsHiddenFiles]
+            ) else {
+                return false
+            }
+            for case let candidate as URL in walker {
+                // Only regular files are sniffed; anything whose type cannot be read is skipped.
+                let values = try? candidate.resourceValues(forKeys: [.isRegularFileKey])
+                if values?.isRegularFile == true, Self.cachedFileIsMarkup(at: candidate) {
+                    return true
+                }
+            }
+            return false
+        }
+    }
+
+    /// Returns `true` if `data` begins with an HTML / XML markup marker, ignoring a leading
+    /// UTF-8 BOM and any leading space, tab, LF or CR bytes. Only the first 512 bytes count.
+    ///
+    /// The marker is `<` immediately followed by an ASCII letter, `!`, `?` or `/`, which
+    /// covers `<!doctype`, `<html`, `<head>`, `<body>`, `<script>`, `<meta>`, comments
+    /// (`<!-- -->`), closing tags and `<?xml` declarations. A lone `<`, or `<` followed by
+    /// any other byte, is not treated as markup.
+    ///
+    /// Rationale (see issue #353): none of the artifacts this downloader is expected to fetch
+    /// starts with `<` — CoreML `.mlmodelc` / `.mlpackage` payloads are binary or JSON
+    /// starting with `{` / `[`, MIL program text starts with `program`, and `tokenizer.model`
+    /// is a SentencePiece binary — so a payload that does is taken to be a proxy/block page
+    /// standing in for the real file.
+    ///
+    /// - Parameter data: Leading bytes of the payload; may be empty (yields `false`).
+    static func looksLikeHTML(_ data: Data) -> Bool {
+        // Work on slices of the (at most 512-byte) prefix: narrowing a slice copies nothing,
+        // whereas `Array.removeFirst` shifts every remaining byte on each call.
+        var window = data.prefix(512)
+        if window.starts(with: [0xEF, 0xBB, 0xBF]) {
+            window = window.dropFirst(3)
+        }
+        let body = window.drop(while: { $0 == 0x20 || $0 == 0x09 || $0 == 0x0A || $0 == 0x0D })
+        // Must begin with `<` (0x3C) and have a following byte to classify.
+        guard body.count >= 2, body.first == 0x3C else {
+            return false
+        }
+        // Slice indices are not zero-based, so step from `startIndex` instead of using `[1]`.
+        // Accepted second bytes: an ASCII letter (`<html`), `!` (`<!doctype`, `<!--`),
+        // `?` (`<?xml`) or `/` (a closing tag); any other byte after `<` is not treated as markup.
+        let second = body[body.index(after: body.startIndex)]
+        let isAsciiLetter = (0x41...0x5A).contains(second) || (0x61...0x7A).contains(second)
+        return isAsciiLetter || second == 0x21 || second == 0x3F || second == 0x2F
+    }
+
+    /// Builds the `HF` / `-3` error thrown when a download turns out to be markup.
+    private static func invalidContentError(relativePath: String, detail: String) -> NSError {
+        let message =
+            "Could not download \(relativePath): \(detail). A network proxy or firewall may be blocking model downloads."
+        return NSError(domain: "HF", code: -3, userInfo: [NSLocalizedDescriptionKey: message])
+    }
+
     private final class DownloadProgressDelegate: NSObject, URLSessionDownloadDelegate {
         private let onProgress: ((Double) -> Void)?
         var onFinish: ((URL, URLResponse) -> Void)?
diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
index 5fc50ba3..5f73603f 100644
--- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
+++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
@@ -230,6 +230,14 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
         try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
         let isManagedDirectory = Self.isAppManagedArtifactsDirectory(directory, spec: spec)
 
+        // `validateArtifacts` proves the required entries exist and the manifest JSON decodes, but
+        // it does NOT byte-check the `.mlpackage` binaries — a network proxy can have returned an
+        // HTML block page (HTTP 200) in place of one, persisting markup as a model file. Re-sniff
+        // the present artifacts so such a payload forces the downloader to run (it then deletes +
+        // re-fetches the corrupt files via `needsDownload`) instead of being trusted forever. The
+        // outdated-bundle-stamp refresh below is preserved and takes precedence: a stamp-stale
+        // managed cache is still fully removed even if it is also markup-corrupt, so the bundle
+        // is wholly refreshed rather than only the corrupt files re-fetched. See #353.
         if spec.validateArtifacts(at: directory) {
             if isManagedDirectory, self.artifactBundleStampMatches(spec: spec, directory: directory) == false {
                 DebugLogger.shared.warning(
@@ -238,6 +246,11 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
                 )
                 try FileManager.default.removeItem(at: directory)
                 try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
+            } else if Self.cachedArtifactsAreMarkupCorrupt(spec: spec, directory: directory) {
+                DebugLogger.shared.warning(
+                    "ExternalCoreML: cached artifacts for \(directory.lastPathComponent) contain an HTML/markup payload (corrupt); re-downloading",
+                    source: "ExternalCoreML"
+                )
             } else {
                 DebugLogger.shared.info(
                     "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)",
@@ -248,7 +261,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
             }
         }
 
-        if spec.validateArtifacts(at: directory) {
+        if spec.validateArtifacts(at: directory),
+           !Self.cachedArtifactsAreMarkupCorrupt(spec: spec, directory: directory) {
             DebugLogger.shared.info(
                 "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)",
                 source: "ExternalCoreML"
@@ -307,6 +321,19 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
         return directory.standardizedFileURL.path == defaultCacheDirectory.standardizedFileURL.path
     }
 
+    /// Reports whether any of `spec.requiredEntries` under `directory` sniffs as HTML/markup
+    /// rather than model data (#353), i.e. a corrupt cache that an existence-style check
+    /// would still accept. Delegates to the downloader's `cachedPayloadContainsMarkup`, which
+    /// stays conservative: entries it cannot read are not reported as corrupt.
+    private static func cachedArtifactsAreMarkupCorrupt(
+        spec: ExternalCoreMLASRModelSpec,
+        directory: URL
+    ) -> Bool {
+        let entries = spec.requiredEntries
+        let downloader = HuggingFaceModelDownloader.self
+        return downloader.cachedPayloadContainsMarkup(root: directory, relativePaths: entries)
+    }
+
     private static func makeError(_ description: String) -> NSError {
         NSError(
             domain: "ExternalCoreMLTranscriptionProvider",
diff --git a/Sources/Fluid/Services/NemotronProvider.swift b/Sources/Fluid/Services/NemotronProvider.swift
index 6a32dba9..49a9b6d7 100644
--- a/Sources/Fluid/Services/NemotronProvider.swift
+++ b/Sources/Fluid/Services/NemotronProvider.swift
@@ -88,13 +88,27 @@ final class NemotronProvider: TranscriptionProvider {
         }
         try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
 
-        if self.modelsExistOnDisk() {
+        // A file-existence check alone would trust a corrupt cache: a network proxy can have
+        // returned an HTML block page (HTTP 200) in place of a model file, persisting markup as
+        // e.g. a `.mlpackage` binary or `tokenizer.model`. Re-sniff the present artifacts so such
+        // a payload forces a re-download (the downloader then deletes + re-fetches the bad files
+        // via `needsDownload`) instead of being loaded as a model forever. See #353.
+        let modelsPresent = self.modelsExistOnDisk()
+        let cachedArtifactsCorrupt = modelsPresent
+            && HuggingFaceModelDownloader.cachedPayloadContainsMarkup(root: dir, relativePaths: self.requiredFiles)
+        if modelsPresent && !cachedArtifactsCorrupt {
             DebugLogger.shared.info(
                 "Nemotron: artifacts present at \(dir.path); skipping download",
                 source: "Nemotron"
             )
             progressHandler?(0.8)
         } else {
+            if cachedArtifactsCorrupt {
+                DebugLogger.shared.warning(
+                    "Nemotron: cached artifacts at \(dir.path) contain an HTML/markup payload (corrupt); re-downloading",
+                    source: "Nemotron"
+                )
+            }
             DebugLogger.shared.info(
                 "Nemotron: artifacts missing; downloading from \(self.repositoryOwner)/\(self.repositoryName)",
                 source: "Nemotron"
diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift
index 96c02372..86281924 100644
--- a/Sources/Fluid/UI/WelcomeView.swift
+++ b/Sources/Fluid/UI/WelcomeView.swift
@@ -1903,6 +1903,12 @@ struct OnboardingFlowView: View {
                 try await self.asr.ensureAsrReady()
             } catch {
                 DebugLogger.shared.error("Failed to prepare onboarding voice model \(route.model.displayName): \(error)", source: "OnboardingFlowView")
+                // Surface the failure in the UI instead of only logging it, so the user
+                // isn't stuck at a disabled button. The shared ContentView alert (bound to
+                // asr.showError) presents this during onboarding. See #355.
+                self.asr.errorTitle = "Voice Model Setup Failed"
+                self.asr.errorMessage = error.localizedDescription
+                self.asr.showError = true
             }
             await self.asr.checkIfModelsExistAsync()
         }
diff --git a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
index ec528172..3f97b50a 100644
--- a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
+++ b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
@@ -519,6 +519,178 @@ final class DictationE2ETests: XCTestCase {
         XCTAssertFalse(SimpleUpdater.isRollbackVersion(nil, differentFrom: "1.5.11-beta.3"))
     }
 
+    // MARK: - Model download HTML/markup rejection (#353)
+
+    func testLooksLikeHTML_rejectsMarkupVariants() {
+        // Whatever markup token a proxy/block page or stand-in document opens with, it has
+        // to be flagged; matching only `<!doctype` / `<html` is not enough.
+        let markupSamples: [String] = [
+            "<!DOCTYPE html><html lang=\"en\"><head></head></html>",
+            "<html><body>Blocked by corporate proxy</body></html>",
+            "<script>window.location='https://proxy'</script>",
+            "<head><title>Access Denied</title></head>",
+            "<body>Forbidden</body>",
+            "<meta http-equiv=\"refresh\" content=\"0\">",
+            "<!-- corporate gateway notice -->",
+            "<?xml version=\"1.0\" encoding=\"UTF-8\"?><error>blocked</error>",
+            "</html>",
+            "<!doctype HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">",
+        ]
+        markupSamples.forEach { sample in
+            let payload = Data(sample.utf8)
+            let flagged = HuggingFaceModelDownloader.looksLikeHTML(payload)
+            // Name the offending sample in the failure message.
+            XCTAssertTrue(flagged, "Expected markup to be rejected: \(sample)")
+        }
+    }
+
+    func testLooksLikeHTML_rejectsLeadingWhitespaceAndBOMVariants() {
+        let utf8BOM = Data([0xEF, 0xBB, 0xBF])
+        let detect = HuggingFaceModelDownloader.looksLikeHTML
+        // A leading UTF-8 BOM and/or leading space, tab, LF or CR bytes must not hide the
+        // markup token that follows them.
+        let payloads: [Data] = [
+            Data("   \n\t<!DOCTYPE html>".utf8),  // whitespace, then doctype
+            Data("\r\n  <html>".utf8),  // CRLF + spaces, then tag
+            utf8BOM + Data("<html>".utf8),  // BOM, then tag
+            utf8BOM + Data("  \n<?xml version=\"1.0\"?>".utf8),  // BOM, whitespace, XML declaration
+        ]
+        for payload in payloads {
+            XCTAssertTrue(detect(payload))
+        }
+    }
+
+    func testLooksLikeHTML_acceptsModelArtifacts() {
+        // Only a LEADING `<` followed by a markup-ish byte counts as markup, so none of these
+        // payloads may be flagged — including JSON that merely contains `<pad>`.
+        let detect = HuggingFaceModelDownloader.looksLikeHTML
+        let accepted: [Data] = [
+            Data("{\"0\": \"<pad>\", \"1\": \"a\"}".utf8),  // JSON object (vocab / metadata / Manifest)
+            Data("[1, 2, 3]".utf8),  // JSON array body
+            Data("program(1.0)\n[buildInfo = ...]".utf8),  // MIL program text (`model.mil`)
+            Data([0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00]),  // binary magic-number prefix
+            Data([0x00, 0x00, 0x01, 0x3C, 0x68]),  // leading-NUL binary with `<h` further in
+            Data(),  // empty payload
+            Data("< not markup".utf8),  // `<` followed by a space
+            Data("<".utf8),  // lone `<` with nothing after it
+        ]
+        for payload in accepted {
+            XCTAssertFalse(detect(payload))
+        }
+    }
+
+    func testValidateDownloadedFile_rejectsHTMLBodyAndAcceptsJSON() throws {
+        let fileManager = FileManager.default
+        let scratch = fileManager.temporaryDirectory
+            .appendingPathComponent("FluidVoice-ValidateTest-\(UUID().uuidString)", isDirectory: true)
+        try fileManager.createDirectory(at: scratch, withIntermediateDirectories: true)
+        defer { try? fileManager.removeItem(at: scratch) }
+
+        // Writes `contents` to `name` inside the scratch directory and returns the file URL.
+        func stage(_ name: String, _ contents: String) throws -> URL {
+            let url = scratch.appendingPathComponent(name)
+            try Data(contents.utf8).write(to: url)
+            return url
+        }
+
+        // `response: nil` means there is no Content-Type to inspect, so the HTML body has to be
+        // caught by the leading-bytes sniff alone.
+        let blockPage = try stage("coremldata.bin", "<!DOCTYPE html><html><body>Blocked</body></html>")
+        XCTAssertThrowsError(
+            try HuggingFaceModelDownloader.validateDownloadedFile(at: blockPage, response: nil, relativePath: "coremldata.bin")
+        )
+
+        // A JSON vocab payload must pass validation even though it contains `<pad>`.
+        let vocab = try stage("parakeet_v3_vocab.json", "{\"0\": \"<pad>\", \"1\": \"the\"}")
+        XCTAssertNoThrow(
+            try HuggingFaceModelDownloader.validateDownloadedFile(
+                at: vocab, response: nil, relativePath: "parakeet_v3_vocab.json"
+            )
+        )
+    }
+
+    func testCachedFileIsMarkup_detectsCachedCorruptHTMLAndAcceptsModelData() throws {
+        // Cached-file path of #353: an HTML payload already on disk must be detected (so it
+        // gets re-downloaded), real model data must not be flagged, and a path that cannot be
+        // opened must be reported as not markup so it is never deleted on uncertainty.
+        let fileManager = FileManager.default
+        let scratch = fileManager.temporaryDirectory
+            .appendingPathComponent("FluidVoice-CachedMarkupTest-\(UUID().uuidString)", isDirectory: true)
+        try fileManager.createDirectory(at: scratch, withIntermediateDirectories: true)
+        defer { try? fileManager.removeItem(at: scratch) }
+
+        // Fixtures: a block page under a model file name, a JSON vocab, and a path never written.
+        let blockPage = scratch.appendingPathComponent("coremldata.bin")
+        let vocab = scratch.appendingPathComponent("parakeet_v3_vocab.json")
+        let absent = scratch.appendingPathComponent("does-not-exist.bin")
+        try Data("<!DOCTYPE html><html><body>Blocked by proxy</body></html>".utf8).write(to: blockPage)
+        try Data("{\"0\": \"<pad>\", \"1\": \"the\"}".utf8).write(to: vocab)
+
+        // A proxy page stored under a model file name is flagged as markup.
+        XCTAssertTrue(HuggingFaceModelDownloader.cachedFileIsMarkup(at: blockPage))
+        // A JSON vocab payload is not flagged.
+        XCTAssertFalse(HuggingFaceModelDownloader.cachedFileIsMarkup(at: vocab))
+        // A path that was never written cannot be opened; that counts as not markup.
+        XCTAssertFalse(HuggingFaceModelDownloader.cachedFileIsMarkup(at: absent))
+    }
+
+    func testCachedPayloadContainsMarkup_detectsCorruptFileInPresentArtifactTree() throws {
+        // Provider-preflight path of #353. The steps below mutate one on-disk tree in sequence:
+        // all-valid tree -> corrupt file nested in the package -> corrupt loose file -> restored.
+        // Markup must be reported in the two corrupt states and in neither valid state, and
+        // missing / empty required entries must not be reported as corrupt.
+        let root = FileManager.default.temporaryDirectory
+            .appendingPathComponent("FluidVoice-CachedPayloadTest-\(UUID().uuidString)", isDirectory: true)
+        try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
+        defer { try? FileManager.default.removeItem(at: root) }
+
+        // Package fixture: `Manifest.json` at the top plus `weight.bin` three directories down.
+        let packageName = "encoder.mlpackage"
+        let weightsDir = root.appendingPathComponent(packageName)
+            .appendingPathComponent("Data/com.apple.CoreML/weights", isDirectory: true)
+        try FileManager.default.createDirectory(at: weightsDir, withIntermediateDirectories: true)
+        let manifestURL = root.appendingPathComponent(packageName).appendingPathComponent("Manifest.json")
+        try Data("{\"fileFormatVersion\": \"1.0.0\"}".utf8).write(to: manifestURL)
+        let weightURL = weightsDir.appendingPathComponent("weight.bin")
+        try Data([0x00, 0x01, 0x02, 0x03, 0x04]).write(to: weightURL)
+
+        // A loose required file next to the package, filled with non-markup bytes.
+        let tokenizerURL = root.appendingPathComponent("tokenizer.model")
+        try Data([0x0A, 0x09, 0x05, 0x00]).write(to: tokenizerURL)
+
+        let entries = [packageName, "tokenizer.model"]
+
+        // State 1 — every file is valid: nothing may be flagged.
+        XCTAssertFalse(
+            HuggingFaceModelDownloader.cachedPayloadContainsMarkup(root: root, relativePaths: entries)
+        )
+
+        // State 2 — the nested `weight.bin` is overwritten with an HTML page: must be detected.
+        try Data("<!DOCTYPE html><html><body>Blocked by proxy</body></html>".utf8).write(to: weightURL)
+        XCTAssertTrue(
+            HuggingFaceModelDownloader.cachedPayloadContainsMarkup(root: root, relativePaths: entries)
+        )
+
+        // State 3 — `weight.bin` restored, the loose tokenizer corrupted instead: still detected.
+        try Data([0x00, 0x01, 0x02, 0x03, 0x04]).write(to: weightURL)
+        try Data("<html><head></head></html>".utf8).write(to: tokenizerURL)
+        XCTAssertTrue(
+            HuggingFaceModelDownloader.cachedPayloadContainsMarkup(root: root, relativePaths: entries)
+        )
+
+        // State 4 — tokenizer restored; an empty directory and a missing path are queried.
+        // Neither contains readable markup, so the result must be `false`.
+        try Data([0x0A, 0x09, 0x05, 0x00]).write(to: tokenizerURL)
+        let emptyPackage = root.appendingPathComponent("empty.mlpackage", isDirectory: true)
+        try FileManager.default.createDirectory(at: emptyPackage, withIntermediateDirectories: true)
+        XCTAssertFalse(
+            HuggingFaceModelDownloader.cachedPayloadContainsMarkup(
+                root: root,
+                relativePaths: ["empty.mlpackage", "does-not-exist.json"]
+            )
+        )
+    }
+
     private static func modelDirectoryForRun() -> URL {
         // Use a stable path on CI so GitHub Actions cache can speed up runs.
         if ProcessInfo.processInfo.environment["GITHUB_ACTIONS"] == "true" ||