From 5b9e072d0b06cf6f64a19e5c3280216077b31065 Mon Sep 17 00:00:00 2001
From: Alexey Masolov
Date: Mon, 16 Mar 2026 17:01:35 +1100
Subject: [PATCH] fix: respect HF_HUB_OFFLINE in download_model to avoid network calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When HF_HUB_OFFLINE is set to a truthy value (1, true, yes, on),
download_model() should treat local_files_only as True to avoid any
network calls.

Currently, even with the local-cache-first pass (which may fail due to
missing metadata), the retry loop still calls
download_files_from_huggingface() without local_files_only, which
triggers model_info() — a network API call that immediately fails in
offline mode. This causes an unnecessary fallback to GCS download from
storage.googleapis.com.

By setting local_files_only=True when HF_HUB_OFFLINE is enabled:
1. The HF local cache pass works if the model is cached
2. The retry loop skips the network-dependent HF path entirely
3. retrieve_model_gcs() only checks for local fast-* directories
4. No network calls are attempted at all

The truthy value check aligns with huggingface_hub's own parsing of
HF_HUB_OFFLINE, which accepts "1", "true", "yes", "on"
(case-insensitive).

This is critical for air-gapped / restricted environments where both
HuggingFace and Google Cloud Storage are unreachable.

Made-with: Cursor
---
 fastembed/common/model_management.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fastembed/common/model_management.py b/fastembed/common/model_management.py
index 5301def80..bfcf27cec 100644
--- a/fastembed/common/model_management.py
+++ b/fastembed/common/model_management.py
@@ -395,6 +395,10 @@ def download_model(cls, model: T, cache_dir: str, retries: int = 3, **kwargs: An
             Path: The path to the downloaded model directory.
         """
         local_files_only = kwargs.get("local_files_only", False)
+        hf_offline = os.environ.get("HF_HUB_OFFLINE", "").strip().upper()
+        if not local_files_only and hf_offline in {"1", "TRUE", "YES", "ON"}:
+            local_files_only = True
+            kwargs["local_files_only"] = True
         specific_model_path: str | None = kwargs.pop("specific_model_path", None)
         if specific_model_path:
             return Path(specific_model_path)