From faf02818e7c64bdf74c9181973f93ddf9de44bb1 Mon Sep 17 00:00:00 2001
From: lucifertrj
Date: Thu, 5 Feb 2026 17:56:18 +0530
Subject: [PATCH 1/2] bgem3 embed support added

---
 fastembed/text/onnx_embedding.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index 1e9978f75..1ad498909 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -180,6 +180,20 @@
         sources=ModelSource(hf="jinaai/jina-clip-v1"),
         model_file="onnx/text_model.onnx",
     ),
+    DenseModelDescription(
+        model="BAAI/bge-m3",
+        dim=1024,
+        description=(
+            "Text embeddings, Unimodal (text), Multilingual (100+ languages), "
+            "8192 input tokens truncation, "
+            "versatility in Multi-Functionality, Multi-Linguality, and Multi-Granularity."
+        ),
+        license="mit",
+        size_in_GB=2.27,
+        sources=ModelSource(hf="BAAI/bge-m3"),
+        model_file="onnx/model.onnx",
+        additional_files=["onnx/model.onnx_data", "onnx/sentencepiece.bpe.model"],
+    ),
 ]
 
 

From 92e87e9ff8a2ff02746ab0e07f492c523c9a9de3 Mon Sep 17 00:00:00 2001
From: lucifertrj
Date: Thu, 5 Feb 2026 17:59:42 +0530
Subject: [PATCH 2/2] CANONICAL_VECTOR_VALUES for bge m3

---
 tests/test_text_onnx_embeddings.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py
index e919faf9d..11aa90a6f 100644
--- a/tests/test_text_onnx_embeddings.py
+++ b/tests/test_text_onnx_embeddings.py
@@ -68,6 +68,7 @@
     "Qdrant/clip-ViT-B-32-text": np.array([0.0083, 0.0103, -0.0138, 0.0199, -0.0069]),
     "thenlper/gte-base": np.array([0.0038, 0.0355, 0.0181, 0.0092, 0.0654]),
     "jinaai/jina-clip-v1": np.array([-0.0862, -0.0101, -0.0056, 0.0375, -0.0472]),
+    "BAAI/bge-m3": np.array([-0.0404, 0.037, -0.029, 0.0161, -0.0357]),
 }
 
 MULTI_TASK_MODELS = ["jinaai/jina-embeddings-v3"]