diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py index 1e9978f75..1ad498909 100644 --- a/fastembed/text/onnx_embedding.py +++ b/fastembed/text/onnx_embedding.py @@ -180,6 +180,20 @@ sources=ModelSource(hf="jinaai/jina-clip-v1"), model_file="onnx/text_model.onnx", ), + DenseModelDescription( + model="BAAI/bge-m3", + dim=1024, + description=( + "Text embeddings, Unimodal (text), Multilingual (100+ languages), " + "8192 input tokens truncation, " + "versatility in Multi-Functionality, Multi-Linguality, and Multi-Granularity." + ), + license="mit", + size_in_GB=2.27, + sources=ModelSource(hf="BAAI/bge-m3"), + model_file="onnx/model.onnx", + additional_files=["onnx/model.onnx_data", "onnx/sentencepiece.bpe.model"], + ), ] diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py index e919faf9d..11aa90a6f 100644 --- a/tests/test_text_onnx_embeddings.py +++ b/tests/test_text_onnx_embeddings.py @@ -68,6 +68,7 @@ "Qdrant/clip-ViT-B-32-text": np.array([0.0083, 0.0103, -0.0138, 0.0199, -0.0069]), "thenlper/gte-base": np.array([0.0038, 0.0355, 0.0181, 0.0092, 0.0654]), "jinaai/jina-clip-v1": np.array([-0.0862, -0.0101, -0.0056, 0.0375, -0.0472]), + "BAAI/bge-m3": np.array([-0.0404, 0.037, -0.029, 0.0161, -0.0357]), } MULTI_TASK_MODELS = ["jinaai/jina-embeddings-v3"]