Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions chromadb/utils/embedding_functions/text2vec_embedding_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,24 @@ def __init__(self, model_name: str = "shibing624/text2vec-base-chinese"):
model_name (str, optional): The name of the model to use for text embeddings.
Defaults to "shibing624/text2vec-base-chinese".
"""
try:
from text2vec import SentenceModel
except ImportError:
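# Probe for the optional text2vec dependency with find_spec, rather than wrapping the import in try/except, so a missing package produces a clear error.
# NOTE(review): a bare `import importlib` does not guarantee that the `importlib.util` submodule is loaded; `import importlib.util` is the safe form — confirm.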
import importlib

if importlib.util.find_spec("text2vec") is None:
raise ValueError(
"The text2vec python package is not installed. Please install it with `pip install text2vec`"
)
from text2vec import SentenceModel

self.model_name = model_name
self._model = SentenceModel(model_name_or_path=model_name)
# SentenceModel loading can be expensive if repeatedly called with the same model_name,
# so reuse the model across instances if possible. Here we use a class-level cache.
if not hasattr(self.__class__, "_model_cache"):
self.__class__._model_cache = {}
model_cache = self.__class__._model_cache
if model_name not in model_cache:
model_cache[model_name] = SentenceModel(model_name_or_path=model_name)
self._model = model_cache[model_name]

def __call__(self, input: Documents) -> Embeddings:
"""
Expand Down