feat: add embedding device support + fix DefaultEmbeddingFunction regression

FabioLissi · FabioLissi · commit 5cf62eee61eb · 2026-04-09T18:20:39.000-05:00
Builds on top of MemPalace#442 with two improvements and one regression fix: 1. Expose embedding device via MEMPALACE_EMBEDDING_DEVICE env var / embedding_device config key. This lets Apple Silicon users set device='mps' and NVIDIA users set device='cuda' to dramatically speed up embedding generation during mempalace mine (5-15x measured on M-series). 2. Ergonomic default: setting only MEMPALACE_EMBEDDING_DEVICE automatically uses sentence-transformers/all-MiniLM-L6-v2 (same weights as ChromaDB's default ONNX embedder), so users don't have to know the model name to get GPU acceleration, and existing palaces remain vector-compatible. 3. Fix a regression in MemPalace#442: when no model is configured, get_embedding_function() used to return None, which newer ChromaDB rejects with 'You must provide an embedding function' at collection.add() time. Now returns ChromaDB's DefaultEmbeddingFunction() explicitly, restoring the pre-MemPalace#442 default behavior and making tests/test_convo_miner.py pass again. All 552 tests pass, including 7 new tests covering: - embedding_device property reads from env var and config.json - device is passed through to SentenceTransformerEmbeddingFunction when set - device alone activates the default model - device is NOT passed when unset (preserves original MemPalace#442 call signature) - device can be set via config.json independent of model
diff --git a/mempalace/config.py b/mempalace/config.py
@@ -208,6 +208,20 @@ def embedding_model(self):
             return env_val
         return self._file_config.get("embedding_model", None)
 
+    @property
+    def embedding_device(self):
+        """Configured embedding device ('cpu', 'mps', 'cuda', ...) or None.
+
+        The ``MEMPALACE_EMBEDDING_DEVICE`` env var, when non-empty, overrides
+        the ``embedding_device`` key of the file config; the value is returned
+        unvalidated.  ``'mps'`` (Apple Silicon) or ``'cuda'`` (NVIDIA) can
+        speed up embedding generation during ``mempalace mine``.
+        """
+        env_val = os.environ.get("MEMPALACE_EMBEDDING_DEVICE")
+        if env_val:
+            return env_val
+        return self._file_config.get("embedding_device", None)
+
     def save_people_map(self, people_map):
         """Write people_map.json to config directory.
 
@@ -225,15 +239,33 @@ def save_people_map(self, people_map):
 _embedding_function = None
 _embedding_function_resolved = False
 
+# Default model used when a device is explicitly requested but no model name
+# is configured.  This is the same underlying model ChromaDB uses by default
+# (via its ONNX runtime), so vectors remain compatible with existing palaces.
+_DEFAULT_MODEL_FOR_DEVICE = "sentence-transformers/all-MiniLM-L6-v2"
 
-def get_embedding_function(config=None):
-    """Return the configured ChromaDB embedding function, or None for default.
 
-    Checks MEMPALACE_EMBEDDING_MODEL env var first, then config.json
-    ``embedding_model`` key.  When a model name is found, attempts to import
-    ``SentenceTransformerEmbeddingFunction`` from chromadb.  If
-    sentence-transformers is not installed the import will fail and we fall
-    back to None (ChromaDB's built-in default), logging a warning.
+def get_embedding_function(config=None):
+    """Return the configured ChromaDB embedding function.
+
+    Resolution order:
+
+    1. If ``MEMPALACE_EMBEDDING_MODEL`` / ``embedding_model`` is set, use that
+       model via :class:`SentenceTransformerEmbeddingFunction`.
+    2. Else, if ``MEMPALACE_EMBEDDING_DEVICE`` / ``embedding_device`` is set
+       (e.g. ``'mps'``, ``'cuda'``), use the default model
+       ``sentence-transformers/all-MiniLM-L6-v2`` on that device.  This gives
+       Apple Silicon / NVIDIA users a GPU speedup without having to think
+       about model names, while staying vector-compatible with ChromaDB's
+       default ONNX embedder (same underlying weights).
+    3. Else, return ChromaDB's built-in ``DefaultEmbeddingFunction`` (ONNX
+       MiniLM on CPU).  Newer ChromaDB versions require an explicit embedding
+       function at collection creation time, so returning ``None`` here would
+       break ``collection.add()`` calls.
+
+    When a model is resolved, the configured device (if any) is passed through
+    to ``SentenceTransformerEmbeddingFunction``.  If ``sentence-transformers``
+    isn't installed, we log a warning and fall back to ChromaDB's default.
 
     The result is cached so the function is only resolved once per process.
     """
@@ -245,19 +277,49 @@ def get_embedding_function(config=None):
 
     cfg = config or MempalaceConfig()
     model_name = cfg.embedding_model
+    device = cfg.embedding_device
+
+    # Ergonomic default: if the user asked for a device but didn't pick a
+    # model, use the same model ChromaDB uses by default so vectors stay
+    # compatible with existing palaces.
+    if not model_name and device:
+        model_name = _DEFAULT_MODEL_FOR_DEVICE
+
     if not model_name:
-        return None
+        # No explicit configuration — use ChromaDB's default embedder.
+        # We must return a real callable (not None), because newer ChromaDB
+        # versions reject `embedding_function=None` at collection.add() time.
+        try:
+            from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
+
+            _embedding_function = DefaultEmbeddingFunction()
+        except Exception:
+            _embedding_function = None
+        return _embedding_function
 
     try:
         from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 
-        _embedding_function = SentenceTransformerEmbeddingFunction(model_name=model_name)
-        logger.info("Using embedding model: %s", model_name)
+        kwargs = {"model_name": model_name}
+        if device:
+            kwargs["device"] = device
+
+        _embedding_function = SentenceTransformerEmbeddingFunction(**kwargs)
+        logger.info(
+            "Using embedding model: %s (device=%s)",
+            model_name,
+            device or "default",
+        )
     except Exception:
         logger.warning(
             "sentence-transformers not installed — falling back to ChromaDB default. "
             "Install with: pip install mempalace[multilingual]"
         )
-        _embedding_function = None
+        try:
+            from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
+
+            _embedding_function = DefaultEmbeddingFunction()
+        except Exception:
+            _embedding_function = None
 
     return _embedding_function
diff --git a/tests/test_multilingual.py b/tests/test_multilingual.py
@@ -33,19 +33,23 @@ def config_dir(tmp_path):
 
 
 class TestGetEmbeddingFunctionDefault:
-    """When no model is configured, get_embedding_function returns None."""
+    """When no model is configured, get_embedding_function returns ChromaDB's default."""
 
-    def test_returns_none_no_config(self, tmp_path):
+    def test_returns_default_no_config(self, tmp_path):
         config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
         result = get_embedding_function(config=config)
-        assert result is None
+        # Must not be None — newer ChromaDB requires an explicit callable
+        # so that `collection.add()` can compute embeddings.
+        assert result is not None
+        assert callable(result)
 
-    def test_returns_none_empty_config(self, config_dir):
+    def test_returns_default_empty_config(self, config_dir):
         config_file = config_dir / "config.json"
         config_file.write_text("{}")
         config = MempalaceConfig(config_dir=str(config_dir))
         result = get_embedding_function(config=config)
-        assert result is None
+        assert result is not None
+        assert callable(result)
 
 
 class TestGetEmbeddingFunctionEnvVar:
@@ -117,7 +121,7 @@ def test_config_file_model(self, config_dir):
 class TestGetEmbeddingFunctionFallback:
     """Graceful fallback when sentence-transformers is not installed."""
 
-    def test_import_error_returns_none(self, config_dir):
+    def test_import_error_falls_back_to_default(self, config_dir):
         config_file = config_dir / "config.json"
         config_file.write_text(json.dumps({"embedding_model": "some-model"}))
         config = MempalaceConfig(config_dir=str(config_dir))
@@ -128,7 +132,9 @@ def test_import_error_returns_none(self, config_dir):
         ):
             result = get_embedding_function(config=config)
 
-        assert result is None
+        # Falls back to ChromaDB's DefaultEmbeddingFunction, not None
+        assert result is not None
+        assert callable(result)
 
 
 class TestGetEmbeddingFunctionCaching:
@@ -153,12 +159,14 @@ def test_caches_result(self, config_dir):
         # Constructor called only once due to caching
         assert mock_st_cls.call_count == 1
 
-    def test_caches_none_result(self, tmp_path):
+    def test_caches_default_result(self, tmp_path):
+        """The default embedding function is also cached between calls."""
         config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
         result1 = get_embedding_function(config=config)
         result2 = get_embedding_function(config=config)
-        assert result1 is None
-        assert result2 is None
+        # Same instance returned (cached), and never None
+        assert result1 is result2
+        assert result1 is not None
 
 
 class TestEmbeddingModelProperty:
@@ -180,3 +188,130 @@ def test_env_var_overrides(self, config_dir):
         config = MempalaceConfig(config_dir=str(config_dir))
         with patch.dict(os.environ, {"MEMPALACE_EMBEDDING_MODEL": "env-model"}):
             assert config.embedding_model == "env-model"
+
+
+class TestEmbeddingDeviceProperty:
+    """MempalaceConfig.embedding_device property (tests ignore the ambient env var)."""
+
+    def test_returns_none_by_default(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+        config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
+        assert config.embedding_device is None
+
+    def test_reads_from_config_file(self, config_dir, monkeypatch):
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+        (config_dir / "config.json").write_text(json.dumps({"embedding_device": "mps"}))
+        config = MempalaceConfig(config_dir=str(config_dir))
+        assert config.embedding_device == "mps"
+
+    def test_env_var_overrides(self, config_dir):
+        (config_dir / "config.json").write_text(json.dumps({"embedding_device": "cpu"}))
+        config = MempalaceConfig(config_dir=str(config_dir))
+        with patch.dict(os.environ, {"MEMPALACE_EMBEDDING_DEVICE": "mps"}):
+            assert config.embedding_device == "mps"
+
+
+class TestGetEmbeddingFunctionDevice:
+    """MEMPALACE_EMBEDDING_DEVICE controls the device passed to the embedder."""
+
+    def test_device_passed_to_embedder_with_explicit_model(self, tmp_path):
+        """When both model and device are set, both are passed through."""
+        mock_ef = MagicMock()
+        mock_st_cls = MagicMock(return_value=mock_ef)
+        config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
+
+        with (
+            patch.dict(
+                os.environ,
+                {
+                    "MEMPALACE_EMBEDDING_MODEL": "intfloat/multilingual-e5-base",
+                    "MEMPALACE_EMBEDDING_DEVICE": "mps",
+                },
+            ),
+            patch(
+                "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
+                mock_st_cls,
+            ),
+        ):
+            result = get_embedding_function(config=config)
+
+        assert result is mock_ef
+        mock_st_cls.assert_called_once_with(
+            model_name="intfloat/multilingual-e5-base", device="mps"
+        )
+
+    def test_device_alone_activates_default_model(self, tmp_path, monkeypatch):
+        """Setting only the device should trigger the default model on that device.
+
+        This is the ergonomic path for Apple Silicon / CUDA users.  The model
+        env var is cleared so an ambient value cannot mask the default.
+        """
+        mock_ef = MagicMock()
+        mock_st_cls = MagicMock(return_value=mock_ef)
+        config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
+
+        monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", "mps")
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_MODEL", raising=False)
+
+        with patch(
+            "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
+            mock_st_cls,
+        ):
+            result = get_embedding_function(config=config)
+
+        assert result is mock_ef
+        mock_st_cls.assert_called_once_with(
+            model_name="sentence-transformers/all-MiniLM-L6-v2", device="mps"
+        )
+
+    def test_no_device_no_kwarg(self, tmp_path, monkeypatch):
+        """When no device is set, ``device`` is NOT passed as a kwarg.
+
+        This preserves backward compatibility with the original PR #442
+        behavior where only ``model_name`` was passed.
+        """
+        mock_ef = MagicMock()
+        mock_st_cls = MagicMock(return_value=mock_ef)
+        config = MempalaceConfig(config_dir=str(tmp_path / "empty"))
+
+        monkeypatch.setenv("MEMPALACE_EMBEDDING_MODEL", "some-model")
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+
+        with patch(
+            "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
+            mock_st_cls,
+        ):
+            result = get_embedding_function(config=config)
+
+        assert result is mock_ef
+        mock_st_cls.assert_called_once_with(model_name="some-model")
+
+    def test_device_from_config_file(self, config_dir, monkeypatch):
+        """Device can be set via config.json instead of env var."""
+        config_file = config_dir / "config.json"
+        config_file.write_text(
+            json.dumps(
+                {
+                    "embedding_model": "intfloat/multilingual-e5-base",
+                    "embedding_device": "cuda",
+                }
+            )
+        )
+        config = MempalaceConfig(config_dir=str(config_dir))
+
+        mock_ef = MagicMock()
+        mock_st_cls = MagicMock(return_value=mock_ef)
+
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_MODEL", raising=False)
+        monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+
+        with patch(
+            "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
+            mock_st_cls,
+        ):
+            result = get_embedding_function(config=config)
+
+        assert result is mock_ef
+        mock_st_cls.assert_called_once_with(
+            model_name="intfloat/multilingual-e5-base", device="cuda"
+        )