livekit · Piyussh01 · Jun 22, 2026 · Jun 22, 2026
@@ -0,0 +1,45 @@
+"""Semantic memory for an agent: pin fixed facts, store memories, retrieve fast.
+
+Run with the recommended extras for the real static embedder + ANN index:
+
+    uv run --with model2vec --with usearch python examples/memory.py
+
+Without those extras it falls back to the dependency-free HashingEmbedder (lexical
+only) and the exact brute-force index — still fully functional, just less semantic.
+"""
+
+from livekit.memory import MemoryStore
+
+try:
+    from livekit.memory import Model2VecEmbedder
+
+    embedder = Model2VecEmbedder()  # static, ~0.03ms/query, no transformer forward pass
+except ImportError:
+    embedder = None  # -> HashingEmbedder default
+    print("(model2vec not installed; using the dependency-free HashingEmbedder)\n")
+
+
+def main() -> None:
+    # One store per user/session. `expected_size` lets `auto` pick the HNSW backend.
+    store = MemoryStore(embedder=embedder, backend="auto", expected_size=1_000_000)
+
+    # "Fixed" facts about the user — pinned, always available to context().
+    store.upsert("name", "The user's name is Ada Lovelace.")
+    store.upsert("units", "The user prefers metric units and a 24-hour clock.")
+
+    # Free-form semantic memories accumulated over the conversation.
+    store.add("We talked about the analytical engine and Bernoulli numbers.")
+    store.add("The user is planning a trip to Turin next spring.")
+    store.add("The user dislikes phone calls and prefers async messages.")
+
+    # The latency-critical call an agent makes each turn: one prompt-ready string.
+    print("=== context() for 'what should I call them and any travel plans?' ===")
+    print(store.context("what should I call them and any travel plans?", limit=3))
+
+    print("\n=== search() ranked hits for 'communication preferences' ===")
+    for hit in store.search("how does the user like to communicate?", limit=3):
+        print(f"  {hit.score:+.3f}  {hit.text}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,72 @@
+# LiveKit Memory
+
+In-process, in-memory **semantic memory** for LiveKit agents — sub-10ms end-to-end
+retrieval (embed the query *and* search) of a user's fixed context, fully self-hosted.
+
+```shell
+pip install livekit-memory[recommended]   # static embedder + ANN index
+```
+
+## Why it's fast
+
+The hard constraint for a voice agent loop is the *end-to-end* budget: you hand it text,
+it must embed and search in under 10ms. A transformer embedding on CPU alone is ~10ms
+(p99 ~50ms) and blows that. The route here:
+
+- **Static embeddings (Model2Vec)** — token-lookup + mean-pool, no transformer forward
+  pass. ~0.03ms per short query on CPU.
+- **In-memory index** — exact brute-force cosine (sub-ms to ~100k vectors), automatically
+  upgrading to a [usearch](https://github.com/unum-cloud/usearch) HNSW graph for large
+  per-user corpora (~0.27ms search at 1M vectors).
+
+Measured on an Apple M4 Pro: **0.17ms median / 0.31ms p99 end-to-end at 1M vectors** —
+~30× under budget.
+
+## Usage
+
+```python
+from livekit.memory import MemoryStore, Model2VecEmbedder
+
+# one store per user / session
+store = MemoryStore(embedder=Model2VecEmbedder(), backend="auto", expected_size=1_000_000)
+
+# "fixed" facts about the user — pinned, always available
+store.upsert("name", "The user's name is Ada Lovelace.")
+store.upsert("units", "Prefers metric units.")
+
+# free-form semantic memories — ranked by relevance
+store.add("Discussed the analytical engine and Bernoulli numbers.", metadata={"session": 42})
+
+# the latency-critical call your agent makes each turn:
+context = store.context("what should I call them, and what did we talk about?")
+# -> a prompt-ready string with the pinned facts + top relevant memories
+
+# or rank directly:
+for hit in store.search("mathematics history", limit=5):
+    print(hit.score, hit.text)
+```
+
+### Bring your own embedder
+
+`embedder` accepts an `Embedder`, any batched `list[str] -> vectors` callable (pass
+`dims=`), or `None` for the dependency-free `HashingEmbedder` (deterministic, no model
+download — good for tests/offline). For higher recall at the cost of latency, an ONNX
+transformer embedder can be wrapped via `CallableEmbedder`.
+
+### Persistence
+
+`store.save(dir)` / `MemoryStore.load(dir, embedder=...)` snapshot items and both indices
+to disk (the same embedder must be supplied on load).
+
+## Backends
+
+| Backend | When | Latency (384d) |
+|---|---|---|
+| `bruteforce` (default) | ≲100k vectors / user | exact, ~1.5ms @ 100k |
+| `usearch` | ≳100k vectors / user | HNSW, ~0.27ms @ 1M |
+| `auto` | picks per `expected_size` | — |
+
+`usearch` is an optional dependency (`pip install livekit-memory[ann]`); without it,
+`auto` stays on exact brute force.
+
+See https://docs.livekit.io for more information.
@@ -0,0 +1,60 @@
+# Copyright 2024 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""LiveKit Memory — in-process semantic memory for agents.
+
+`pip install livekit-memory[recommended]`
+
+Sub-10ms end-to-end (embed query + ANN search) retrieval of a user's fixed context,
+fully self-hosted. The default route is a static Model2Vec embedder (no transformer
+forward pass) plus an in-memory index (exact brute-force, upgrading to a usearch HNSW
+graph for large per-user corpora).
+
+    from livekit.memory import MemoryStore, Model2VecEmbedder
+
+    store = MemoryStore(embedder=Model2VecEmbedder())
+    store.upsert("name", "The user's name is Ada.")          # a fixed fact
+    store.add("They prefer metric units and dark mode.")      # semantic memory
+    ctx = store.context("what should I call them?")           # prompt-ready string
+"""
+
+from ._index import ANN_CROSSOVER, BruteForceIndex, UsearchIndex, VectorIndex
+from ._types import DEFAULT_NAMESPACE, FACTS_NAMESPACE, MemoryItem
+from .embeddings import (
+    CallableEmbedder,
+    Embedder,
+    EmbedFn,
+    HashingEmbedder,
+    Model2VecEmbedder,
+)
+from .store import MemoryStore, QueryLike
+from .version import __version__
+
+__all__ = [
+    "MemoryStore",
+    "MemoryItem",
+    "QueryLike",
+    "Embedder",
+    "Model2VecEmbedder",
+    "HashingEmbedder",
+    "CallableEmbedder",
+    "EmbedFn",
+    "VectorIndex",
+    "BruteForceIndex",
+    "UsearchIndex",
+    "ANN_CROSSOVER",
+    "DEFAULT_NAMESPACE",
+    "FACTS_NAMESPACE",
+    "__version__",
+]