Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions examples/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Semantic memory for an agent: pin fixed facts, store memories, retrieve fast.

Run with the recommended extras for the real static embedder + ANN index:

uv run --with model2vec --with usearch python examples/memory.py

Without those extras it falls back to the dependency-free HashingEmbedder (lexical
only) and the exact brute-force index — still fully functional, just less semantic.
"""

from livekit.memory import MemoryStore

# Choose the embedder once at import time. Both the import and the constructor
# sit inside the `try`, so an ImportError raised by either one selects the
# fallback path below.
try:
    from livekit.memory import Model2VecEmbedder

    embedder = Model2VecEmbedder()  # static embedder: no transformer forward pass
except ImportError:
    print("(model2vec not installed; using the dependency-free HashingEmbedder)\n")
    embedder = None  # None makes MemoryStore use its HashingEmbedder default


def main() -> None:
    """Fill a demo store, then print one `context()` string and one `search()` ranking."""
    # A store is intended to be per user/session. The large `expected_size`
    # hint is what lets the `auto` backend choose HNSW.
    store = MemoryStore(
        embedder=embedder,
        backend="auto",
        expected_size=1_000_000,
    )

    # Keyed, "fixed" facts about the user: pinned so context() always has them.
    pinned_facts = (
        ("name", "The user's name is Ada Lovelace."),
        ("units", "The user prefers metric units and a 24-hour clock."),
    )
    for key, fact in pinned_facts:
        store.upsert(key, fact)

    # Un-keyed semantic memories gathered during the conversation.
    memories = (
        "We talked about the analytical engine and Bernoulli numbers.",
        "The user is planning a trip to Turin next spring.",
        "The user dislikes phone calls and prefers async messages.",
    )
    for memory in memories:
        store.add(memory)

    # Per-turn, latency-critical call: returns a single prompt-ready string.
    print("=== context() for 'what should I call them and any travel plans?' ===")
    prompt_context = store.context("what should I call them and any travel plans?", limit=3)
    print(prompt_context)

    # Direct ranked retrieval: one line per hit, signed score first.
    print("\n=== search() ranked hits for 'communication preferences' ===")
    ranked_hits = store.search("how does the user like to communicate?", limit=3)
    for hit in ranked_hits:
        print(f" {hit.score:+.3f} {hit.text}")


if __name__ == "__main__":
    main()
72 changes: 72 additions & 0 deletions livekit-memory/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# LiveKit Memory

In-process, in-memory **semantic memory** for LiveKit agents — sub-10ms end-to-end
retrieval (embed the query *and* search) of a user's fixed context, fully self-hosted.

```shell
pip install "livekit-memory[recommended]" # static embedder + ANN index
```

## Why it's fast

The hard constraint for a voice agent loop is the *end-to-end* budget: you hand it text,
and it must embed and search in under 10ms. A transformer embedding on CPU alone takes
~10ms (p99 ~50ms), which exhausts that budget before any search runs. The route here:

- **Static embeddings (Model2Vec)** — token-lookup + mean-pool, no transformer forward
pass. ~0.03ms per short query on CPU.
- **In-memory index** — exact brute-force cosine (sub-ms for small corpora, ~1.5ms at
~100k vectors), automatically upgrading to a
[usearch](https://github.com/unum-cloud/usearch) HNSW graph for large per-user corpora
(~0.27ms search at 1M vectors).

Measured on an Apple M4 Pro: **0.17ms median / 0.31ms p99 end-to-end at 1M vectors** —
~30× under budget.

## Usage

```python
from livekit.memory import MemoryStore, Model2VecEmbedder

# one store per user / session
store = MemoryStore(embedder=Model2VecEmbedder(), backend="auto", expected_size=1_000_000)

# "fixed" facts about the user — pinned, always available
store.upsert("name", "The user's name is Ada Lovelace.")
store.upsert("units", "Prefers metric units.")

# free-form semantic memories — ranked by relevance
store.add("Discussed the analytical engine and Bernoulli numbers.", metadata={"session": 42})

# the latency-critical call your agent makes each turn:
context = store.context("what should I call them, and what did we talk about?")
# -> a prompt-ready string with the pinned facts + top relevant memories

# or rank directly:
for hit in store.search("mathematics history", limit=5):
print(hit.score, hit.text)
```

### Bring your own embedder

`embedder` accepts an `Embedder`, any batched `list[str] -> vectors` callable (pass
`dims=`), or `None` for the dependency-free `HashingEmbedder` (deterministic, no model
download — good for tests/offline). For higher recall at the cost of latency, an ONNX
transformer embedder can be wrapped via `CallableEmbedder`.

### Persistence

`store.save(dir)` / `MemoryStore.load(dir, embedder=...)` snapshot items and both indices
to disk (the same embedder must be supplied on load).

## Backends

| Backend | When | Latency (384d) |
|---|---|---|
| `bruteforce` (default) | ≲100k vectors / user | exact, ~1.5ms @ 100k |
| `usearch` | ≳100k vectors / user | HNSW, ~0.27ms @ 1M |
| `auto` | picks per `expected_size` | — |

`usearch` is an optional dependency (`pip install "livekit-memory[ann]"`); without it,
`auto` stays on exact brute force.

See https://docs.livekit.io for more information.
60 changes: 60 additions & 0 deletions livekit-memory/livekit/memory/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2024 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""LiveKit Memory — in-process semantic memory for agents.

`pip install livekit-memory[recommended]`

Sub-10ms end-to-end (embed query + ANN search) retrieval of a user's fixed context,
fully self-hosted. The default route is a static Model2Vec embedder (no transformer
forward pass) plus an in-memory index (exact brute-force, upgrading to a usearch HNSW
graph for large per-user corpora).

from livekit.memory import MemoryStore, Model2VecEmbedder

store = MemoryStore(embedder=Model2VecEmbedder())
store.upsert("name", "The user's name is Ada.") # a fixed fact
store.add("They prefer metric units and dark mode.") # semantic memory
ctx = store.context("what should I call them?") # prompt-ready string
"""

from ._index import ANN_CROSSOVER, BruteForceIndex, UsearchIndex, VectorIndex
from ._types import DEFAULT_NAMESPACE, FACTS_NAMESPACE, MemoryItem
from .embeddings import (
CallableEmbedder,
Embedder,
EmbedFn,
HashingEmbedder,
Model2VecEmbedder,
)
from .store import MemoryStore, QueryLike
from .version import __version__

# Public API of the package, grouped by the module each name is imported from.
__all__ = [
    # .store
    "MemoryStore",
    # ._types
    "MemoryItem",
    # .store
    "QueryLike",
    # .embeddings
    "Embedder",
    "Model2VecEmbedder",
    "HashingEmbedder",
    "CallableEmbedder",
    "EmbedFn",
    # ._index
    "VectorIndex",
    "BruteForceIndex",
    "UsearchIndex",
    "ANN_CROSSOVER",
    # ._types
    "DEFAULT_NAMESPACE",
    "FACTS_NAMESPACE",
    # .version
    "__version__",
]
Loading