
Chat memory buffer

ChatMemoryBuffer #

Bases: BaseMemory

Simple buffer for storing chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
class ChatMemoryBuffer(BaseMemory):
    """Simple buffer for storing chat history."""

    token_limit: int
    tokenizer_fn: Callable[[str], List] = Field(
        # NOTE: mypy does not handle the typing here well, hence the cast
        default_factory=get_tokenizer,
        exclude=True,
    )
    chat_store: BaseChatStore = Field(default_factory=SimpleChatStore)
    chat_store_key: str = Field(default=DEFAULT_CHAT_STORE_KEY)

    @classmethod
    def class_name(cls) -> str:
        """Get class name."""
        return "ChatMemoryBuffer"

    @root_validator(pre=True)
    def validate_memory(cls, values: dict) -> dict:
        # Validate token limit
        token_limit = values.get("token_limit", -1)
        if token_limit < 1:
            raise ValueError("Token limit must be set and greater than 0.")

        # Validate tokenizer -- this avoids errors when loading from json/dict
        tokenizer_fn = values.get("tokenizer_fn", None)
        if tokenizer_fn is None:
            values["tokenizer_fn"] = get_tokenizer()

        return values

    @classmethod
    def from_defaults(
        cls,
        chat_history: Optional[List[ChatMessage]] = None,
        llm: Optional[LLM] = None,
        chat_store: Optional[BaseChatStore] = None,
        chat_store_key: str = DEFAULT_CHAT_STORE_KEY,
        token_limit: Optional[int] = None,
        tokenizer_fn: Optional[Callable[[str], List]] = None,
    ) -> "ChatMemoryBuffer":
        """Create a chat memory buffer from an LLM."""
        if llm is not None:
            context_window = llm.metadata.context_window
            token_limit = token_limit or int(context_window * DEFAULT_TOKEN_LIMIT_RATIO)
        elif token_limit is None:
            token_limit = DEFAULT_TOKEN_LIMIT

        if chat_history is not None:
            chat_store = chat_store or SimpleChatStore()
            chat_store.set_messages(chat_store_key, chat_history)

        return cls(
            token_limit=token_limit,
            tokenizer_fn=tokenizer_fn or get_tokenizer(),
            chat_store=chat_store or SimpleChatStore(),
            chat_store_key=chat_store_key,
        )

    def to_string(self) -> str:
        """Convert memory to string."""
        return self.json()

    @classmethod
    def from_string(cls, json_str: str) -> "ChatMemoryBuffer":
        """Create a chat memory buffer from a string."""
        dict_obj = json.loads(json_str)
        return cls.from_dict(dict_obj)

    def to_dict(self, **kwargs: Any) -> dict:
        """Convert memory to dict."""
        return self.dict()

    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> "ChatMemoryBuffer":
        from llama_index.core.storage.chat_store.loading import load_chat_store

        # NOTE: this handles backwards compatibility with the old chat history
        if "chat_history" in data:
            chat_history = data.pop("chat_history")
            chat_store = SimpleChatStore(store={DEFAULT_CHAT_STORE_KEY: chat_history})
            data["chat_store"] = chat_store
        elif "chat_store" in data:
            chat_store = data.pop("chat_store")
            chat_store = load_chat_store(chat_store)
            data["chat_store"] = chat_store

        return cls(**data)

    def get(self, initial_token_count: int = 0, **kwargs: Any) -> List[ChatMessage]:
        """Get chat history."""
        chat_history = self.get_all()

        if initial_token_count > self.token_limit:
            raise ValueError("Initial token count exceeds token limit")

        message_count = len(chat_history)

        cur_messages = chat_history[-message_count:]
        token_count = self._token_count_for_messages(cur_messages) + initial_token_count

        while token_count > self.token_limit and message_count > 1:
            message_count -= 1
            if chat_history[-message_count].role == MessageRole.TOOL:
                # a tool message must be preceded by its assistant message;
                # if the window would now start with a tool message, drop it too
                message_count -= 1

            if chat_history[-message_count].role == MessageRole.ASSISTANT:
                # the history cannot start with an assistant message;
                # if the window would now start with one, drop it too
                message_count -= 1

            cur_messages = chat_history[-message_count:]
            token_count = (
                self._token_count_for_messages(cur_messages) + initial_token_count
            )

        # handle the case where even the single remaining message exceeds the limit
        if token_count > self.token_limit or message_count <= 0:
            return []

        return chat_history[-message_count:]

    def get_all(self) -> List[ChatMessage]:
        """Get all chat history."""
        return self.chat_store.get_messages(self.chat_store_key)

    def put(self, message: ChatMessage) -> None:
        """Put chat history."""
        # ensure everything is serialized
        self.chat_store.add_message(self.chat_store_key, message)

    def set(self, messages: List[ChatMessage]) -> None:
        """Set chat history."""
        self.chat_store.set_messages(self.chat_store_key, messages)

    def reset(self) -> None:
        """Reset chat history."""
        self.chat_store.delete_messages(self.chat_store_key)

    def _token_count_for_messages(self, messages: List[ChatMessage]) -> int:
        if len(messages) <= 0:
            return 0

        msg_str = " ".join(str(m.content) for m in messages)
        return len(self.tokenizer_fn(msg_str))
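
A minimal usage sketch (the message text is illustrative): create a buffer with an explicit token limit, append messages with put, and read back the token-limited window with get.

from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer

# keep roughly the last 3000 tokens of conversation
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# append messages as the conversation progresses
memory.put(ChatMessage(role=MessageRole.USER, content="What is a vector index?"))
memory.put(ChatMessage(role=MessageRole.ASSISTANT, content="An index built over embeddings for similarity search."))

# returns the most recent messages that fit under the token limit
history = memory.get()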

class_name classmethod #

class_name() -> str

Get class name.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
@classmethod
def class_name(cls) -> str:
    """Get class name."""
    return "ChatMemoryBuffer"

from_defaults classmethod #

from_defaults(chat_history: Optional[List[ChatMessage]] = None, llm: Optional[LLM] = None, chat_store: Optional[BaseChatStore] = None, chat_store_key: str = DEFAULT_CHAT_STORE_KEY, token_limit: Optional[int] = None, tokenizer_fn: Optional[Callable[[str], List]] = None) -> ChatMemoryBuffer

Create a chat memory buffer from an LLM.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
@classmethod
def from_defaults(
    cls,
    chat_history: Optional[List[ChatMessage]] = None,
    llm: Optional[LLM] = None,
    chat_store: Optional[BaseChatStore] = None,
    chat_store_key: str = DEFAULT_CHAT_STORE_KEY,
    token_limit: Optional[int] = None,
    tokenizer_fn: Optional[Callable[[str], List]] = None,
) -> "ChatMemoryBuffer":
    """Create a chat memory buffer from an LLM."""
    if llm is not None:
        context_window = llm.metadata.context_window
        token_limit = token_limit or int(context_window * DEFAULT_TOKEN_LIMIT_RATIO)
    elif token_limit is None:
        token_limit = DEFAULT_TOKEN_LIMIT

    if chat_history is not None:
        chat_store = chat_store or SimpleChatStore()
        chat_store.set_messages(chat_store_key, chat_history)

    return cls(
        token_limit=token_limit,
        tokenizer_fn=tokenizer_fn or get_tokenizer(),
        chat_store=chat_store or SimpleChatStore(),
        chat_store_key=chat_store_key,
    )
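
A sketch of the three construction paths, using MockLLM from llama_index.core.llms as a stand-in (any LLM exposing metadata.context_window behaves the same way): pass an llm to derive the limit from its context window, pass token_limit explicitly, or seed the buffer with an existing history.

from llama_index.core.llms import ChatMessage, MockLLM
from llama_index.core.memory import ChatMemoryBuffer

# explicit limit
memory = ChatMemoryBuffer.from_defaults(token_limit=1000)

# LLM-derived limit: a fixed ratio of the model's context window
memory = ChatMemoryBuffer.from_defaults(llm=MockLLM())

# seed the underlying chat store with prior messages
memory = ChatMemoryBuffer.from_defaults(
    chat_history=[ChatMessage(role="user", content="hi")],
    token_limit=1000,
)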

to_string #

to_string() -> str

Convert memory to string.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def to_string(self) -> str:
    """Convert memory to string."""
    return self.json()

from_string classmethod #

from_string(json_str: str) -> ChatMemoryBuffer

Create a chat memory buffer from a string.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
@classmethod
def from_string(cls, json_str: str) -> "ChatMemoryBuffer":
    """Create a chat memory buffer from a string."""
    dict_obj = json.loads(json_str)
    return cls.from_dict(dict_obj)
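
A round-trip sketch: to_string serializes the buffer (including its chat store) to JSON, and from_string rebuilds an equivalent buffer. This assumes the chat store is JSON-serializable, which holds for the default SimpleChatStore.

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1000)
memory.put(ChatMessage(role="user", content="remember me"))

serialized = memory.to_string()  # a JSON string, e.g. for persistence

restored = ChatMemoryBuffer.from_string(serialized)
assert restored.get_all()[0].content == "remember me"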

to_dict #

to_dict(**kwargs: Any) -> dict

Convert memory to dict.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def to_dict(self, **kwargs: Any) -> dict:
    """Convert memory to dict."""
    return self.dict()
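
The dict form round-trips the same way via from_dict (shown in the class source above), which also accepts the legacy chat_history key for backwards compatibility. A short sketch:

from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1000)
data = memory.to_dict()

# rebuilds an equivalent buffer; the chat store is reloaded from the dict
restored = ChatMemoryBuffer.from_dict(data)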

get #

get(initial_token_count: int = 0, **kwargs: Any) -> List[ChatMessage]

Get chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def get(self, initial_token_count: int = 0, **kwargs: Any) -> List[ChatMessage]:
    """Get chat history."""
    chat_history = self.get_all()

    if initial_token_count > self.token_limit:
        raise ValueError("Initial token count exceeds token limit")

    message_count = len(chat_history)

    cur_messages = chat_history[-message_count:]
    token_count = self._token_count_for_messages(cur_messages) + initial_token_count

    while token_count > self.token_limit and message_count > 1:
        message_count -= 1
        if chat_history[-message_count].role == MessageRole.TOOL:
            # a tool message must be preceded by its assistant message;
            # if the window would now start with a tool message, drop it too
            message_count -= 1

        if chat_history[-message_count].role == MessageRole.ASSISTANT:
            # the history cannot start with an assistant message;
            # if the window would now start with one, drop it too
            message_count -= 1

        cur_messages = chat_history[-message_count:]
        token_count = (
            self._token_count_for_messages(cur_messages) + initial_token_count
        )

    # handle the case where even the single remaining message exceeds the limit
    if token_count > self.token_limit or message_count <= 0:
        return []

    return chat_history[-message_count:]
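
A sketch of the truncation behavior under an artificially small token_limit; the exact cutoff depends on the tokenizer in use (the get_tokenizer default), so treat the numbers as illustrative. initial_token_count reserves room for text prepended outside the buffer, such as a system prompt.

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=50)
for i in range(20):
    memory.put(ChatMessage(role="user", content=f"message number {i}, padded out a bit"))

window = memory.get()     # only the most recent messages that fit
full = memory.get_all()   # the complete stored history
assert len(window) < len(full)

# reserve 10 tokens for, e.g., a system prompt rendered elsewhere
window = memory.get(initial_token_count=10)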

get_all #

get_all() -> List[ChatMessage]

Get all chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def get_all(self) -> List[ChatMessage]:
    """Get all chat history."""
    return self.chat_store.get_messages(self.chat_store_key)

put #

put(message: ChatMessage) -> None

Put chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def put(self, message: ChatMessage) -> None:
    """Put chat history."""
    # ensure everything is serialized
    self.chat_store.add_message(self.chat_store_key, message)

set #

set(messages: List[ChatMessage]) -> None

Set chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def set(self, messages: List[ChatMessage]) -> None:
    """Set chat history."""
    self.chat_store.set_messages(self.chat_store_key, messages)
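
Unlike put, which appends, set replaces whatever is stored under chat_store_key wholesale:

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1000)
memory.put(ChatMessage(role="user", content="old"))

memory.set([ChatMessage(role="user", content="new")])  # overwrites "old"
assert [m.content for m in memory.get_all()] == ["new"]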

reset #

reset() -> None

Reset chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
def reset(self) -> None:
    """Reset chat history."""
    self.chat_store.delete_messages(self.chat_store_key)
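
reset deletes the stored history under this buffer's key; subsequent reads return an empty list.

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1000)
memory.put(ChatMessage(role="user", content="ephemeral"))

memory.reset()
assert memory.get_all() == []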