Index

ChatResponseMode #

Bases: str, Enum

Flag toggling waiting/streaming in Agent._chat.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

class ChatResponseMode(str, Enum):
    """Switch between the waiting and streaming paths of `Agent._chat`."""

    # Waiting (non-streaming) mode.
    WAIT = "wait"
    # Streaming mode.
    STREAM = "stream"

AgentChatResponse `dataclass` #

Agent chat response.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@dataclass
class AgentChatResponse:
    """Agent chat response.

    Attributes:
        response: Response text; also what ``str(obj)`` returns.
        sources: Tool outputs attached to the response.
        source_nodes: Source nodes; when left empty they are collected from
            ``sources`` in ``__post_init__``.
        is_dummy_stream: Must be True for the fake-streaming generators
            (``response_gen`` / ``async_response_gen``) to be usable.
    """

    response: str = ""
    sources: List[ToolOutput] = field(default_factory=list)
    source_nodes: List[NodeWithScore] = field(default_factory=list)
    is_dummy_stream: bool = False

    def __post_init__(self) -> None:
        """Collect source nodes from ``sources`` if none were supplied."""
        if self.sources and not self.source_nodes:
            for tool_output in self.sources:
                # Only (streaming) query responses are read for source nodes.
                if isinstance(tool_output.raw_output, (Response, StreamingResponse)):
                    self.source_nodes.extend(tool_output.raw_output.source_nodes)

    def __str__(self) -> str:
        """Return the response text."""
        return self.response

    @property
    def response_gen(self) -> Generator[str, None, None]:
        """Used for fake streaming, i.e. with tool outputs.

        Yields the response split on single spaces, one token at a time,
        each followed by a space, pausing 0.1 seconds after every token.

        Raises:
            ValueError: On first iteration when ``is_dummy_stream`` is False.
        """
        if not self.is_dummy_stream:
            raise ValueError(
                "response_gen is only available for streaming responses. "
                "Set is_dummy_stream=True if you still want a generator."
            )

        for token in self.response.split(" "):
            # NOTE: every token, including the last, gets a trailing space.
            yield token + " "
            time.sleep(0.1)

    async def async_response_gen(self) -> AsyncGenerator[str, None]:
        """Used for fake streaming, i.e. with tool outputs.

        Async counterpart of ``response_gen``: same tokens, but the 0.1
        second pause is awaited with ``asyncio.sleep``.

        Raises:
            ValueError: On first iteration when ``is_dummy_stream`` is False.
        """
        if not self.is_dummy_stream:
            # Name this method (not the sync property) in the error message.
            raise ValueError(
                "async_response_gen is only available for streaming responses. "
                "Set is_dummy_stream=True if you still want a generator."
            )

        for token in self.response.split(" "):
            yield token + " "
            await asyncio.sleep(0.1)

response_gen `property` #

response_gen: Generator[str, None, None]

Used for fake streaming, i.e. with tool outputs.

async_response_gen `async` #

async_response_gen() -> AsyncGenerator[str, None]

Used for fake streaming, i.e. with tool outputs.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

async def async_response_gen(self) -> AsyncGenerator[str, None]:
    """Used for fake streaming, i.e. with tool outputs.

    Async counterpart of ``response_gen``: same tokens, but the 0.1
    second pause is awaited with ``asyncio.sleep``.

    Raises:
        ValueError: On first iteration when ``is_dummy_stream`` is False.
    """
    if not self.is_dummy_stream:
        # Name this method (not the sync property) in the error message.
        raise ValueError(
            "async_response_gen is only available for streaming responses. "
            "Set is_dummy_stream=True if you still want a generator."
        )

    for token in self.response.split(" "):
        yield token + " "
        await asyncio.sleep(0.1)

StreamingAgentChatResponse `dataclass` #

Streaming chat response to user and writing to chat history.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@dataclass
class StreamingAgentChatResponse:
    """Streaming chat response to user and writing to chat history.

    A writer (``write_response_to_history`` or ``awrite_response_to_history``)
    iterates the chat stream, queues every non-empty delta and stores the
    final message in memory. A reader (``response_gen`` or
    ``async_response_gen``) drains the matching queue and accumulates the
    text into ``response``.
    """

    response: str = ""
    sources: List[ToolOutput] = field(default_factory=list)
    chat_stream: Optional[ChatResponseGen] = None
    achat_stream: Optional[ChatResponseAsyncGen] = None
    source_nodes: List[NodeWithScore] = field(default_factory=list)
    # raw concatenation of consumed deltas; `response` is its stripped form
    _unformatted_response: str = ""
    # deltas queued by the synchronous writer
    _queue: queue.Queue = field(default_factory=queue.Queue)
    # deltas queued by the async writer; created by _ensure_async_setup
    _aqueue: Optional[asyncio.Queue] = None
    # flag when chat message is a function call
    _is_function: Optional[bool] = None
    # flag when processing done
    # NOTE: unannotated, so this is a class-level default and not a dataclass
    # field; the writers shadow it on the instance when they finish.
    _is_done = False
    # signal when a new item is added to the queue
    _new_item_event: Optional[asyncio.Event] = None
    # NOTE: async code uses two events rather than one since it yields
    # control when waiting for queue item
    # signal when the OpenAI functions stop executing
    _is_function_false_event: Optional[asyncio.Event] = None
    # signal when an OpenAI function is being executed
    _is_function_not_none_thread_event: Event = field(default_factory=Event)

    def __post_init__(self) -> None:
        """Collect source nodes from ``sources`` if none were supplied."""
        if self.sources and not self.source_nodes:
            for tool_output in self.sources:
                if isinstance(tool_output.raw_output, (Response, StreamingResponse)):
                    self.source_nodes.extend(tool_output.raw_output.source_nodes)

    def __str__(self) -> str:
        """Return the response text.

        If the writer has finished, the message is not a function call and
        deltas are still queued, they are drained into ``response`` first.
        """
        if self._is_done and not self._queue.empty() and not self._is_function:
            while self._queue.queue:
                delta = self._queue.queue.popleft()
                # put_in_queue accepts None; skip it like the async reader does.
                if delta is not None:
                    self._unformatted_response += delta
            self.response = self._unformatted_response.strip()
        return self.response

    def _ensure_async_setup(self) -> None:
        """Create the asyncio queue and events that do not exist yet."""
        if self._aqueue is None:
            self._aqueue = asyncio.Queue()
        if self._new_item_event is None:
            self._new_item_event = asyncio.Event()
        if self._is_function_false_event is None:
            self._is_function_false_event = asyncio.Event()

    def put_in_queue(self, delta: Optional[str]) -> None:
        """Queue ``delta`` for ``response_gen`` and set the thread event."""
        self._queue.put_nowait(delta)
        self._is_function_not_none_thread_event.set()

    def aput_in_queue(self, delta: Optional[str]) -> None:
        """Queue ``delta`` for ``async_response_gen`` and set the item event."""
        # The async queue/event default to None; make sure they exist so a
        # call made before any other async method does not fail on None.
        self._ensure_async_setup()
        self._aqueue.put_nowait(delta)
        self._new_item_event.set()

    @dispatcher.span
    def write_response_to_history(
        self,
        memory: BaseMemory,
        on_stream_end_fn: Optional[callable] = None,
        raise_error: bool = False,
    ) -> None:
        """Consume ``chat_stream``, queue its deltas and store the final message.

        Args:
            memory: Receives the last streamed message (content replaced by
                the stripped concatenation of all deltas) via ``memory.put``.
            on_stream_end_fn: Optional callback invoked at the end when the
                message is not a function call.
            raise_error: Re-raise exceptions hit while consuming the stream
                instead of only logging a warning.

        Raises:
            ValueError: If ``chat_stream`` is None.
        """
        if self.chat_stream is None:
            raise ValueError(
                "chat_stream is None. Cannot write to history without chat_stream."
            )
        dispatch_event = dispatcher.get_dispatch_event()

        # try/except to prevent hanging on error
        dispatch_event(StreamChatStartEvent())
        try:
            pieces = []
            last_chat = None
            for chat in self.chat_stream:
                last_chat = chat
                self._is_function = is_function(chat.message)
                if chat.delta:
                    dispatch_event(
                        StreamChatDeltaReceivedEvent(
                            delta=chat.delta,
                        )
                    )
                    self.put_in_queue(chat.delta)
                pieces.append(chat.delta or "")
            if last_chat is not None:  # if loop has gone through iteration
                # NOTE: this is to handle the special case where we consume some of the
                # chat stream, but not all of it (e.g. in react agent)
                last_chat.message.content = "".join(pieces).strip()  # final message
                memory.put(last_chat.message)
        except Exception as e:
            dispatch_event(StreamChatErrorEvent(exception=e))
            if raise_error:
                raise
            logger.warning(
                f"Encountered exception writing response to history: {e}"
            )
        finally:
            # Always release consumers, including when the error is re-raised;
            # otherwise `response_gen` would keep polling for `_is_done`.
            self._is_done = True

            # This act as is_done events for any consumers waiting
            self._is_function_not_none_thread_event.set()
        dispatch_event(StreamChatEndEvent())

        if on_stream_end_fn is not None and not self._is_function:
            on_stream_end_fn()

    @dispatcher.span
    async def awrite_response_to_history(
        self,
        memory: BaseMemory,
        on_stream_end_fn: Optional[callable] = None,
    ) -> None:
        """Async version of ``write_response_to_history`` for ``achat_stream``.

        Exceptions raised while consuming the stream are dispatched as a
        ``StreamChatErrorEvent`` and logged as a warning, not re-raised.

        Args:
            memory: Receives the last streamed message (content replaced by
                the stripped concatenation of all deltas) via ``memory.put``.
            on_stream_end_fn: Optional callback invoked at the end when the
                message is not a function call.

        Raises:
            ValueError: If ``achat_stream`` is None.
        """
        self._ensure_async_setup()
        dispatch_event = dispatcher.get_dispatch_event()

        if self.achat_stream is None:
            raise ValueError(
                "achat_stream is None. Cannot asynchronously write to "
                "history without achat_stream."
            )

        # try/except to prevent hanging on error
        dispatch_event(StreamChatStartEvent())
        try:
            pieces = []
            last_chat = None
            async for chat in self.achat_stream:
                last_chat = chat
                self._is_function = is_function(chat.message)
                if chat.delta:
                    dispatch_event(
                        StreamChatDeltaReceivedEvent(
                            delta=chat.delta,
                        )
                    )
                    self.aput_in_queue(chat.delta)
                pieces.append(chat.delta or "")
                self._new_item_event.set()
                if self._is_function is False:
                    self._is_function_false_event.set()
            if last_chat is not None:  # if loop has gone through iteration
                # NOTE: this is to handle the special case where we consume some of the
                # chat stream, but not all of it (e.g. in react agent)
                last_chat.message.content = "".join(pieces).strip()  # final message
                memory.put(last_chat.message)
        except Exception as e:
            dispatch_event(StreamChatErrorEvent(exception=e))
            logger.warning(f"Encountered exception writing response to history: {e}")
        finally:
            # Also runs on task cancellation (not an `Exception`), so that
            # `async_response_gen` consumers are not left polling forever.
            self._is_done = True

            # These act as is_done events for any consumers waiting
            self._is_function_false_event.set()
            self._new_item_event.set()
        dispatch_event(StreamChatEndEvent())

        if on_stream_end_fn is not None and not self._is_function:
            on_stream_end_fn()

    @property
    def response_gen(self) -> Generator[str, None, None]:
        """Yield queued deltas until the writer is done and the queue is empty.

        Every yielded delta is appended to the accumulated text; when the
        generator finishes, ``response`` is set to that text, stripped.
        """
        while not self._is_done or not self._queue.empty():
            try:
                delta = self._queue.get(block=False)
            except queue.Empty:
                # Queue is empty, but we're not done yet. Sleep for 0 secs to release the GIL and allow other threads to run.
                time.sleep(0)
                continue
            # put_in_queue accepts None; skip it like the async reader does.
            if delta is None:
                continue
            self._unformatted_response += delta
            yield delta
        self.response = self._unformatted_response.strip()

    async def async_response_gen(self) -> AsyncGenerator[str, None]:
        """Async version of ``response_gen`` reading from the asyncio queue.

        Waits up to 0.1 seconds per queue read so the done flag is
        re-checked regularly; ``None`` items are skipped.
        """
        self._ensure_async_setup()
        while True:
            if not self._aqueue.empty() or not self._is_done:
                try:
                    delta = await asyncio.wait_for(self._aqueue.get(), timeout=0.1)
                except asyncio.TimeoutError:
                    if self._is_done:
                        break
                    continue
                if delta is not None:
                    self._unformatted_response += delta
                    yield delta
            else:
                break
        self.response = self._unformatted_response.strip()

    def print_response_stream(self) -> None:
        """Print each delta from ``response_gen`` as it arrives."""
        for token in self.response_gen:
            print(token, end="", flush=True)

    async def aprint_response_stream(self) -> None:
        """Print each delta from ``async_response_gen`` as it arrives."""
        async for token in self.async_response_gen():
            print(token, end="", flush=True)

BaseChatEngine #

Bases: ABC

Base Chat Engine.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

class BaseChatEngine(ABC):
    """Abstract base class for chat engines.

    Subclasses provide ``reset``, the four chat entry points and
    ``chat_history``; the two REPL helpers are built on ``chat`` and
    ``stream_chat``.
    """

    @abstractmethod
    def reset(self) -> None:
        """Reset the conversation state held by the engine."""

    @abstractmethod
    def chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AGENT_CHAT_RESPONSE_TYPE:
        """Send a message through the main chat interface.

        Args:
            message: The message text.
            chat_history: Optional list of chat messages; how it is used is
                left to the concrete engine.
        """

    @abstractmethod
    def stream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """Send a message through the streaming chat interface.

        Args:
            message: The message text.
            chat_history: Optional list of chat messages; how it is used is
                left to the concrete engine.
        """

    @abstractmethod
    async def achat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AGENT_CHAT_RESPONSE_TYPE:
        """Async version of ``chat``.

        Args:
            message: The message text.
            chat_history: Optional list of chat messages; how it is used is
                left to the concrete engine.
        """

    @abstractmethod
    async def astream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """Async version of ``stream_chat``.

        Args:
            message: The message text.
            chat_history: Optional list of chat messages; how it is used is
                left to the concrete engine.
        """

    def chat_repl(self) -> None:
        """Run an interactive chat loop on stdin/stdout.

        Resets the engine, then answers each line typed by the user via
        ``chat`` until the line is exactly ``exit``.
        """
        print("===== Entering Chat REPL =====")
        print('Type "exit" to exit.\n')
        self.reset()
        # iter(callable, sentinel) keeps prompting until the input is "exit".
        for message in iter(lambda: input("Human: "), "exit"):
            response = self.chat(message)
            print(f"Assistant: {response}\n")

    def streaming_chat_repl(self) -> None:
        """Run an interactive chat loop that prints streamed responses.

        Resets the engine, then answers each line typed by the user via
        ``stream_chat`` until the line is exactly ``exit``.
        """
        print("===== Entering Chat REPL =====")
        print('Type "exit" to exit.\n')
        self.reset()
        # iter(callable, sentinel) keeps prompting until the input is "exit".
        for message in iter(lambda: input("Human: "), "exit"):
            response = self.stream_chat(message)
            print("Assistant: ", end="", flush=True)
            response.print_response_stream()
            print("\n")

    @property
    @abstractmethod
    def chat_history(self) -> List[ChatMessage]:
        """Chat history as a list of ``ChatMessage`` objects."""

reset `abstractmethod` #

reset() -> None

Reset conversation state.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@abstractmethod
def reset(self) -> None:
    """Reset the conversation state held by the engine."""

chat `abstractmethod` #

chat(message: str, chat_history: Optional[List[ChatMessage]] = None) -> AGENT_CHAT_RESPONSE_TYPE

Main chat interface.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@abstractmethod
def chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> AGENT_CHAT_RESPONSE_TYPE:
    """Send a message through the main chat interface.

    Args:
        message: The message text.
        chat_history: Optional list of chat messages; how it is used is
            left to the concrete engine.
    """

stream_chat `abstractmethod` #

stream_chat(message: str, chat_history: Optional[List[ChatMessage]] = None) -> StreamingAgentChatResponse

Stream chat interface.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@abstractmethod
def stream_chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> StreamingAgentChatResponse:
    """Send a message through the streaming chat interface.

    Args:
        message: The message text.
        chat_history: Optional list of chat messages; how it is used is
            left to the concrete engine.
    """

achat `abstractmethod` `async` #

achat(message: str, chat_history: Optional[List[ChatMessage]] = None) -> AGENT_CHAT_RESPONSE_TYPE

Async version of main chat interface.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@abstractmethod
async def achat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> AGENT_CHAT_RESPONSE_TYPE:
    """Async version of ``chat``.

    Args:
        message: The message text.
        chat_history: Optional list of chat messages; how it is used is
            left to the concrete engine.
    """

astream_chat `abstractmethod` `async` #

astream_chat(message: str, chat_history: Optional[List[ChatMessage]] = None) -> StreamingAgentChatResponse

Async version of the streaming chat interface.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

@abstractmethod
async def astream_chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> StreamingAgentChatResponse:
    """Async version of ``stream_chat``.

    Args:
        message: The message text.
        chat_history: Optional list of chat messages; how it is used is
            left to the concrete engine.
    """

chat_repl #

chat_repl() -> None

Enter interactive chat REPL.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

def chat_repl(self) -> None:
    """Run an interactive chat loop on stdin/stdout.

    Resets the engine, then answers each line typed by the user via
    ``chat`` until the line is exactly ``exit``.
    """
    print("===== Entering Chat REPL =====")
    print('Type "exit" to exit.\n')
    self.reset()
    # iter(callable, sentinel) keeps prompting until the input is "exit".
    for message in iter(lambda: input("Human: "), "exit"):
        response = self.chat(message)
        print(f"Assistant: {response}\n")

streaming_chat_repl #

streaming_chat_repl() -> None

Enter interactive chat REPL with streaming responses.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

def streaming_chat_repl(self) -> None:
    """Run an interactive chat loop that prints streamed responses.

    Resets the engine, then answers each line typed by the user via
    ``stream_chat`` until the line is exactly ``exit``.
    """
    print("===== Entering Chat REPL =====")
    print('Type "exit" to exit.\n')
    self.reset()
    # iter(callable, sentinel) keeps prompting until the input is "exit".
    for message in iter(lambda: input("Human: "), "exit"):
        response = self.stream_chat(message)
        print("Assistant: ", end="", flush=True)
        response.print_response_stream()
        print("\n")

ChatMode #

Bases: str, Enum

Chat Engine Modes.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

class ChatMode(str, Enum):
    """Modes a chat engine can run in."""

    SIMPLE = "simple"
    """Corresponds to `SimpleChatEngine`.

    Chat with the LLM directly, without using a knowledge base.
    """

    CONDENSE_QUESTION = "condense_question"
    """Corresponds to `CondenseQuestionChatEngine`.

    A standalone question is first generated from the conversation context and
    the last message; the query engine is then queried for a response.
    """

    CONTEXT = "context"
    """Corresponds to `ContextChatEngine`.

    Text is first retrieved from the index using the user's message; that
    context is then placed in the system prompt to generate a response.
    """

    CONDENSE_PLUS_CONTEXT = "condense_plus_context"
    """Corresponds to `CondensePlusContextChatEngine`.

    The conversation and latest user message are first condensed to a
    standalone question. A context for that question is then built from a
    retriever. Finally the context is passed, along with the prompt and user
    message, to the LLM to generate a response.
    """

    REACT = "react"
    """Corresponds to `ReActAgent`.

    Runs a ReAct agent loop with query engine tools.
    """

    OPENAI = "openai"
    """Corresponds to `OpenAIAgent`.

    Runs an OpenAI function calling agent loop.

    NOTE: only works with OpenAI models that support function calling API.
    """

    BEST = "best"
    """Select the best chat engine based on the current LLM.

    Corresponds to `OpenAIAgent` when using an OpenAI model that supports the
    function calling API; otherwise corresponds to `ReActAgent`.
    """

SIMPLE `class-attribute` `instance-attribute` #

SIMPLE = 'simple'

Corresponds to SimpleChatEngine.

Chat with LLM, without making use of a knowledge base.

CONDENSE_QUESTION `class-attribute` `instance-attribute` #

CONDENSE_QUESTION = 'condense_question'

Corresponds to CondenseQuestionChatEngine.

First generate a standalone question from conversation context and last message, then query the query engine for a response.

CONTEXT `class-attribute` `instance-attribute` #

CONTEXT = 'context'

Corresponds to ContextChatEngine.

First retrieve text from the index using the user's message, then use the context in the system prompt to generate a response.

CONDENSE_PLUS_CONTEXT `class-attribute` `instance-attribute` #

CONDENSE_PLUS_CONTEXT = 'condense_plus_context'

Corresponds to CondensePlusContextChatEngine.

First condense a conversation and latest user message to a standalone question. Then build a context for the standalone question from a retriever. Then pass the context along with the prompt and user message to the LLM to generate a response.

REACT `class-attribute` `instance-attribute` #

REACT = 'react'

Corresponds to ReActAgent.

Use a ReAct agent loop with query engine tools.

OPENAI `class-attribute` `instance-attribute` #

OPENAI = 'openai'

Corresponds to OpenAIAgent.

Use an OpenAI function calling agent loop.

NOTE: only works with OpenAI models that support function calling API.

BEST `class-attribute` `instance-attribute` #

BEST = 'best'

Select the best chat engine based on the current LLM.

Corresponds to OpenAIAgent if using an OpenAI model that supports function calling API, otherwise, corresponds to ReActAgent.

is_function #

is_function(message: ChatMessage) -> bool

Utility for ChatMessage responses from OpenAI models.

Source code in llama-index-core/llama_index/core/chat_engine/types.py

def is_function(message: ChatMessage) -> bool:
    """Report whether a chat message carries tool calls.

    Utility for `ChatMessage` responses from OpenAI models: the result is
    True when the message's ``additional_kwargs`` has a ``"tool_calls"`` key.
    """
    extra_kwargs = message.additional_kwargs
    return "tool_calls" in extra_kwargs

Index

ChatResponseMode #

AgentChatResponse dataclass #

response_gen property #

async_response_gen async #

StreamingAgentChatResponse dataclass #

BaseChatEngine #

reset abstractmethod #

chat abstractmethod #

stream_chat abstractmethod #

achat abstractmethod async #

astream_chat abstractmethod async #

chat_repl #

streaming_chat_repl #

ChatMode #

SIMPLE class-attribute instance-attribute #

CONDENSE_QUESTION class-attribute instance-attribute #

CONTEXT class-attribute instance-attribute #

CONDENSE_PLUS_CONTEXT class-attribute instance-attribute #

REACT class-attribute instance-attribute #

OPENAI class-attribute instance-attribute #

BEST class-attribute instance-attribute #

is_function #

AgentChatResponse `dataclass` #

response_gen `property` #

async_response_gen `async` #

StreamingAgentChatResponse `dataclass` #

reset `abstractmethod` #

chat `abstractmethod` #

stream_chat `abstractmethod` #

achat `abstractmethod` `async` #

astream_chat `abstractmethod` `async` #

SIMPLE `class-attribute` `instance-attribute` #

CONDENSE_QUESTION `class-attribute` `instance-attribute` #

CONTEXT `class-attribute` `instance-attribute` #

CONDENSE_PLUS_CONTEXT `class-attribute` `instance-attribute` #

REACT `class-attribute` `instance-attribute` #

OPENAI `class-attribute` `instance-attribute` #

BEST `class-attribute` `instance-attribute` #