Streaming

Load documents, build the VectorStoreIndex

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor
from langchain import OpenAI
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", streaming=True))
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor
)
# load documents
documents = SimpleDirectoryReader('../../data/paul_graham').load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 20729 tokens
> [build_index_from_nodes] Total embedding token usage: 20729 tokens
> [build_index_from_nodes] Total embedding token usage: 20729 tokens
> [build_index_from_nodes] Total embedding token usage: 20729 tokens

Query Index

# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=1
)
response_stream = query_engine.query(
    "What did the author do growing up?", 
)
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens
> [retrieve] Total embedding token usage: 8 tokens
> [retrieve] Total embedding token usage: 8 tokens
> [retrieve] Total embedding token usage: 8 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens
> [get_response] Total LLM token usage: 0 tokens
> [get_response] Total LLM token usage: 0 tokens
> [get_response] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens
> [get_response] Total embedding token usage: 0 tokens
response_stream.print_response_stream()
The author grew up writing short stories and programming on an IBM 1401. He also nagged his father to buy him a TRS-80 microcomputer, on which he wrote simple games, a program to predict how high his model rockets would fly, and a word processor. He also studied philosophy in college.