Azure Cognitive Search
Basic Example
In this basic example, we take a Paul Graham essay, split it into chunks, embed it using an OpenAI embedding model, load it into an Azure Cognitive Search index, and then query it.
import logging
import sys
from IPython.display import Markdown, display
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# logger = logging.getLogger(__name__)
#!{sys.executable} -m pip install llama-index
#!{sys.executable} -m pip install azure-search-documents==11.4.0b8
#!{sys.executable} -m pip install azure-identity
# set up OpenAI
import os
import getpass
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
import openai
openai.api_key = os.environ["OPENAI_API_KEY"]
# set up Azure Cognitive Search
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
search_service_name = getpass.getpass("Azure Cognitive Search Service Name:")
key = getpass.getpass("Azure Cognitive Search Key:")
cognitive_search_credential = AzureKeyCredential(key)
service_endpoint = f"https://{search_service_name}.search.windows.net"
# Index name to use
index_name = "quickstart"
# Use index client to demonstrate creating an index
index_client = SearchIndexClient(
    endpoint=service_endpoint,
    credential=cognitive_search_credential,
)
# Use search client to demonstrate using an existing index
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=cognitive_search_credential,
)
Create Index (if it does not exist)
Demonstrates creating a vector index named quickstart if one doesn’t exist. The index has the following fields:
id (Edm.String)
content (Edm.String)
embedding (Collection(Edm.Single))
li_jsonMetadata (Edm.String)
li_doc_id (Edm.String)
author (Edm.String)
theme (Edm.String)
director (Edm.String)
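For reference, here is a rough sketch of these field definitions expressed with the azure-search-documents models. This is an illustration, not the library's exact code: the vector-search keyword arguments track the 11.4.0b8 beta and differ across versions, the 1536 dimensions assume OpenAI's text-embedding-ada-002, and with the complex mapping configured below the theme field would instead be stored as topic.
# Hedged sketch of the generated schema (assumes azure-search-documents==11.4.0b8)
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchableField,
    SearchField,
    SearchFieldDataType,
)
example_fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,  # assumes text-embedding-ada-002
        vector_search_configuration="default",  # configuration name is an assumption
    ),
    SimpleField(name="li_jsonMetadata", type=SearchFieldDataType.String),
    SimpleField(name="li_doc_id", type=SearchFieldDataType.String, filterable=True),
    SimpleField(name="author", type=SearchFieldDataType.String, filterable=True),
    SimpleField(name="theme", type=SearchFieldDataType.String, filterable=True),
    SimpleField(name="director", type=SearchFieldDataType.String, filterable=True),
]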
from llama_index.vector_stores.cogsearch import (
    IndexManagement,
    MetadataIndexFieldType,
    CognitiveSearchVectorStore,
)
# Example of a complex mapping: metadata field 'theme' is mapped to the differently named index field 'topic', with its type explicitly set
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}
# A simplified metadata specification is available if all metadata and index fields are identically named
# metadata_fields = {"author", "theme", "director"}
vector_store = CognitiveSearchVectorStore(
    search_or_index_client=index_client,
    index_name=index_name,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="content",
    embedding_field_key="embedding",
    metadata_string_field_key="li_jsonMetadata",
    doc_id_field_key="li_doc_id",
)
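As an optional sanity check, you can read the index definition back from the service once it exists (depending on the library version, CREATE_IF_NOT_EXISTS may create the index at construction time or on first insert):
# Optional: fetch the index definition and list its field names
created_index = index_client.get_index(index_name)
print([f.name for f in created_index.fields])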
# define embedding function
from llama_index.embeddings import OpenAIEmbedding
from llama_index import (
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    VectorStoreIndex,
)
embed_model = OpenAIEmbedding()
# load documents
documents = SimpleDirectoryReader(
    "../../../examples/paul_graham_essay/data"
).load_data()
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(embed_model=embed_model)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)
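Optionally, confirm that the chunks were uploaded; get_document_count is a standard SearchClient method, though the count can lag briefly behind an upload:
# Optional: check how many documents the index now holds
print(search_client.get_document_count())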
# Query Data
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")
display(Markdown(f"<b>{response}</b>"))
The author wrote short stories and programmed on an IBM 1401 computer during their time in school. They later got their own microcomputer, a TRS-80, and started programming games and a word processor.
response = query_engine.query(
    "What did the author learn?",
)
display(Markdown(f"<b>{response}</b>"))
The author learned several things during their time at Interleaf. They learned that it’s better for technology companies to be run by product people than sales people, that code edited by too many people leads to bugs, that cheap office space is not worth it if it’s depressing, that planned meetings are inferior to corridor conversations, that big bureaucratic customers can be a dangerous source of money, and that there’s not much overlap between conventional office hours and the optimal time for hacking. However, the most important thing the author learned is that the low end eats the high end, meaning that it’s better to be the “entry level” option because if you’re not, someone else will be and will surpass you.
Use Existing Index
from llama_index.vector_stores.cogsearch import (
    IndexManagement,
    MetadataIndexFieldType,
    CognitiveSearchVectorStore,
)
index_name = "quickstart"
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}
vector_store = CognitiveSearchVectorStore(
    search_or_index_client=search_client,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.NO_VALIDATION,
    id_field_key="id",
    chunk_field_key="content",
    embedding_field_key="embedding",
    metadata_string_field_key="li_jsonMetadata",
    doc_id_field_key="li_doc_id",
)
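Because NO_VALIDATION skips any schema check against the existing index, it can be worth inspecting the index directly. Here is a minimal raw keyword query through the underlying SearchClient (the field names assume the schema created earlier):
# Optional: query the index directly, bypassing LlamaIndex
results = search_client.search(search_text="Interleaf", top=3)
for result in results:
    print(result["id"], result["content"][:80])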
# define embedding function
from llama_index.embeddings import OpenAIEmbedding
from llama_index import (
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    VectorStoreIndex,
)
embed_model = OpenAIEmbedding()
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(embed_model=embed_model)
index = VectorStoreIndex.from_documents(
    [], storage_context=storage_context, service_context=service_context
)
query_engine = index.as_query_engine()
response = query_engine.query("What was a hard moment for the author?")
display(Markdown(f"<b>{response}</b>"))
The author experienced a difficult moment when their mother had a stroke and was put in a nursing home. The stroke destroyed her balance, and the author and their sister were determined to help her get out of the nursing home and back to her house.
response = query_engine.query("Who is the author?")
display(Markdown(f"<b>{response}</b>"))
The author of the given context is Paul Graham.
import time
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What happened at interleaf?")
# Stream tokens as they arrive and measure throughput
start_time = time.time()
token_count = 0
for token in response.response_gen:
    print(token, end="")
    token_count += 1
time_elapsed = time.time() - start_time
tokens_per_second = token_count / time_elapsed
print(f"\n\nStreamed output at {tokens_per_second} tokens/s")
At Interleaf, there was a group called Release Engineering that seemed to be as big as the group that actually wrote the software. The software at Interleaf had to be updated on the server, and there was a lot of emphasis on high production values to make the online store builders look legitimate.
Streamed output at 20.953424485215063 tokens/s
Adding a Document to an Existing Index
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))
The color of the sky can vary depending on various factors such as time of day, weather conditions, and location. It can range from shades of blue during the day to hues of orange, pink, and purple during sunrise or sunset.
from llama_index import Document
index.insert_nodes([Document(text="The sky is indigo today")])
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))
The colour of the sky is indigo.
Filtering
from llama_index.schema import TextNode
nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
        },
    ),
]
index.insert_nodes(nodes)
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters
filters = MetadataFilters(filters=[ExactMatchFilter(key="theme", value="Mafia")])
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
[NodeWithScore(node=TextNode(id_='5a97da0c-8f04-4c63-b90b-8c474d8c273d', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='81cf4b9e847ba42e83fc401e31af8e17d629f0d5cf9c0c320ec7ac69dd0257e1', text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.81316805)]
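Multiple filters can also be combined; in this version of llama_index, MetadataFilters ANDs its ExactMatchFilters together. A small illustrative follow-up (the query string is hypothetical):
# Illustrative: AND two exact-match filters together
filters = MetadataFilters(
    filters=[
        ExactMatchFilter(key="theme", value="Mafia"),
        ExactMatchFilter(key="director", value="Francis Ford Coppola"),
    ]
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Which movie is about a crime family?")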