Document Store#

class llama_index.storage.docstore.BaseDocumentStore#
abstract async adelete_document(doc_id: str, raise_error: bool = True) None#

Delete a document from the store.

abstract async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

abstract async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

abstract async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

abstract delete_document(doc_id: str, raise_error: bool = True) None#

Delete a document from the store.

abstract delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

abstract get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

abstract get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

llama_index.storage.docstore.DocumentStore#

alias of SimpleDocumentStore

class llama_index.storage.docstore.DynamoDBDocumentStore(dynamodb_kvstore: DynamoDBKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

class llama_index.storage.docstore.FirestoreDocumentStore(firestore_kvstore: FirestoreKVStore, namespace: Optional[str] = None, batch_size: int = 1)#

Firestore Document (Node) store.

A Firestore store for Document and Node objects.

Parameters
  • firestore_kvstore (FirestoreKVStore) – Firestore key-value store

  • namespace (str) – namespace for the docstore

add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

classmethod from_database(project: str, database: str, namespace: Optional[str] = None) FirestoreDocumentStore#
Parameters
  • project (str) – The project which the client acts on behalf of.

  • database (str) – The database name that the client targets.

  • namespace (str) – namespace for the docstore.

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

class llama_index.storage.docstore.KVDocumentStore(kvstore: BaseKVStore, namespace: Optional[str] = None, batch_size: int = 1)#

Document (Node) store.

NOTE: at the moment, this store is primarily used to store Node objects. Each node will be assigned an ID.

The same docstore can be reused across index structures. This allows you to reuse the same storage for multiple index structures; otherwise, each index would create a docstore under the hood.

This will use the same docstore for multiple index structures.

Parameters
  • kvstore (BaseKVStore) – key-value store

  • namespace (str) – namespace for the docstore

add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

class llama_index.storage.docstore.MongoDocumentStore(mongo_kvstore: MongoDBKVStore, namespace: Optional[str] = None, batch_size: int = 1)#

Mongo Document (Node) store.

A MongoDB store for Document and Node objects.

Parameters
  • mongo_kvstore (MongoDBKVStore) – MongoDB key-value store

  • namespace (str) – namespace for the docstore

add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

classmethod from_host_and_port(host: str, port: int, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore#

Load a MongoDocumentStore from a MongoDB host and port.

classmethod from_uri(uri: str, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore#

Load a MongoDocumentStore from a MongoDB URI.

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

class llama_index.storage.docstore.RedisDocumentStore(redis_kvstore: RedisKVStore, namespace: Optional[str] = None, batch_size: int = 1)#

Redis Document (Node) store.

A Redis store for Document and Node objects.

Parameters
  • redis_kvstore (RedisKVStore) – Redis key-value store

  • namespace (str) – namespace for the docstore

add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

classmethod from_host_and_port(host: str, port: int, namespace: Optional[str] = None) RedisDocumentStore#

Load a RedisDocumentStore from a Redis host and port.

classmethod from_redis_client(redis_client: Any, namespace: Optional[str] = None) RedisDocumentStore#

Load a RedisDocumentStore from a Redis Client.

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

class llama_index.storage.docstore.SimpleDocumentStore(simple_kvstore: Optional[SimpleKVStore] = None, namespace: Optional[str] = None, batch_size: int = 1)#

Simple Document (Node) store.

An in-memory store for Document and Node objects.

Parameters
  • simple_kvstore (SimpleKVStore) – simple key-value store

  • namespace (str) – namespace for the docstore

add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

async adocument_exists(doc_id: str) bool#

Check if document exists.

async aget_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

async aget_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

async aget_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

async aref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

async aset_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

async aset_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).

async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None#

Add a document to the store.

Parameters
  • nodes (Sequence[BaseNode]) – nodes (documents) to add

  • allow_update (bool) – allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None#

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None#

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseNode]#

Get all documents.

Returns

documents

Return type

Dict[str, BaseNode]

document_exists(doc_id: str) bool#

Check if document exists.

classmethod from_persist_dir(persist_dir: str = './storage', namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore#

Create a SimpleDocumentStore from a persist directory.

Parameters
  • persist_dir (str) – directory to persist the store

  • namespace (Optional[str]) – namespace for the docstore

  • fs (Optional[fsspec.AbstractFileSystem]) – filesystem to use

classmethod from_persist_path(persist_path: str, namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore#

Create a SimpleDocumentStore from a persist path.

Parameters
  • persist_path (str) – Path to persist the store

  • namespace (Optional[str]) – namespace for the docstore

  • fs (Optional[fsspec.AbstractFileSystem]) – filesystem to use

get_all_document_hashes() Dict[str, str]#

Get the stored hash for all documents.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]#

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode]#

Get a document from the store.

Parameters
  • doc_id (str) – document id

  • raise_error (bool) – raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]#

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) BaseNode#

Get node from docstore.

Parameters
  • node_id (str) – node id

  • raise_error (bool) – raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode]#

Get node dict from docstore given a mapping of index to node ids.

Parameters

node_id_dict (Dict[int, str]) – mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode]#

Get nodes from docstore.

Parameters
  • node_ids (List[str]) – node ids

  • raise_error (bool) – raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]#

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None#

Persist the store.

ref_doc_exists(ref_doc_id: str) bool#

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None#

Set the hash for a given doc_id.

set_document_hashes(doc_hashes: Dict[str, str]) None#

Set the hashes for multiple doc_ids (mapping of doc_id to hash).