Skip to content

Docugami kg rag

DocugamiKgRagPack #

Bases: BaseLlamaPack

Docugami KG-RAG Pack.

A pack that lists and indexes Docugami docsets and builds a ReAct agent over retrieval tools for the locally indexed docsets (and, optionally, their reports).

Source code in llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class DocugamiKgRagPack(BaseLlamaPack):
    """Docugami KG-RAG Pack.

    Wraps a Docugami client to list the docsets in a workspace, index a
    docset, and build a ReAct agent whose tools retrieve from the locally
    indexed docsets (and, optionally, their reports).

    ``run`` requires ``build_agent_for_docset`` to have been called first.
    """

    def __init__(self) -> None:
        """Create the Docugami client; the agent is built later on demand."""
        self.docugami_client = Docugami()
        # Set by build_agent_for_docset(). Kept as None until then so run()
        # can report a clear error instead of failing on a missing attribute.
        self.agent = None

    def list_docsets(self) -> None:
        """
        Print the name and id of every docset returned by the Docugami client.

        Entries are numbered starting at 1.
        """
        docsets_response = self.docugami_client.docsets.list()
        for idx, docset in enumerate(docsets_response.docsets, start=1):
            print(f"{idx}: {docset.name} (ID: {docset.id})")

    def index_docset(self, docset_id: str, overwrite: bool = False) -> None:
        """
        Build the index for the docset with the given id.

        This method does not build the agent; call
        ``build_agent_for_docset`` afterwards.

        Args:
            docset_id: Id of the docset to index. It must be one of the
                docsets returned by the Docugami client.
            overwrite: Forwarded unchanged to the module-level
                ``index_docset`` helper.

        Raises:
            ValueError: If no docset with ``docset_id`` is found.
        """
        docsets_response = self.docugami_client.docsets.list()
        docset = next(
            (docset for docset in docsets_response.docsets if docset.id == docset_id),
            None,
        )

        # ``next`` falls back to None when no docset id matches.
        if docset is None:
            raise ValueError(
                f"Docset with id {docset_id} does not exist in your workspace"
            )

        # Resolves to the module-level ``index_docset`` helper, not this method:
        # class attributes are not visible as bare names inside method bodies.
        index_docset(docset_id, docset.name, overwrite)

    def build_agent_for_docset(
        self, docset_id: str, use_reports: bool = DEFAULT_USE_REPORTS
    ):
        """
        Build the ReAct agent and store it on ``self.agent``.

        Tools are collected from every docset present in the local index
        state, not only from ``docset_id``.

        Args:
            docset_id: Accepted as part of the public signature.
                NOTE(review): the tool list is built from all locally
                indexed docsets; confirm whether it should be restricted
                to this id.
            use_reports: When true, also add a retrieval tool for each
                report of each locally indexed docset.
        """
        local_state = read_all_local_index_state()

        tools: List[BaseTool] = []
        # The loop variable deliberately differs from the ``docset_id``
        # parameter so the caller's argument is not overwritten.
        for indexed_docset_id in local_state:
            docset_state = local_state[indexed_docset_id]
            direct_retrieval_tool = get_retrieval_tool_for_docset(
                indexed_docset_id, docset_state
            )
            if direct_retrieval_tool:
                # Direct retrieval tool for each indexed docset (direct KG-RAG against semantic XML)
                tools.append(direct_retrieval_tool)

            if use_reports:
                for report in docset_state.reports:
                    # Report retrieval tool for each published report (user-curated views on semantic XML)
                    report_retrieval_tool = get_retrieval_tool_for_report(report)
                    if report_retrieval_tool:
                        tools.append(report_retrieval_tool)

        self.agent = ReActAgent.from_tools(
            tools,
            llm=LARGE_CONTEXT_INSTRUCT_LLM,
            verbose=True,
            context=ASSISTANT_SYSTEM_MESSAGE,
        )

    def get_modules(self) -> Dict[str, Any]:
        """Get modules.

        Returns:
            A dict with the single key ``"agent"``. Its value is None until
            ``build_agent_for_docset`` has been called.
        """
        return {
            "agent": self.agent,
        }

    def run(self, *args: Any, **kwargs: Any) -> Any:
        """Run the pipeline by forwarding all arguments to ``self.agent.query``.

        Raises:
            RuntimeError: If the agent has not been built yet.
        """
        if self.agent is None:
            raise RuntimeError(
                "Agent has not been built; call build_agent_for_docset() first."
            )
        return self.agent.query(*args, **kwargs)

list_docsets #

list_docsets()

List your Docugami docsets with their names and IDs.

Source code in llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
35
36
37
38
39
40
41
def list_docsets(self):
    """
    Print a numbered listing of your Docugami docsets.

    Each line shows the 1-based position, the docset name and the docset id.
    """
    available = self.docugami_client.docsets.list().docsets
    for number, entry in enumerate(available, start=1):
        line = f"{number}: {entry.name} (ID: {entry.id})"
        print(line)

index_docset #

index_docset(docset_id: str, overwrite: bool = False)

Build the index for the docset with the given id. Raises an error if no docset with that id exists in your workspace.

Source code in llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def index_docset(self, docset_id: str, overwrite: bool = False):
    """
    Build the index for the docset with the given id.

    Args:
        docset_id: Id of the docset to index. It must be one of the docsets
            returned by the Docugami client.
        overwrite: Forwarded unchanged to the module-level ``index_docset``
            helper.

    Raises:
        ValueError: If no docset with ``docset_id`` is found.
    """
    docsets_response = self.docugami_client.docsets.list()
    docset = next(
        (docset for docset in docsets_response.docsets if docset.id == docset_id),
        None,
    )

    # ``next`` falls back to None when no docset id matches.
    if docset is None:
        raise ValueError(
            f"Docset with id {docset_id} does not exist in your workspace"
        )

    # Inside the class this name resolves to the module-level helper, not
    # to the method itself.
    index_docset(docset_id, docset.name, overwrite)

get_modules #

get_modules() -> Dict[str, Any]

Get modules.

Source code in llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
89
90
91
92
93
def get_modules(self) -> Dict[str, Any]:
    """Return the pack's modules: a dict mapping ``"agent"`` to ``self.agent``."""
    modules = {}
    modules["agent"] = self.agent
    return modules

run #

run(*args: Any, **kwargs: Any) -> Any

Run the pipeline.

Source code in llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
95
96
97
def run(self, *args: Any, **kwargs: Any) -> Any:
    """Forward all positional and keyword arguments to ``self.agent.query`` and return its result."""
    agent = self.agent
    response = agent.query(*args, **kwargs)
    return response