Skip to content

Feishu docs

FeishuDocsReader #

Bases: BaseReader

Feishu Docs reader.

Reads a page from Google Docs

Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class FeishuDocsReader(BaseReader):
    """Feishu Docs reader.

    Reads a page from Google Docs

    """

    host = "https://open.feishu.cn"
    documents_raw_content_url_path = "/open-apis/docx/v1/documents/{}/raw_content"
    tenant_access_token_internal_url_path = (
        "/open-apis/auth/v3/tenant_access_token/internal"
    )

    def __init__(self, app_id, app_secret) -> None:
        """

        Args:
            app_id: The unique identifier of the application is obtained after the application is created.
            app_secret: Application key, obtained after creating the application.
        """
        super().__init__()
        self.app_id = app_id
        self.app_secret = app_secret

        self.tenant_access_token = ""
        self.expire = 0

    def load_data(self, document_ids: List[str]) -> List[Document]:
        """Load data from the input directory.

        Args:
            document_ids (List[str]): a list of document ids.
        """
        if document_ids is None:
            raise ValueError('Must specify a "document_ids" in `load_kwargs`.')

        results = []
        for document_id in document_ids:
            doc = self._load_doc(document_id)
            results.append(Document(text=doc, extra_info={"document_id": document_id}))
        return results

    def _load_doc(self, document_id) -> str:
        """Load a document from Feishu Docs.

        Args:
            document_id: the document id.

        Returns:
            The document text.
        """
        url = self.host + self.documents_raw_content_url_path.format(document_id)
        if self.tenant_access_token == "" or self.expire < time.time():
            self._update_tenant_access_token()
        headers = {
            "Authorization": f"Bearer {self.tenant_access_token}",
            "Content-Type": "application/json; charset=utf-8",
        }
        response = requests.get(url, headers=headers)
        return response.json()["data"]["content"]

    def _update_tenant_access_token(self):
        """For update tenant_access_token."""
        url = self.host + self.tenant_access_token_internal_url_path
        headers = {"Content-Type": "application/json; charset=utf-8"}
        data = {"app_id": self.app_id, "app_secret": self.app_secret}
        response = requests.post(url, data=json.dumps(data), headers=headers)
        self.tenant_access_token = response.json()["tenant_access_token"]
        self.expire = time.time() + response.json()["expire"]

    def set_lark_domain(self):
        """The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain."""
        self.host = "https://open.larksuite.com"

load_data #

load_data(document_ids: List[str]) -> List[Document]

Load data from the input directory.

Parameters:

Name Type Description Default
document_ids List[str]

a list of document ids.

required
Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def load_data(self, document_ids: List[str]) -> List[Document]:
    """Load data from the input directory.

    Args:
        document_ids (List[str]): a list of document ids.
    """
    if document_ids is None:
        raise ValueError('Must specify a "document_ids" in `load_kwargs`.')

    results = []
    for document_id in document_ids:
        doc = self._load_doc(document_id)
        results.append(Document(text=doc, extra_info={"document_id": document_id}))
    return results

set_lark_domain #

set_lark_domain()

The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain.

Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
96
97
98
def set_lark_domain(self):
    """The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain."""
    self.host = "https://open.larksuite.com"