Skip to content

Readme

ReadmeReader #

Bases: BaseReader

Readme reader. Reads data from Readme.com docs.

Parameters:

Name Type Description Default
api_key str

Readme.com API Key

required
Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
class ReadmeReader(BaseReader):
    """Readme reader. Reads data from Readme.com docs.

    Walks every category of type "guide", lists the non-hidden docs in
    each one, fetches every doc and converts its HTML body to plain text.

    Args:
        api_key (str): Readme.com API Key
    """

    def __init__(self, api_key: str) -> None:
        """Initialize Readme reader.

        Args:
            api_key (str): Readme.com API Key. It is Base64-encoded as
                ``"<api_key>:"`` and sent in a ``Basic`` authorization
                header with every request.
        """
        # NOTE: self.api_key holds the Base64-encoded credential, not the raw key.
        self.api_key = base64.b64encode(bytes(f"{api_key}:", "utf-8")).decode("utf-8")
        self._headers = {
            "accept": "*/*",
            "authorization": f"Basic {self.api_key}",
            "Content-Type": "application/json",
        }

    def load_data(self) -> List[Document]:
        """Load data from the docs (pages).

        Docs whose ``body_html`` is ``None`` are skipped. For the others the
        HTML body is reduced to its text content.

        Returns:
            List[Document]: List of documents.
        """
        from bs4 import BeautifulSoup

        results = []

        for doc in self.get_all_docs():
            body_html = doc["body_html"]
            if body_html is None:
                continue
            text = BeautifulSoup(body_html, "html.parser").get_text()
            extra_info = {
                "id": doc["id"],
                "title": doc["title"],
                # Previously this stored the title a second time; use the
                # doc's own "type" field (None when the API omits it).
                "type": doc.get("type"),
                "slug": doc["slug"],
                "updated_at": doc["updatedAt"],
            }

            results.append(
                Document(
                    text=text,
                    extra_info=extra_info,
                )
            )

        return results

    def get_all_docs(self):
        """
        Retrieves all documents, along with their information, grouped by category.

        Each returned dictionary additionally carries a ``category_name`` key
        holding the title of the category it was listed under.

        Returns:
            list: A list containing dictionaries with document information.
        """
        docs = []
        for category in self.get_all_categories():
            category_docs = self.get_docs_in_category(category.get("slug"))
            documents_slugs = [
                category_doc.get("slug") for category_doc in category_docs
            ]
            for document_slug in documents_slugs:
                doc = self.get_document_info(document_slug)
                doc["category_name"] = category["title"]
                docs.append(doc)

        return docs

    def get_docs_in_category(self, category_slug):
        """
        Retrieves documents belonging to a specific category.

        Args:
            category_slug (str): The slug of the category.

        Returns:
            list: A list containing dictionaries with document information.
                Only entries whose ``hidden`` field is present and falsy
                are returned.
        """
        url = f"https://dash.readme.com/api/v1/categories/{category_slug}/docs"
        response = requests.get(url, headers=self._headers)

        docs = response.json()

        # Keep visible docs only; an entry without a "hidden" field is
        # treated as hidden.
        return [doc for doc in docs if not doc.get("hidden", True)]

    def get_document_info(self, document_slug):
        """
        Retrieves information about a specific document.

        Args:
            document_slug (str): The slug of the document.

        Returns:
            dict: A dictionary containing document information.
        """
        url = f"https://dash.readme.com/api/v1/docs/{document_slug}"
        response = requests.get(url, headers=self._headers)

        return response.json()

    def get_categories_page(self, params, page):
        """
        Sends a GET request to a specific page of categories.

        Args:
            params (dict): Parameters of the request, such as perPage and others.
                The dictionary is not modified.
            page (int): The number of the page to be retrieved.

        Returns:
            tuple: A tuple containing the total number of items and the retrieved categories.
        """
        url = "https://dash.readme.com/api/v1/categories"
        # Copy so the caller's dict is left untouched; ``page`` overrides
        # any "page" entry already present in ``params``.
        query = {**params, "page": page}
        response = requests.get(url, params=query, headers=self._headers)
        # The total count comes from the "x-total-count" response header
        # (0 when the header is missing).
        return int(response.headers.get("x-total-count", 0)), response.json()

    def get_all_categories(self):
        """
        Retrieves all categories from the API.

        Returns:
            list: A list containing all categories with type "guide".
        """
        per_page = 100
        params = {
            "perPage": per_page,
            "page": 1,
        }

        total_count, first_page = self.get_categories_page(params=params, page=1)
        categories = list(first_page)
        total_pages = math.ceil(total_count / per_page)

        for page_number in range(2, total_pages + 1):
            # get_categories_page returns (total_count, categories); only the
            # categories belong in the accumulated list.
            _, page_categories = self.get_categories_page(
                params=params, page=page_number
            )
            categories.extend(page_categories)

        # Include just categories with type: "guide"
        return [category for category in categories if category.get("type") == "guide"]

load_data #

load_data() -> List[Document]

Load data from the docs (pages).

Returns:

Type Description
List[Document]

List[Document]: List of documents.

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def load_data(self) -> List[Document]:
    """Load data from the docs (pages).

    Docs whose ``body_html`` is ``None`` are skipped. For the others the
    HTML body is reduced to its text content.

    Returns:
        List[Document]: List of documents.
    """
    from bs4 import BeautifulSoup

    results = []

    for doc in self.get_all_docs():
        body_html = doc["body_html"]
        if body_html is None:
            continue
        text = BeautifulSoup(body_html, "html.parser").get_text()
        extra_info = {
            "id": doc["id"],
            "title": doc["title"],
            # Previously this stored the title a second time; use the
            # doc's own "type" field (None when the API omits it).
            "type": doc.get("type"),
            "slug": doc["slug"],
            "updated_at": doc["updatedAt"],
        }

        results.append(
            Document(
                text=text,
                extra_info=extra_info,
            )
        )

    return results

get_all_docs #

get_all_docs()

Retrieves all documents, along with their information, grouped by category.

Returns:

Name Type Description
list

A list containing dictionaries with document information.

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def get_all_docs(self):
    """
    Collects the full information of every document in every category.

    Each returned dictionary additionally carries a ``category_name`` key
    holding the title of the category it was listed under.

    Returns:
        list: A list containing dictionaries with document information.
    """
    collected = []
    for category in self.get_all_categories():
        listing = self.get_docs_in_category(category.get("slug"))
        # Resolve all slugs of the category before fetching any document.
        slugs = [entry.get("slug") for entry in listing]
        for slug in slugs:
            info = self.get_document_info(slug)
            info["category_name"] = category["title"]
            collected.append(info)

    return collected

get_docs_in_category #

get_docs_in_category(category_slug)

Retrieves documents belonging to a specific category.

Parameters:

Name Type Description Default
category_slug str

The slug of the category.

required

Returns:

Name Type Description
list

A list containing dictionaries with document information.

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def get_docs_in_category(self, category_slug):
    """
    Lists the visible documents of one category.

    Args:
        category_slug (str): The slug of the category.

    Returns:
        list: A list containing dictionaries with document information.
            Only entries whose ``hidden`` field is present and falsy
            are returned.
    """
    endpoint = f"https://dash.readme.com/api/v1/categories/{category_slug}/docs"
    payload = requests.get(endpoint, headers=self._headers).json()

    visible = []
    for entry in payload:
        # An entry without a "hidden" field is treated as hidden.
        if entry.get("hidden", True):
            continue
        visible.append(entry)
    return visible

get_document_info #

get_document_info(document_slug)

Retrieves information about a specific document.

Parameters:

Name Type Description Default
document_slug str

The slug of the document.

required

Returns:

Name Type Description
dict

A dictionary containing document information.

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def get_document_info(self, document_slug):
    """
    Fetches the full record of a single document.

    Args:
        document_slug (str): The slug of the document.

    Returns:
        dict: A dictionary containing document information.
    """
    endpoint = f"https://dash.readme.com/api/v1/docs/{document_slug}"
    # The decoded JSON body of the response is returned as-is.
    return requests.get(endpoint, headers=self._headers).json()

get_categories_page #

get_categories_page(params, page)

Sends a GET request to a specific page of categories.

Parameters:

Name Type Description Default
params dict

Parameters of the request, such as perPage and others.

required
page int

The number of the page to be retrieved.

required

Returns:

Name Type Description
tuple

A tuple containing the total number of items and the retrieved categories.

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def get_categories_page(self, params, page):
    """
    Sends a GET request to a specific page of categories.

    Args:
        params (dict): Parameters of the request, such as perPage and others.
            The dictionary is not modified.
        page (int): The number of the page to be retrieved.

    Returns:
        tuple: A tuple containing the total number of items and the retrieved categories.
    """
    url = "https://dash.readme.com/api/v1/categories"
    # Copy so the caller's dict is left untouched; ``page`` overrides any
    # "page" entry already present in ``params``.
    query = {**params, "page": page}
    response = requests.get(url, params=query, headers=self._headers)
    # The total count comes from the "x-total-count" response header
    # (0 when the header is missing).
    return int(response.headers.get("x-total-count", 0)), response.json()

get_all_categories #

get_all_categories()

Retrieves all categories from the API.

Returns:

Name Type Description
list

A list containing all categories with type "guide".

Source code in llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def get_all_categories(self):
    """
    Retrieves all categories from the API.

    The first page is requested to learn the total number of categories;
    any further pages are then fetched one by one.

    Returns:
        list: A list containing all categories with type "guide".
    """
    per_page = 100
    params = {
        "perPage": per_page,
        "page": 1,
    }

    total_count, first_page = self.get_categories_page(params=params, page=1)
    categories = list(first_page)
    total_pages = math.ceil(total_count / per_page)

    for page_number in range(2, total_pages + 1):
        # get_categories_page returns (total_count, categories); only the
        # categories belong in the accumulated list.
        _, page_categories = self.get_categories_page(
            params=params, page=page_number
        )
        categories.extend(page_categories)

    # Include just categories with type: "guide"
    return [category for category in categories if category.get("type") == "guide"]