Skip to content

Clickhouse

ClickHouseReader #

Bases: BaseReader

ClickHouse reader.

Parameters:

Name Type Description Default
clickhouse_host str

Host of the ClickHouse backend to connect to. Defaults to "localhost".

'localhost'
username str

Username to login. Defaults to "default".

'default'
password str

Password to login. Defaults to "".

''
clickhouse_port int

URL port to connect with HTTP. Defaults to 8123.

8123
database str

Database name to find the table. Defaults to 'default'.

'default'
engine str

Engine. Options are "MergeTree" and "Memory". Default is "MergeTree".

'MergeTree'
table str

Table name to operate on. Defaults to 'llama_index'.

'llama_index'
index_type str

Index type string. Defaults to "NONE"; supported values are ("NONE", "HNSW", "ANNOY").

'NONE'
metric str

Metric to compute distance, supported are ('l2', 'cosine', 'dot'). Defaults to 'cosine'

'cosine'
batch_size int

the size of documents to insert. Defaults to 1000.

1000
index_params dict

The index parameters for ClickHouse. Defaults to None.

None
search_params dict

The search parameters for a ClickHouse query. Defaults to None.

None
Source code in llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
class ClickHouseReader(BaseReader):
    """ClickHouse reader.

    Opens a client connection to a ClickHouse server and keeps a
    ``ClickHouseSettings`` object that is later used to build the query
    statement executed by :meth:`load_data`.

    Args:
        clickhouse_host (str): Host of the ClickHouse backend to connect to.
            Defaults to "localhost".
        username (str): Username to login. Defaults to "default".
        password (str): Password to login. Defaults to "".
        clickhouse_port (Optional[int]): Port to connect with HTTP.
            Defaults to 8123.
        database (str): Database name to find the table. It is forwarded to
            the settings object only, not to the client connection.
            Defaults to "default".
        engine (str): Engine. Options are "MergeTree" and "Memory".
            Defaults to "MergeTree".
        table (str): Table name to operate on. Defaults to "llama_index".
        index_type (str): Index type string. Supported are
            ("NONE", "HNSW", "ANNOY"). Defaults to "NONE".
        metric (str): Metric to compute distance, supported are
            ('l2', 'cosine', 'dot'). Defaults to "cosine".
        batch_size (int, optional): The size of documents to insert.
            Defaults to 1000.
        index_params (dict, optional): The index parameters for ClickHouse.
            Defaults to None.
        search_params (dict, optional): The search parameters for a
            ClickHouse query. Defaults to None.
        **kwargs (Any): Extra keyword arguments forwarded unchanged to
            ``ClickHouseSettings``.
    """

    def __init__(
        self,
        clickhouse_host: str = "localhost",
        username: str = "default",
        password: str = "",
        clickhouse_port: Optional[int] = 8123,
        database: str = "default",
        engine: str = "MergeTree",
        table: str = "llama_index",
        index_type: str = "NONE",
        metric: str = "cosine",
        batch_size: int = 1000,
        index_params: Optional[dict] = None,
        search_params: Optional[dict] = None,
        **kwargs: Any,
    ) -> None:
        # Only connection-level values go to the client; table/database and
        # index/search options are carried by the settings object below.
        self.client = clickhouse_connect.get_client(
            host=clickhouse_host,
            port=clickhouse_port,
            username=username,
            password=password,
        )

        self.config = ClickHouseSettings(
            table=table,
            database=database,
            engine=engine,
            index_type=index_type,
            metric=metric,
            batch_size=batch_size,
            index_params=index_params,
            search_params=search_params,
            **kwargs,
        )

    def load_data(
        self,
        query_vector: List[float],
        where_str: Optional[str] = None,
        limit: int = 10,
    ) -> List[Document]:
        """Load data from ClickHouse.

        Builds a query statement from the stored settings, executes it with
        the client, and converts every named result row into a ``Document``.

        Args:
            query_vector (List[float]): Query vector.
            where_str (Optional[str], optional): where condition string.
                Defaults to None.
            limit (int): Number of results to return. Defaults to 10.

        Returns:
            List[Document]: A list of documents, one per result row, built
                from the row's "doc_id", "text" and "metadata" fields.
        """
        query_statement = self.config.build_query_statement(
            query_embed=query_vector,
            where_str=where_str,
            limit=limit,
        )

        return [
            Document(id_=r["doc_id"], text=r["text"], metadata=r["metadata"])
            for r in self.client.query(query_statement).named_results()
        ]

load_data #

load_data(query_vector: List[float], where_str: Optional[str] = None, limit: int = 10) -> List[Document]

Load data from ClickHouse.

Parameters:

Name Type Description Default
query_vector List[float]

Query vector.

required
where_str Optional[str]

where condition string. Defaults to None.

None
limit int

Number of results to return.

10

Returns:

Type Description
List[Document]

A list of documents.

Source code in llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def load_data(
    self,
    query_vector: List[float],
    where_str: Optional[str] = None,
    limit: int = 10,
) -> List[Document]:
    """Run a query against ClickHouse and return the rows as documents.

    Args:
        query_vector (List[float]): Query vector handed to the statement
            builder as ``query_embed``.
        where_str (Optional[str], optional): where condition string.
            Defaults to None.
        limit (int): Number of results to return. Defaults to 10.

    Returns:
        List[Document]: One document per named result row, in the order
            the rows are yielded.
    """
    # The settings object owns statement construction.
    statement = self.config.build_query_statement(
        query_embed=query_vector, where_str=where_str, limit=limit
    )
    rows = self.client.query(statement).named_results()

    documents = []
    for row in rows:
        # Each row is read by column name: doc_id, text, metadata.
        documents.append(
            Document(
                id_=row["doc_id"],
                text=row["text"],
                metadata=row["metadata"],
            )
        )
    return documents