Zilliz

ZillizCloudPipelineIndex #

Bases: BaseManagedIndex

Zilliz Cloud Pipeline's Index.

The Zilliz Cloud Pipeline's index implements a managed index that uses Zilliz Cloud Pipelines as the backend.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	Zilliz Cloud's project ID.	required
`cluster_id`	`str`	Zilliz Cloud's cluster ID.	required
`token`	`str`	Zilliz Cloud's token.	required
`cloud_region`	`str='gcp-us-west1'`	The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'.	`'gcp-us-west1'`
`pipeline_ids`	`dict=None`	A dictionary of pipeline ids for INGESTION, SEARCH, DELETION. Defaults to None.	`None`
`collection_name`	`str='zcp_llamalection'`	A collection name, defaults to 'zcp_llamalection'. If no pipeline_ids is given, get pipelines with collection_name.	`'zcp_llamalection'`
`show_progress`	`bool`	Whether to show tqdm progress bars. Defaults to False.	`False`

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

class ZillizCloudPipelineIndex(BaseManagedIndex):
    """Zilliz Cloud Pipeline's Index.

    The Zilliz Cloud Pipeline's index implements a managed index that uses Zilliz Cloud Pipelines as the backend.

    Args:
        project_id (str): Zilliz Cloud's project ID.
        cluster_id (str): Zilliz Cloud's cluster ID.
        token (str): Zilliz Cloud's token.
        cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'.
        pipeline_ids (dict=None): A dictionary of pipeline ids for INGESTION, SEARCH, DELETION. Defaults to None.
        collection_name (str='zcp_llamalection'): A collection name, defaults to 'zcp_llamalection'. If no pipeline_ids is given, get pipelines with collection_name.
        show_progress (bool): Whether to show tqdm progress bars. Defaults to False.
    """

    def __init__(
        self,
        project_id: str,
        cluster_id: str,
        token: str,
        cloud_region: str = "gcp-us-west1",
        pipeline_ids: Optional[Dict] = None,
        collection_name: str = "zcp_llamalection",
        show_progress: bool = False,
        **kwargs: Any,
    ) -> None:
        self.project_id = project_id
        self.cluster_id = cluster_id
        self.token = token
        self.cloud_region = cloud_region
        self.collection_name = collection_name
        self.domain = (
            f"https://controller.api.{cloud_region}.zillizcloud.com/v1/pipelines"
        )
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        # When no ids are supplied (or an empty dict is), look them up remotely.
        # This issues an HTTP request, so construction can raise RuntimeError.
        self.pipeline_ids = pipeline_ids or self.get_pipeline_ids()

        index_struct = ZillizCloudPipelineIndexStruct(
            index_id=collection_name,
            summary="Zilliz Cloud Pipeline Index",
        )

        super().__init__(
            show_progress=show_progress, index_struct=index_struct, **kwargs
        )

        if not self.pipeline_ids:
            # An index without pipelines is allowed; create_pipelines() can be
            # called afterwards.
            print("No available pipelines. Please create pipelines first.")
        else:
            # NOTE(review): assert is stripped under `python -O`; kept as-is so
            # the exception type seen by existing callers does not change.
            assert set(PIPELINE_TYPES).issubset(
                set(self.pipeline_ids.keys())
            ), f"Missing pipeline(s): {set(PIPELINE_TYPES) - set(self.pipeline_ids.keys())}"

    @staticmethod
    def _unwrap_response(response: Any) -> Dict:
        """Validate a Zilliz Cloud Pipelines HTTP response and return its JSON body.

        Raises:
            RuntimeError: If the HTTP status is not 200 (carries the response
                text) or the JSON body's "code" field is not 200 (carries the
                decoded body).
        """
        if response.status_code != 200:
            raise RuntimeError(response.text)
        response_dict = response.json()
        if response_dict["code"] != 200:
            raise RuntimeError(response_dict)
        return response_dict

    def _run_pipeline(self, pipeline_type: str, data: Dict) -> Dict:
        """POST ``data`` to the ``/run`` endpoint of the pipeline of the given type.

        Raises:
            RuntimeError: If no pipeline id of that type is known, or the
                request fails (see ``_unwrap_response``).
        """
        pipe_id = self.pipeline_ids.get(pipeline_type)
        if not pipe_id:
            # Without this guard the request would go to ".../None/run".
            raise RuntimeError(
                f"No {pipeline_type} pipeline is available for collection "
                f"'{self.collection_name}'. Please create pipelines first."
            )
        response = requests.post(
            f"{self.domain}/{pipe_id}/run", headers=self.headers, json={"data": data}
        )
        return self._unwrap_response(response)

    def insert_doc_url(self, url: str, metadata: Optional[Dict] = None) -> Any:
        """Insert doc from url with an initialized index.

        Args:
            url (str): URL of the document to ingest.
            metadata (Dict=None): Extra fields sent alongside ``doc_url`` in the request data. Defaults to None.

        Returns:
            The "data" field of the ingestion pipeline's response.

        Raises:
            RuntimeError: If there is no INGESTION pipeline id or the request fails.

        Example:
            >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
            >>> index = ZillizCloudPipelineIndex(
            ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
            ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
            ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
            ...     collection_name='your_collection_name'
            ... )
            >>> index.insert_doc_url(
            ...     url='https://oss_bucket.test_doc.ext',
            ...     metadata={'year': 2023, 'author': 'zilliz'}  # only required when the Index was created with metadata schemas
            ... )
        """
        data = {"doc_url": url}
        data.update(metadata or {})
        return self._run_pipeline("INGESTION", data)["data"]

    def delete_by_doc_name(self, doc_name: str) -> int:
        """Run the DELETION pipeline for the document with the given name.

        Args:
            doc_name (str): Value sent as ``doc_name`` to the deletion pipeline.

        Returns:
            The "data" field of the deletion pipeline's response.

        Raises:
            RuntimeError: If there is no DELETION pipeline id, the request
                fails, or the response carries no "data" field.
        """
        response_dict = self._run_pipeline("DELETION", {"doc_name": doc_name})
        try:
            return response_dict["data"]
        except (KeyError, TypeError) as e:
            raise RuntimeError(f"Run Zilliz Cloud Pipelines failed: {e}") from e

    def as_retriever(self, **kwargs: Any) -> BaseRetriever:
        """Return a retriever."""
        from llama_index.indices.managed.zilliz.retriever import (
            ZillizCloudPipelineRetriever,
        )

        return ZillizCloudPipelineRetriever(self, **kwargs)

    def get_pipeline_ids(self) -> dict:
        """Get pipeline ids.

        Lists the project's pipelines and keeps those bound to this index's
        cluster and collection.

        Returns:
            A dictionary mapping pipeline type ("INGESTION", "SEARCH",
            "DELETION") to pipeline id; empty when nothing matches.

        Raises:
            RuntimeError: If the listing request fails.
        """
        # Let requests build (and URL-encode) the query string.
        response = requests.get(
            self.domain, headers=self.headers, params={"projectId": self.project_id}
        )
        data = self._unwrap_response(response)["data"]

        target = (self.cluster_id, self.collection_name)
        pipeline_ids = {}
        for pipe_info in data:
            pipe_type = pipe_info["type"]
            if pipe_type == "SEARCH":
                # Cluster and collection must match within the SAME function;
                # checking them independently could pair a cluster from one
                # function with a collection from another.
                candidates = [
                    (func.get("clusterId"), func.get("collectionName"))
                    for func in pipe_info["functions"]
                ]
            elif pipe_type == "INGESTION":
                candidates = [
                    (pipe_info.get("clusterId"), pipe_info.get("newCollectionName"))
                ]
            elif pipe_type == "DELETION":
                candidates = [
                    (pipe_info.get("clusterId"), pipe_info.get("collectionName"))
                ]
            else:
                continue
            if target in candidates:
                pipeline_ids[pipe_type] = pipe_info["pipelineId"]
        return pipeline_ids

    def create_pipelines(
        self, metadata_schema: Optional[Dict] = None, **kwargs: Any
    ) -> dict:
        """Create INGESTION, SEARCH, DELETION pipelines using self.collection_name.

        Args:
            metadata_schema (Dict=None): A dictionary of metadata schema, defaults to None. Use metadata name as key and the corresponding data type as value: {'field_name': 'field_type'}.
                Only support the following values as the field type: 'Bool', 'Int8', 'Int16', 'Int32', 'Int64', 'Float', 'Double', 'VarChar'.
            kwargs: optional parameters to create ingestion pipeline
                - chunkSize: An integer within range [20, 500] to customize chunk size.
                - language: The language of documents. Available options: "ENGLISH", "CHINESE".

        Returns:
            A dictionary of pipeline ids for INGESTION, SEARCH, and DELETION pipelines.

        Raises:
            RuntimeError: If this index already has pipeline ids, or any
                creation request fails.

        Example:
            >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
            >>> index = ZillizCloudPipelineIndex(
            ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
            ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
            ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
            ...     collection_name='your_new_collection_name'
            ... )
            >>> pipeline_ids = index.create_pipelines(
            ...     metadata_schema={'year': 'Int32', 'author': 'VarChar'}  # optional, defaults to None
            ... )
        """
        if len(self.pipeline_ids) > 0:
            raise RuntimeError(
                f"Pipelines already exist for collection {self.collection_name}: {self.pipeline_ids}"
            )

        params_dict = {}
        index_doc_func = {
            "name": "index_my_doc",
            "action": "INDEX_DOC",
            "inputField": "doc_url",
            "language": "ENGLISH",
        }
        # Caller-supplied options (e.g. chunkSize, language) override the defaults.
        index_doc_func.update(kwargs)
        functions = [index_doc_func]
        if metadata_schema:
            # One PRESERVE function per metadata field.
            for k, v in metadata_schema.items():
                preserve_func = {
                    "name": f"keep_{k}",
                    "action": "PRESERVE",
                    "inputField": k,
                    "outputField": k,
                    "fieldType": v,
                }
                functions.append(preserve_func)
        params_dict["INGESTION"] = {
            "name": f"{self.collection_name}_ingestion",
            "projectId": self.project_id,
            "clusterId": self.cluster_id,
            "newCollectionName": self.collection_name,
            "type": "INGESTION",
            "functions": functions,
        }

        params_dict["SEARCH"] = {
            "name": f"{self.collection_name}_search",
            "projectId": self.project_id,
            "type": "SEARCH",
            "functions": [
                {
                    "name": "search_chunk_text",
                    "action": "SEARCH_DOC_CHUNK",
                    "inputField": "query_text",
                    "clusterId": self.cluster_id,
                    "collectionName": self.collection_name,
                }
            ],
        }

        params_dict["DELETION"] = {
            "name": f"{self.collection_name}_deletion",
            "type": "DELETION",
            "functions": [
                {
                    "name": "purge_chunks_by_doc_name",
                    "action": "PURGE_DOC_INDEX",
                    "inputField": "doc_name",
                }
            ],
            "projectId": self.project_id,
            "clusterId": self.cluster_id,
            "collectionName": self.collection_name,
        }

        # Requests are sent in insertion order: INGESTION, SEARCH, DELETION.
        for k, v in params_dict.items():
            response = requests.post(self.domain, headers=self.headers, json=v)
            response_dict = self._unwrap_response(response)
            self.pipeline_ids[k] = response_dict["data"]["pipelineId"]

        return self.pipeline_ids

    @classmethod
    def from_document_url(
        cls,
        url: str,
        project_id: str,
        cluster_id: str,
        token: str,
        cloud_region: str = "gcp-us-west1",
        pipeline_ids: Optional[Dict] = None,
        collection_name: str = "zcp_llamalection",
        metadata: Optional[Dict] = None,
        show_progress: bool = False,
        **kwargs: Any,
    ) -> BaseManagedIndex:
        """Zilliz Cloud Pipeline loads document from a signed url and then builds auto index for it.

        Args:
            url: a gcs or s3 signed url.
            project_id (str): Zilliz Cloud's project ID.
            cluster_id (str): Zilliz Cloud's cluster ID.
            token (str): Zilliz Cloud's token.
            cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'.
            pipeline_ids (dict=None): A dictionary of pipeline ids for INGESTION, SEARCH, DELETION. Defaults to None.
            collection_name (str='zcp_llamalection'): A collection name, defaults to 'zcp_llamalection'. If no pipeline_ids is given, get or create pipelines with collection_name.
            metadata (Dict=None): A dictionary of metadata. Defaults to None. The key must be string and the value must be a string, float, integer, or boolean.
            show_progress (bool): Whether to show tqdm progress bars. Defaults to False.

        Returns:
            An initialized ZillizCloudPipelineIndex

        Example:
            >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
            >>> index = ZillizCloudPipelineIndex.from_document_url(
            ...     url='https://oss_bucket.test_doc.ext',
            ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
            ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
            ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
            ...     collection_name='your_collection_name'
            ... )
        """
        metadata = metadata or {}
        index = cls(
            project_id=project_id,
            cluster_id=cluster_id,
            token=token,
            cloud_region=cloud_region,
            pipeline_ids=pipeline_ids,
            collection_name=collection_name,
            show_progress=show_progress,
            **kwargs,
        )
        if len(index.pipeline_ids) == 0:
            # Derive the metadata schema from the Python values supplied.
            index.pipeline_ids = index.create_pipelines(
                metadata_schema={k: get_zcp_type(v) for k, v in metadata.items()}
            )
            print("Pipelines are automatically created.")

        # Best effort: an ingestion failure is logged and the index is still
        # returned so the caller can retry insert_doc_url().
        try:
            index.insert_doc_url(url=url, metadata=metadata)
        except Exception as e:
            logger.error(
                "Failed to build managed index given document url (%s):\n%s", url, e
            )
        return index

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Inserting nodes is not yet supported with Zilliz Cloud Pipeline."
        )

    def delete_ref_doc(
        self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
    ) -> None:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Deleting a reference document is not yet supported with Zilliz Cloud Pipeline."
        )

    def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Updating referenced document is not yet supported with Zilliz Cloud Pipeline."
        )

    @classmethod
    def from_documents(
        cls: Type[IndexType],
        documents: Sequence[Document],
        storage_context: Optional[StorageContext] = None,
        show_progress: bool = False,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        # deprecated
        service_context: Optional[ServiceContext] = None,
        **kwargs: Any,
    ) -> IndexType:
        """Build a Zilliz Cloud Pipeline index from a sequence of documents.

        Unsupported: always raises NotImplementedError.
        """
        raise NotImplementedError(
            "Loading from document texts is not yet supported with Zilliz Cloud Pipeline."
        )

    def _build_index_from_nodes(self, nodes: Sequence[BaseNode]) -> IndexDict:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Building index from nodes is not yet supported with Zilliz Cloud Pipeline."
        )

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Deleting nodes is not yet supported with Zilliz Cloud Pipeline."
        )

insert_doc_url #

insert_doc_url(url: str, metadata: Optional[Dict] = None) -> None

Insert doc from url with an initialized index.

Example:

from llama_index.indices import ZillizCloudPipelineIndex

index = ZillizCloudPipelineIndex(
    project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
    cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
    token='YOUR_ZILLIZ_CLOUD_API_KEY',
    collection_name='your_collection_name'
)
index.insert_doc_url(
    url='https://oss_bucket.test_doc.ext',
    metadata={'year': 2023, 'author': 'zilliz'}  # only required when the Index was created with metadata schemas
)

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

def insert_doc_url(self, url: str, metadata: Optional[Dict] = None) -> Any:
    """Insert doc from url with an initialized index.

    Args:
        url (str): URL of the document to ingest.
        metadata (Dict=None): Extra fields sent alongside ``doc_url`` in the request data. Defaults to None.

    Returns:
        The "data" field of the ingestion pipeline's response.

    Raises:
        RuntimeError: If no INGESTION pipeline id is known, the HTTP status is
            not 200, or the response body's "code" is not 200.

    Example:
        >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
        >>> index = ZillizCloudPipelineIndex(
        ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
        ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
        ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
        ...     collection_name='your_collection_name'
        ... )
        >>> index.insert_doc_url(
        ...     url='https://oss_bucket.test_doc.ext',
        ...     metadata={'year': 2023, 'author': 'zilliz'}  # only required when the Index was created with metadata schemas
        ... )
    """
    ingest_pipe_id = self.pipeline_ids.get("INGESTION")
    if not ingest_pipe_id:
        # Without this guard the request would be sent to ".../None/run".
        raise RuntimeError(
            "No INGESTION pipeline is available for collection "
            f"'{self.collection_name}'. Please create pipelines first."
        )
    ingestion_url = f"{self.domain}/{ingest_pipe_id}/run"

    # Metadata fields travel in the same "data" object as the document URL.
    params = {"data": {"doc_url": url}}
    params["data"].update(metadata or {})
    response = requests.post(ingestion_url, headers=self.headers, json=params)
    if response.status_code != 200:
        raise RuntimeError(response.text)
    response_dict = response.json()
    if response_dict["code"] != 200:
        raise RuntimeError(response_dict)
    return response_dict["data"]

as_retriever #

as_retriever(**kwargs: Any) -> BaseRetriever

Return a retriever.

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

def as_retriever(self, **kwargs: Any) -> BaseRetriever:
    """Build a retriever bound to this index.

    Args:
        **kwargs: Forwarded unchanged to ``ZillizCloudPipelineRetriever``.

    Returns:
        A ``ZillizCloudPipelineRetriever`` constructed with this index.
    """
    # Imported at call time rather than at module import
    # (presumably to avoid a circular import -- TODO confirm).
    from llama_index.indices.managed.zilliz.retriever import (
        ZillizCloudPipelineRetriever as _Retriever,
    )

    retriever = _Retriever(self, **kwargs)
    return retriever

get_pipeline_ids #

get_pipeline_ids() -> dict

Get pipeline ids.

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

def get_pipeline_ids(self) -> dict:
    """Get pipeline ids.

    Lists the project's pipelines and keeps those bound to this index's
    cluster and collection.

    Returns:
        A dictionary mapping pipeline type ("INGESTION", "SEARCH", "DELETION")
        to pipeline id; empty when nothing matches.

    Raises:
        RuntimeError: If the HTTP status is not 200 or the response body's
            "code" is not 200.
    """
    # Let requests build (and URL-encode) the query string.
    response = requests.get(
        self.domain, headers=self.headers, params={"projectId": self.project_id}
    )
    if response.status_code != 200:
        raise RuntimeError(response.text)
    response_dict = response.json()
    if response_dict["code"] != 200:
        raise RuntimeError(response_dict)

    target = (self.cluster_id, self.collection_name)
    pipeline_ids = {}
    for pipe_info in response_dict["data"]:
        pipe_type = pipe_info["type"]
        if pipe_type == "SEARCH":
            # Cluster and collection must match within the SAME function;
            # checking them independently could pair a cluster from one
            # function with a collection from another.
            candidates = [
                (func.get("clusterId"), func.get("collectionName"))
                for func in pipe_info["functions"]
            ]
        elif pipe_type == "INGESTION":
            candidates = [
                (pipe_info.get("clusterId"), pipe_info.get("newCollectionName"))
            ]
        elif pipe_type == "DELETION":
            candidates = [
                (pipe_info.get("clusterId"), pipe_info.get("collectionName"))
            ]
        else:
            # Other pipeline types are ignored.
            continue
        if target in candidates:
            pipeline_ids[pipe_type] = pipe_info["pipelineId"]
    return pipeline_ids

create_pipelines #

create_pipelines(metadata_schema: Optional[Dict] = None, **kwargs: str) -> dict

Create INGESTION, SEARCH, DELETION pipelines using self.collection_name.

Parameters:

Name	Type	Description	Default
`metadata_schema`	`Dict=None`	A dictionary of metadata schema, defaults to None. Use metadata name as key and the corresponding data type as value: {'field_name': 'field_type'}. Only support the following values as the field type: 'Bool', 'Int8', 'Int16', 'Int32', 'Int64', 'Float', 'Double', 'VarChar'.	`None`
`kwargs`	`str`	optional parameters to create ingestion pipeline - chunkSize: An integer within range [20, 500] to customize chunk size. - language: The language of documents. Available options: "ENGLISH", "CHINESE".	`{}`

Returns:

Type	Description
`dict`	A dictionary of pipeline ids for INGESTION, SEARCH, and DELETION pipelines.

Example

from llama_index.indices import ZillizCloudPipelineIndex

index = ZillizCloudPipelineIndex(
    project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
    cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
    token='YOUR_ZILLIZ_CLOUD_API_KEY',
    collection_name='your_new_collection_name'
)
pipeline_ids = index.create_pipelines(
    metadata_schema={'year': 'Int32', 'author': 'VarChar'}  # optional, defaults to None
)

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

def create_pipelines(
    self, metadata_schema: Optional[Dict] = None, **kwargs: str
) -> dict:
    """Create the INGESTION, SEARCH and DELETION pipelines for self.collection_name.

    Args:
        metadata_schema (Dict=None): Mapping of metadata field name to field type,
            e.g. {'field_name': 'field_type'}. Defaults to None.
            Supported field types: 'Bool', 'Int8', 'Int16', 'Int32', 'Int64', 'Float', 'Double', 'VarChar'.
        kwargs: optional settings merged into the ingestion pipeline's INDEX_DOC function
            - chunkSize: An integer within range [20, 500] to customize chunk size.
            - language: The language of documents. Available options: "ENGLISH", "CHINESE".

    Returns:
        self.pipeline_ids, filled with the ids of the newly created
        INGESTION, SEARCH, and DELETION pipelines.

    Raises:
        RuntimeError: If this index already has pipeline ids, or a creation
            request returns a non-200 HTTP status or a non-200 "code".

    Example:
        >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
        >>> index = ZillizCloudPipelineIndex(
        ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
        ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
        ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
        ...     collection_name='your_new_collection_name'
        ... )
        >>> pipeline_ids = index.create_pipelines(
        ...     metadata_schema={'year': 'Int32', 'author': 'VarChar'}  # optional, defaults to None
        ... )
    """
    existing_ids = self.pipeline_ids
    if len(existing_ids) > 0:
        raise RuntimeError(
            f"Pipelines already exist for collection {self.collection_name}: {self.pipeline_ids}"
        )

    collection = self.collection_name
    project = self.project_id
    cluster = self.cluster_id

    # INDEX_DOC function; caller-supplied options override the defaults.
    doc_indexer = {
        "name": "index_my_doc",
        "action": "INDEX_DOC",
        "inputField": "doc_url",
        "language": "ENGLISH",
    }
    doc_indexer.update(kwargs)

    # One PRESERVE function per metadata field, appended after INDEX_DOC.
    ingestion_functions = [doc_indexer]
    ingestion_functions.extend(
        {
            "name": f"keep_{field_name}",
            "action": "PRESERVE",
            "inputField": field_name,
            "outputField": field_name,
            "fieldType": field_type,
        }
        for field_name, field_type in (metadata_schema or {}).items()
    )

    ingestion_payload = {
        "name": f"{collection}_ingestion",
        "projectId": project,
        "clusterId": cluster,
        "newCollectionName": collection,
        "type": "INGESTION",
        "functions": ingestion_functions,
    }
    search_payload = {
        "name": f"{collection}_search",
        "projectId": project,
        "type": "SEARCH",
        "functions": [
            {
                "name": "search_chunk_text",
                "action": "SEARCH_DOC_CHUNK",
                "inputField": "query_text",
                "clusterId": cluster,
                "collectionName": collection,
            }
        ],
    }
    deletion_payload = {
        "name": f"{collection}_deletion",
        "type": "DELETION",
        "functions": [
            {
                "name": "purge_chunks_by_doc_name",
                "action": "PURGE_DOC_INDEX",
                "inputField": "doc_name",
            }
        ],
        "projectId": project,
        "clusterId": cluster,
        "collectionName": collection,
    }

    # Creation order is INGESTION, SEARCH, DELETION.
    creation_plan = (
        ("INGESTION", ingestion_payload),
        ("SEARCH", search_payload),
        ("DELETION", deletion_payload),
    )
    for pipeline_type, payload in creation_plan:
        reply = requests.post(self.domain, headers=self.headers, json=payload)
        if reply.status_code != 200:
            raise RuntimeError(reply.text)
        body = reply.json()
        if body["code"] != 200:
            raise RuntimeError(body)
        self.pipeline_ids[pipeline_type] = body["data"]["pipelineId"]

    return self.pipeline_ids

from_document_url `classmethod` #

from_document_url(url: str, project_id: str, cluster_id: str, token: str, cloud_region: str = 'gcp-us-west1', pipeline_ids: Optional[Dict] = None, collection_name: str = 'zcp_llamalection', metadata: Optional[Dict] = None, show_progress: bool = False, **kwargs: Any) -> BaseManagedIndex

Zilliz Cloud Pipeline loads document from a signed url and then builds auto index for it.

Parameters:

Name	Type	Description	Default
`url`	`str`	a gcs or s3 signed url.	required
`project_id`	`str`	Zilliz Cloud's project ID.	required
`cluster_id`	`str`	Zilliz Cloud's cluster ID.	required
`token`	`str`	Zilliz Cloud's token.	required
`cloud_region`	`str='gcp-us-west1'`	The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'.	`'gcp-us-west1'`
`pipeline_ids`	`dict=None`	A dictionary of pipeline ids for INGESTION, SEARCH, DELETION. Defaults to None.	`None`
`collection_name`	`str='zcp_llamalection'`	A collection name, defaults to 'zcp_llamalection'. If no pipeline_ids is given, get or create pipelines with collection_name.	`'zcp_llamalection'`
`metadata`	`Dict=None`	A dictionary of metadata. Defaults to None. The key must be string and the value must be a string, float, integer, or boolean.	`None`
`show_progress`	`bool`	Whether to show tqdm progress bars. Defaults to False.	`False`

Returns:

Type	Description
`BaseManagedIndex`	An initialized ZillizCloudPipelineIndex

Example

from llama_index.indices import ZillizCloudPipelineIndex

index = ZillizCloudPipelineIndex.from_document_url(
    url='https://oss_bucket.test_doc.ext',
    project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
    cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
    token='YOUR_ZILLIZ_CLOUD_API_KEY',
    collection_name='your_collection_name'
)

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

@classmethod
def from_document_url(
    cls,
    url: str,
    project_id: str,
    cluster_id: str,
    token: str,
    cloud_region: str = "gcp-us-west1",
    pipeline_ids: Optional[Dict] = None,
    collection_name: str = "zcp_llamalection",
    metadata: Optional[Dict] = None,
    show_progress: bool = False,
    **kwargs: Any,
) -> BaseManagedIndex:
    """Build an index and ingest one document from a signed URL through Zilliz Cloud Pipelines.

    Pipelines are created automatically when the new index has none.

    Args:
        url: a gcs or s3 signed url.
        project_id (str): Zilliz Cloud's project ID.
        cluster_id (str): Zilliz Cloud's cluster ID.
        token (str): Zilliz Cloud's token.
        cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'.
        pipeline_ids (dict=None): A dictionary of pipeline ids for INGESTION, SEARCH, DELETION. Defaults to None.
        collection_name (str='zcp_llamalection'): A collection name, defaults to 'zcp_llamalection'. If no pipeline_ids is given, get or create pipelines with collection_name.
        metadata (Dict=None): A dictionary of metadata. Defaults to None. The key must be string and the value must be a string, float, integer, or boolean.
        show_progress (bool): Whether to show tqdm progress bars. Defaults to False.

    Returns:
        An initialized ZillizCloudPipelineIndex. It is returned even when the
        document ingestion fails; the failure is logged instead of raised.

    Example:
        >>> from llama_index.indices.managed.zilliz import ZillizCloudPipelineIndex
        >>> index = ZillizCloudPipelineIndex.from_document_url(
        ...     url='https://oss_bucket.test_doc.ext',
        ...     project_id='YOUR_ZILLIZ_CLOUD_PROJECT_ID',
        ...     cluster_id='YOUR_ZILLIZ_CLOUD_CLUSTER_ID',
        ...     token='YOUR_ZILLIZ_CLOUD_API_KEY',
        ...     collection_name='your_collection_name'
        ... )
    """
    if not metadata:
        metadata = {}

    managed_index = cls(
        project_id=project_id,
        cluster_id=cluster_id,
        token=token,
        cloud_region=cloud_region,
        pipeline_ids=pipeline_ids,
        collection_name=collection_name,
        show_progress=show_progress,
        **kwargs,
    )

    has_no_pipelines = len(managed_index.pipeline_ids) == 0
    if has_no_pipelines:
        # Derive the metadata schema from the Python values supplied.
        schema = {}
        for field_name, field_value in metadata.items():
            schema[field_name] = get_zcp_type(field_value)
        managed_index.pipeline_ids = managed_index.create_pipelines(
            metadata_schema=schema
        )
        print("Pipelines are automatically created.")

    # Best effort: log an ingestion failure and still hand back the index.
    try:
        managed_index.insert_doc_url(url=url, metadata=metadata)
    except Exception as err:
        logger.error(
            "Failed to build managed index given document url (%s):\n%s", url, err
        )
    return managed_index

from_documents `classmethod` #

from_documents(documents: Sequence[Document], storage_context: Optional[StorageContext] = None, show_progress: bool = False, callback_manager: Optional[CallbackManager] = None, transformations: Optional[List[TransformComponent]] = None, service_context: Optional[ServiceContext] = None, **kwargs: Any) -> IndexType

Build a Zilliz Cloud Pipeline index from a sequence of documents.

Source code in llama-index-integrations/indices/llama-index-indices-managed-zilliz/llama_index/indices/managed/zilliz/base.py

@classmethod
def from_documents(
    cls: Type[IndexType],
    documents: Sequence[Document],
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[List[TransformComponent]] = None,
    # deprecated
    service_context: Optional[ServiceContext] = None,
    **kwargs: Any,
) -> IndexType:
    """Build a Zilliz Cloud Pipeline index from a sequence of documents.

    Not supported by this index: every call raises NotImplementedError,
    whatever arguments are passed.

    Raises:
        NotImplementedError: Always.
    """
    unsupported_reason = (
        "Loading from document texts is not yet supported with Zilliz Cloud Pipeline."
    )
    raise NotImplementedError(unsupported_reason)

Zilliz

ZillizCloudPipelineIndex #

insert_doc_url #

as_retriever #

get_pipeline_ids #

create_pipelines #

from_document_url classmethod #

from_documents classmethod #

from_document_url `classmethod` #

from_documents `classmethod` #