Skip to content

Exa

ExaToolSpec #

Bases: BaseToolSpec

Exa tool spec.

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class ExaToolSpec(BaseToolSpec):
    """Exa tool spec."""

    spec_functions = [
        "search",
        "retrieve_documents",
        "search_and_retrieve_documents",
        "search_and_retrieve_highlights",
        "find_similar",
        "current_date",
    ]

    def __init__(
        self,
        api_key: str,
        verbose: bool = True,
        max_characters: int = 2000,
    ) -> None:
        """Initialize with parameters."""
        from exa_py import Exa

        self.client = Exa(api_key=api_key, user_agent="llama-index")
        self._verbose = verbose
        # max characters for the text field in the search_and_contents function
        self._max_characters = max_characters

    def search(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """Exa allows you to use a natural language query to search the internet.

        Args:
            query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
            num_results (Optional[int]): Number of results to return. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def retrieve_documents(self, ids: List[str]) -> List[Document]:
        """Retrieve a list of document texts returned by `exa_search`, using the ID field.

        Args:
            ids (List(str)): the ids of the documents to retrieve
        """
        response = self.client.get_contents(ids)
        return [Document(text=result.text) for result in response.results]

    def find_similar(
        self,
        url: str,
        num_results: Optional[int] = 3,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """Retrieve a list of similar documents to a given url.

        Args:
            url (str): The web page to find similar results of
            num_results (Optional[int]): Number of results to return. Default 3.
            start_published_date (Optional[str]): A date string like "2020-06-15"
            end_published_date (Optional[str]): End date string
        """
        response = self.client.find_similar(
            url,
            num_results=num_results,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
        )
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def search_and_retrieve_documents(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List[Document]:
        """Combines the functionality of `search` and `retrieve_documents`.

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
            text={"max_characters": self._max_characters},
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.text) for document in response.results]

    def search_and_retrieve_highlights(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List[Document]:
        """Searches and retrieves highlights (intelligent snippets from the document).

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
            highlights=True,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.highlights[0]) for document in response.results]

    def current_date(self):
        """A function to return todays date.

        Call this before any other functions that take timestamps as an argument
        """
        return datetime.date.today()

search #

search(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List

Exa allows you to use a natural language query to search the internet.

Parameters:

Name Type Description Default
query str

A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"

required
num_results Optional[int]

Number of results to return. Defaults to 10.

10
include_domains Optional[List(str)]

A list of top level domains like ["wsj.com"] to limit the search to specific sites.

None
exclude_domains Optional[List(str)]

Top level domains to exclude.

None
start_published_date Optional[str]

A date string like "2020-06-15". Get the date from current_date

None
end_published_date Optional[str]

End date string

None
Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def search(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List:
    """Exa allows you to use a natural language query to search the internet.

    Args:
        query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
        num_results (Optional[int]): Number of results to return. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
        end_published_date (Optional[str]): End date string
    """
    response = self.client.search(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=True,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

retrieve_documents #

retrieve_documents(ids: List[str]) -> List[Document]

Retrieve a list of document texts returned by exa_search, using the ID field.

Parameters:

Name Type Description Default
ids List(str

the ids of the documents to retrieve

required
Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
71
72
73
74
75
76
77
78
def retrieve_documents(self, ids: List[str]) -> List[Document]:
    """Retrieve a list of document texts returned by `exa_search`, using the ID field.

    Args:
        ids (List(str)): the ids of the documents to retrieve
    """
    response = self.client.get_contents(ids)
    return [Document(text=result.text) for result in response.results]

find_similar #

find_similar(url: str, num_results: Optional[int] = 3, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List

Retrieve a list of similar documents to a given url.

Parameters:

Name Type Description Default
url str

The web page to find similar results of

required
num_results Optional[int]

Number of results to return. Default 3.

3
start_published_date Optional[str]

A date string like "2020-06-15"

None
end_published_date Optional[str]

End date string

None
Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def find_similar(
    self,
    url: str,
    num_results: Optional[int] = 3,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List:
    """Retrieve a list of similar documents to a given url.

    Args:
        url (str): The web page to find similar results of
        num_results (Optional[int]): Number of results to return. Default 3.
        start_published_date (Optional[str]): A date string like "2020-06-15"
        end_published_date (Optional[str]): End date string
    """
    response = self.client.find_similar(
        url,
        num_results=num_results,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
    )
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

search_and_retrieve_documents #

search_and_retrieve_documents(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List[Document]

Combines the functionality of search and retrieve_documents.

Parameters:

Name Type Description Default
query str

the natural language query

required
num_results Optional[int]

Number of results. Defaults to 10.

10
include_domains Optional[List(str)]

A list of top level domains to search, like ["wsj.com"]

None
exclude_domains Optional[List(str)]

Top level domains to exclude.

None
start_published_date Optional[str]

A date string like "2020-06-15".

None
end_published_date Optional[str]

End date string

None
Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def search_and_retrieve_documents(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List[Document]:
    """Combines the functionality of `search` and `retrieve_documents`.

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string
    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=True,
        text={"max_characters": self._max_characters},
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.text) for document in response.results]

search_and_retrieve_highlights #

search_and_retrieve_highlights(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List[Document]

Searches and retrieves highlights (intelligent snippets from the document).

Parameters:

Name Type Description Default
query str

the natural language query

required
num_results Optional[int]

Number of results. Defaults to 10.

10
include_domains Optional[List(str)]

A list of top level domains to search, like ["wsj.com"]

None
exclude_domains Optional[List(str)]

Top level domains to exclude.

None
start_published_date Optional[str]

A date string like "2020-06-15".

None
end_published_date Optional[str]

End date string

None
Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def search_and_retrieve_highlights(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List[Document]:
    """Searches and retrieves highlights (intelligent snippets from the document).

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string
    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=True,
        highlights=True,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.highlights[0]) for document in response.results]

current_date #

current_date()

A function to return todays date.

Call this before any other functions that take timestamps as an argument

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
172
173
174
175
176
177
def current_date(self):
    """A function to return todays date.

    Call this before any other functions that take timestamps as an argument
    """
    return datetime.date.today()