Dashscope

DashScopeEmbedding #

Bases: MultiModalEmbedding

DashScope class for text embedding.

Parameters:

model_name (str): Model name for embedding. Defaults to DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2. Options are:

- DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V1
- DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2

text_type (str): The input type, one of ['query', 'document']. For asymmetric tasks such as retrieval, it is recommended to distinguish between query text ('query') and base text ('document') to achieve better retrieval results. Symmetric tasks such as clustering and classification do not need a special setting; the system default value 'document' can be used. Defaults to 'document'.

api_key (str): The DashScope API key. Defaults to None.
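
A minimal usage sketch, assuming the dashscope package is installed and that both names below are exported by llama-index-embeddings-dashscope; the API key string is a placeholder:

from llama_index.embeddings.dashscope import (
    DashScopeEmbedding,
    DashScopeTextEmbeddingModels,
)

# Embedder for asymmetric retrieval: documents keep the default
# text_type "document", while queries are embedded with text_type "query".
embedder = DashScopeEmbedding(
    model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
    text_type="document",
    api_key="sk-your-dashscope-key",  # placeholder API key
)

doc_vector = embedder.get_text_embedding("DashScope provides text embeddings.")
query_vector = embedder.get_query_embedding("What does DashScope provide?")
print(len(doc_vector), len(query_vector))
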
Source code in llama-index-integrations/embeddings/llama-index-embeddings-dashscope/llama_index/embeddings/dashscope/base.py
class DashScopeEmbedding(MultiModalEmbedding):
    """DashScope class for text embedding.

    Args:
        model_name (str): Model name for embedding.
            Defaults to DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2.
                Options are:

                - DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V1
                - DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2
        text_type (str): The input type, one of ['query', 'document'].
            For asymmetric tasks such as retrieval, it is recommended to
            distinguish between query text (query) and base text (document)
            to achieve better retrieval results. Symmetric tasks such as
            clustering and classification do not need a special setting,
            and the system default value "document" can be used.
        api_key (str): The DashScope api key.
    """

    _api_key: Optional[str] = PrivateAttr()
    _text_type: Optional[str] = PrivateAttr()

    def __init__(
        self,
        model_name: str = DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
        text_type: str = "document",
        api_key: Optional[str] = None,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        **kwargs: Any,
    ) -> None:
        self._api_key = api_key
        self._text_type = text_type
        super().__init__(
            model_name=model_name,
            embed_batch_size=embed_batch_size,
            **kwargs,
        )

    @classmethod
    def class_name(cls) -> str:
        return "DashScopeEmbedding"

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        emb = get_text_embedding(
            self.model_name,
            query,
            api_key=self._api_key,
            text_type="query",
        )
        if len(emb) > 0 and emb[0] is not None:
            return emb[0]
        else:
            return []

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        emb = get_text_embedding(
            self.model_name,
            text,
            api_key=self._api_key,
            text_type=self._text_type,
        )
        if len(emb) > 0 and emb[0] is not None:
            return emb[0]
        else:
            return []

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings."""
        return get_text_embedding(
            self.model_name,
            texts,
            api_key=self._api_key,
            text_type=self._text_type,
        )

    # TODO: use proper async methods
    async def _aget_text_embedding(self, query: str) -> List[float]:
        """Get text embedding."""
        return self._get_text_embedding(query)

    # TODO: use proper async methods
    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        return self._get_query_embedding(query)

    def get_batch_query_embedding(self, embedding_file_url: str) -> Optional[str]:
        """Get batch query embeddings.

        Args:
            embedding_file_url (str): The URL of the file to embed, with one line of text per entry.

        Returns:
            str: The url of the embedding result, format ref:
                 https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
        """
        return get_batch_text_embedding(
            self.model_name,
            embedding_file_url,
            api_key=self._api_key,
            text_type=self._text_type,
        )

    def get_batch_text_embedding(self, embedding_file_url: str) -> Optional[str]:
        """Get batch text embeddings.

        Args:
            embedding_file_url (str): The URL of the file to embed, with one line of text per entry.

        Returns:
            str: The url of the embedding result, format ref:
                 https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
        """
        return get_batch_text_embedding(
            self.model_name,
            embedding_file_url,
            api_key=self._api_key,
            text_type=self._text_type,
        )

    def _get_image_embedding(self, img_file_path: ImageType) -> List[float]:
        """
        Embed the input image synchronously.
        """
        input = [{"image": img_file_path}]
        return get_multimodal_embedding(
            self.model_name, input=input, api_key=self._api_key
        )

    async def _aget_image_embedding(self, img_file_path: ImageType) -> List[float]:
        """
        Embed the input image asynchronously.

        """
        return self._get_image_embedding(img_file_path=img_file_path)

    def get_multimodal_embedding(
        self, input: List[Dict], auto_truncation: bool = False
    ) -> List[float]:
        """Call DashScope multimodal embedding.
        ref: https://help.aliyun.com/zh/dashscope/developer-reference/one-peace-multimodal-embedding-api-details.

        Args:
            input (List[Dict]): The input of the multimodal embedding, e.g.:
                [{'factor': 1, 'text': '你好'},
                {'factor': 2, 'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/cow.flac'},
                {'factor': 3, 'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png'}]

        Raises:
            ImportError: If the dashscope package is not installed.

        Returns:
            List[float]: The embedding result
        """
        return get_multimodal_embedding(
            self.model_name,
            input=input,
            api_key=self._api_key,
            auto_truncation=auto_truncation,
        )

get_batch_query_embedding #

get_batch_query_embedding(embedding_file_url: str) -> Optional[str]

Get batch query embeddings.

Parameters:

embedding_file_url (str): The URL of the file to embed, with one line of text per entry. Required.

Returns:

Optional[str]: The URL of the embedding result; format reference: https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
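
A sketch of submitting an asynchronous batch job, reusing the embedder from the example above; the file URL is a placeholder for a publicly reachable text file with one line of text per entry:

# Placeholder URL; the call returns a URL pointing at the embedding result,
# or None if the job could not be submitted.
result_url = embedder.get_batch_query_embedding(
    embedding_file_url="https://example.com/queries.txt"
)
if result_url is not None:
    print("Embedding results available at:", result_url)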

Source code in llama-index-integrations/embeddings/llama-index-embeddings-dashscope/llama_index/embeddings/dashscope/base.py
def get_batch_query_embedding(self, embedding_file_url: str) -> Optional[str]:
    """Get batch query embeddings.

    Args:
        embedding_file_url (str): The URL of the file to embed, with one line of text per entry.

    Returns:
        str: The url of the embedding result, format ref:
             https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
    """
    return get_batch_text_embedding(
        self.model_name,
        embedding_file_url,
        api_key=self._api_key,
        text_type=self._text_type,
    )

get_batch_text_embedding #

get_batch_text_embedding(embedding_file_url: str) -> Optional[str]

Get batch text embeddings.

Parameters:

embedding_file_url (str): The URL of the file to embed, with one line of text per entry. Required.

Returns:

Optional[str]: The URL of the embedding result; format reference: https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
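
The document-side counterpart is called the same way; a brief sketch, again with a placeholder file URL:

# Each line of the referenced file is embedded using the instance's
# configured text_type (by default "document").
result_url = embedder.get_batch_text_embedding(
    embedding_file_url="https://example.com/documents.txt"
)
print(result_url)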

Source code in llama-index-integrations/embeddings/llama-index-embeddings-dashscope/llama_index/embeddings/dashscope/base.py
def get_batch_text_embedding(self, embedding_file_url: str) -> Optional[str]:
    """Get batch text embeddings.

    Args:
        embedding_file_url (str): The URL of the file to embed, with one line of text per entry.

    Returns:
        str: The url of the embedding result, format ref:
             https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details.
    """
    return get_batch_text_embedding(
        self.model_name,
        embedding_file_url,
        api_key=self._api_key,
        text_type=self._text_type,
    )

get_multimodal_embedding #

get_multimodal_embedding(input: List[Dict], auto_truncation: bool = False) -> List[float]

Call DashScope multimodal embedding. ref: https://help.aliyun.com/zh/dashscope/developer-reference/one-peace-multimodal-embedding-api-details.

Parameters:

input (List[Dict]): The input of the multimodal embedding, e.g.: [{'factor': 1, 'text': '你好'}, {'factor': 2, 'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/cow.flac'}, {'factor': 3, 'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png'}]. Required.

Raises:

ImportError: If the dashscope package is not installed.

Returns:

List[float]: The embedding result.
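
A sketch of a multimodal call; the model identifier below is an assumption (DashScope's ONE-PEACE multimodal embedding model) and may differ in your installed SDK:

# Assumed model identifier; verify against your DashScope documentation.
mm_embedder = DashScopeEmbedding(
    model_name="multimodal-embedding-one-peace-v1",
    api_key="sk-your-dashscope-key",  # placeholder API key
)
inputs = [
    {"factor": 1, "text": "你好"},
    {"factor": 2, "image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"},
]
embedding = mm_embedder.get_multimodal_embedding(input=inputs, auto_truncation=True)
print(len(embedding))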

Source code in llama-index-integrations/embeddings/llama-index-embeddings-dashscope/llama_index/embeddings/dashscope/base.py
def get_multimodal_embedding(
    self, input: List[Dict], auto_truncation: bool = False
) -> List[float]:
    """Call DashScope multimodal embedding.
    ref: https://help.aliyun.com/zh/dashscope/developer-reference/one-peace-multimodal-embedding-api-details.

    Args:
        input (List[Dict]): The input of the multimodal embedding, e.g.:
            [{'factor': 1, 'text': '你好'},
            {'factor': 2, 'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/cow.flac'},
            {'factor': 3, 'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png'}]

    Raises:
        ImportError: If the dashscope package is not installed.

    Returns:
        List[float]: The embedding result
    """
    return get_multimodal_embedding(
        self.model_name,
        input=input,
        api_key=self._api_key,
        auto_truncation=auto_truncation,
    )