Skip to content

Multion

MultionToolSpec #

Bases: BaseToolSpec

Multion tool spec.

Source code in llama-index-integrations/tools/llama-index-tools-multion/llama_index/tools/multion/base.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class MultionToolSpec(BaseToolSpec):
    """Tool spec that drives a web browser through the Multion client.

    Exposes one tool function, ``browse``. The tab id returned when the first
    session is created is kept in ``last_tab`` so that later calls update
    that session instead of opening a new one.
    """

    spec_functions = ["browse"]

    def __init__(self, token_file: Optional[str] = "multion_token.txt") -> None:
        """Log in to Multion and start with no active tab.

        Args:
            token_file (Optional[str]): Part of the public signature; it is
                not referenced by this constructor.
        """
        import multion

        multion.login()
        # Tab id of the active Multion session; None until browse() creates one.
        self.last_tab = None

    def browse(self, instruction: str):
        """
        Browse the web using Multion.
        Multion gives the ability for LLMs to control web browsers using natural language instructions.

        You may have to repeat the instruction through multiple steps or update your instruction to get to
        the final desired state. If the status is 'CONTINUE', reissue the same instruction to continue execution.

        Args:
            instruction (str): The detailed and specific natural language instruction for web browsing

        Returns:
            dict: ``url``, ``status`` and ``action_completed`` (the session's
                message) as reported by Multion, plus ``content``, the text
                read from the session screenshot.
        """
        import multion

        if self.last_tab:
            # Continue in the tab opened by an earlier call.
            session = multion.update_session(self.last_tab, {"input": instruction})
        else:
            session = multion.new_session(
                {"input": instruction, "url": "https://google.com"}
            )
            # Remember the tab so follow-up instructions reuse it.
            self.last_tab = session["tabId"]

        return {
            "url": session["url"],
            "status": session["status"],
            "action_completed": session["message"],
            "content": self._read_screenshot(session["screenshot"]),
        }

    def _read_screenshot(self, screenshot: str) -> str:
        """Return the text found in a base64-encoded screenshot via OCR.

        Args:
            screenshot (str): Base64 image data, either bare or wrapped in a
                data URI of the form ``data:<mime>;base64,<payload>``.

        Returns:
            str: The text recognised by pytesseract.
        """
        import pytesseract
        from PIL import Image

        # Drop any "data:<mime>;base64," header, not only the PNG one. The
        # base64 alphabet contains no comma, so whatever follows the last
        # comma is the payload; a bare payload passes through unchanged.
        payload = screenshot.rpartition(",")[2]

        # Release the image's resources as soon as OCR has finished.
        with Image.open(self._bytes_to_image(payload)) as image:
            return pytesseract.image_to_string(image)

    def _bytes_to_image(self, img_bytes) -> BytesIO:
        """Decode base64 data into an in-memory binary stream.

        Args:
            img_bytes: Base64-encoded image data.

        Returns:
            BytesIO: Stream positioned at the start of the decoded bytes.
        """
        return BytesIO(base64.b64decode(img_bytes))

browse #

browse(instruction: str)

Browse the web using Multion. Multion gives the ability for LLMs to control web browsers using natural language instructions.

You may have to repeat the instruction through multiple steps or update your instruction to get to the final desired state. If the status is 'CONTINUE', reissue the same instruction to continue execution.

Parameters:

Name Type Description Default
instruction str

The detailed and specific natural language instruction for web browsing

required
Source code in llama-index-integrations/tools/llama-index-tools-multion/llama_index/tools/multion/base.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def browse(self, instruction: str):
    """
    Control a web browser through Multion with a natural language instruction.

    Reaching the desired end state can take several calls: repeat the
    instruction, or refine it, as needed. When the returned status is
    'CONTINUE', send the same instruction again to carry on.

    Args:
        instruction (str): A detailed, specific natural language description
            of what to do in the browser.

    Returns:
        dict: The session's ``url`` and ``status``, its message under
            ``action_completed``, and the screenshot text under ``content``.
    """
    import multion

    tab_id = self.last_tab
    if not tab_id:
        # No tab yet: open a fresh session and remember its tab for next time.
        request = {"input": instruction, "url": "https://google.com"}
        session = multion.new_session(request)
        self.last_tab = session["tabId"]
    else:
        # Keep working in the tab created by an earlier call.
        session = multion.update_session(tab_id, {"input": instruction})

    # Read the fields in the same order as the result keys.
    current_url = session["url"]
    state = session["status"]
    message = session["message"]
    page_text = self._read_screenshot(session["screenshot"])

    return {
        "url": current_url,
        "status": state,
        "action_completed": message,
        "content": page_text,
    }