LlamaCPP#

pydantic model llama_index.llms.llama_cpp.LlamaCPP#

Show JSON schema

{
   "title": "LlamaCPP",
   "description": "Simple abstract base class for custom LLMs.\n\nSubclasses must implement the `__init__`, `_complete`,\n    `_stream_complete`, and `metadata` methods.",
   "type": "object",
   "properties": {
      "callback_manager": {
         "title": "Callback Manager"
      },
      "system_prompt": {
         "title": "System Prompt",
         "description": "System prompt for LLM calls.",
         "type": "string"
      },
      "messages_to_prompt": {
         "title": "Messages To Prompt"
      },
      "completion_to_prompt": {
         "title": "Completion To Prompt"
      },
      "output_parser": {
         "title": "Output Parser"
      },
      "pydantic_program_mode": {
         "default": "default",
         "allOf": [
            {
               "$ref": "#/definitions/PydanticProgramMode"
            }
         ]
      },
      "query_wrapper_prompt": {
         "title": "Query Wrapper Prompt"
      },
      "model_url": {
         "title": "Model Url",
         "description": "The URL of the llama-cpp model to download and use.",
         "type": "string"
      },
      "model_path": {
         "title": "Model Path",
         "description": "The path to the llama-cpp model to use.",
         "type": "string"
      },
      "temperature": {
         "title": "Temperature",
         "description": "The temperature to use for sampling.",
         "default": 0.1,
         "gte": 0.0,
         "lte": 1.0,
         "type": "number"
      },
      "max_new_tokens": {
         "title": "Max New Tokens",
         "description": "The maximum number of tokens to generate.",
         "default": 256,
         "exclusiveMinimum": 0,
         "type": "integer"
      },
      "context_window": {
         "title": "Context Window",
         "description": "The maximum number of context tokens for the model.",
         "default": 3900,
         "exclusiveMinimum": 0,
         "type": "integer"
      },
      "generate_kwargs": {
         "title": "Generate Kwargs",
         "description": "Kwargs used for generation.",
         "type": "object"
      },
      "model_kwargs": {
         "title": "Model Kwargs",
         "description": "Kwargs used for model initialization.",
         "type": "object"
      },
      "verbose": {
         "title": "Verbose",
         "description": "Whether to print verbose output.",
         "default": true,
         "type": "boolean"
      },
      "class_name": {
         "title": "Class Name",
         "type": "string",
         "default": "LlamaCPP_llm"
      }
   },
   "definitions": {
      "PydanticProgramMode": {
         "title": "PydanticProgramMode",
         "description": "Pydantic program mode.",
         "enum": [
            "default",
            "openai",
            "llm",
            "guidance",
            "lm-format-enforcer"
         ],
         "type": "string"
      }
   }
}

Config

arbitrary_types_allowed: bool = True

Fields

context_window (int)
generate_kwargs (Dict[str, Any])
max_new_tokens (int)
model_kwargs (Dict[str, Any])
model_path (Optional[str])
model_url (Optional[str])
temperature (float)
verbose (bool)

Validators

_validate_callback_manager » callback_manager
set_completion_to_prompt » completion_to_prompt
set_messages_to_prompt » messages_to_prompt

field context_window: int = 3900#

The maximum number of context tokens for the model.

Constraints

exclusiveMinimum = 0

field generate_kwargs: Dict[str, Any] [Optional]#: Kwargs used for generation.

field max_new_tokens: int = 256#

The maximum number of tokens to generate.

Constraints

exclusiveMinimum = 0

field model_kwargs: Dict[str, Any] [Optional]#: Kwargs used for model initialization.

field model_path: Optional[str] = None#: The path to the llama-cpp model to use.

field model_url: Optional[str] = None#: The URL of the llama-cpp model to download and use.

field temperature: float = 0.1#: The temperature to use for sampling.

field verbose: bool = True#: Whether to print verbose output.

chat(messages: Sequence[ChatMessage], **kwargs: Any) → Any#: Chat endpoint for LLM.

classmethod class_name() → str#

Get the class name, used as a unique ID in serialization.

This provides a key that makes serialization robust against actual class name changes.

complete(*args: Any, **kwargs: Any) → Any#: Completion endpoint for LLM.

stream_chat(messages: Sequence[ChatMessage], **kwargs: Any) → Any#: Streaming chat endpoint for LLM.

stream_complete(*args: Any, **kwargs: Any) → Any#: Streaming completion endpoint for LLM.

property metadata: LLMMetadata#: LLM metadata.