Index

Dataset Module.

BaseLlamaDataExample #

Bases: BaseModel

Base llama dataset example class.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

class BaseLlamaDataExample(BaseModel):
    """Base llama dataset example class.

    Abstract base: ``class_name`` is declared abstract, so concrete example
    types are expected to override it.
    """

    @property
    @abstractmethod
    def class_name(self) -> str:
        """Class name.

        Returns:
            str: The identifier ``"BaseLlamaDataExample"`` for this base class.
        """
        return "BaseLlamaDataExample"

class_name `abstractmethod` `property` #

class_name: str

Class name.

BaseLlamaDataset #

Bases: BaseModel, Generic[P]

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

class BaseLlamaDataset(BaseModel, Generic[P]):
    """Abstract dataset of examples that a predictor of type ``P`` is run over.

    Subclasses supply ``to_pandas``, the per-example prediction logic (sync
    and async), the prediction-dataset constructor and ``class_name``.

    While a prediction run is in progress its results are accumulated in a
    private cache, so that a run which was interrupted by an exception can be
    resumed by calling the same method again. The cache is reset once a run
    completes and its prediction dataset has been built.
    """

    # Model class used to serialize / parse the entries of `examples`.
    _example_type: Type[BaseLlamaDataExample] = BaseLlamaDataExample  # type: ignore[misc]
    examples: List[BaseLlamaDataExample] = Field(
        default=[], description="Data examples of this dataset."
    )
    # Predictions produced so far by the current (possibly interrupted) run.
    _predictions_cache: List[BaseLlamaExamplePrediction] = PrivateAttr(
        default_factory=list
    )

    def __getitem__(
        self, val: Union[slice, int]
    ) -> Union[BaseLlamaDataExample, List[BaseLlamaDataExample]]:
        """Enable slicing and indexing.

        Args:
            val (Union[slice, int]): Index or slice applied to `examples`.

        Returns:
            A single example for an ``int`` index, or the list of examples
            selected by a ``slice``.
        """
        return self.examples[val]

    @abstractmethod
    def to_pandas(self) -> PandasDataFrame:
        """Create pandas dataframe.

        NOTE: Subclasses need to implement this.
        """

    def save_json(self, path: str) -> None:
        """Save the examples to a JSON file.

        The file contains one object of the form ``{"examples": [...]}``.

        Args:
            path (str): Destination file; opened in write mode.
        """
        with open(path, "w") as f:
            examples = [self._example_type.dict(el) for el in self.examples]
            data = {
                "examples": examples,
            }

            json.dump(data, f, indent=4)

    @classmethod
    def from_json(cls, path: str) -> "BaseLlamaDataset":
        """Load a dataset from a JSON file.

        Args:
            path (str): File holding an object with an ``"examples"`` list.

        Returns:
            BaseLlamaDataset: An instance of ``cls`` whose examples were
                parsed with the class's ``_example_type``.
        """
        with open(path) as f:
            data = json.load(f)

        examples = [cls._example_type.parse_obj(el) for el in data["examples"]]

        return cls(
            examples=examples,
        )

    @abstractmethod
    def _construct_prediction_dataset(
        self, predictions: List[BaseLlamaExamplePrediction]
    ) -> BaseLlamaPredictionDataset:
        """Construct the specific prediction dataset.

        NOTE: Subclasses need to implement this.

        Args:
            predictions (List[BaseLlamaExamplePrediction]): the list of predictions.

        Returns:
            BaseLlamaPredictionDataset: A dataset of predictions.
        """

    @abstractmethod
    def _predict_example(
        self,
        predictor: P,
        example: BaseLlamaDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> BaseLlamaExamplePrediction:
        """Predict on a single example.

        NOTE: Subclasses need to implement this.

        Args:
            predictor (PredictorType): The predictor to make the prediction with.
            example (BaseLlamaDataExample): The example to predict on.
            sleep_time_in_seconds (int): Sleep time forwarded by
                `make_predictions_with`; its use is up to the subclass.

        Returns:
            BaseLlamaExamplePrediction: The prediction.
        """

    def make_predictions_with(
        self,
        predictor: P,
        show_progress: bool = False,
        batch_size: int = 20,
        sleep_time_in_seconds: int = 0,
    ) -> BaseLlamaPredictionDataset:
        """Predict on the examples with a given predictor.

        Examples are processed one at a time, batch by batch. If an earlier
        call was interrupted by an exception, the predictions it had already
        made are kept and this call continues after them.

        Args:
            predictor (PredictorType): The predictor to make predictions with.
            show_progress (bool, optional): Show progress of making predictions.
            batch_size (int): Number of examples per batch.
            sleep_time_in_seconds (int): Forwarded to `_predict_example` for
                every example.

        Returns:
            BaseLlamaPredictionDataset: A dataset of predictions.
        """
        # Resume after whatever an earlier, interrupted call already predicted.
        start_example_position = len(self._predictions_cache)

        for batch in self._batch_examples(
            batch_size=batch_size, start_position=start_example_position
        ):
            example_iterator = tqdm.tqdm(batch) if show_progress else batch
            for example in example_iterator:
                self._predictions_cache.append(
                    self._predict_example(predictor, example, sleep_time_in_seconds)
                )

        prediction_dataset = self._construct_prediction_dataset(
            predictions=self._predictions_cache
        )
        # Reset the cache after a completed run (as amake_predictions_with
        # does); otherwise the next call would skip every example and hand
        # back these stale predictions, even for a different predictor.
        self._predictions_cache = []
        return prediction_dataset

    # async methods
    @abstractmethod
    async def _apredict_example(
        self,
        predictor: P,
        example: BaseLlamaDataExample,
        sleep_time_in_seconds: int,
    ) -> BaseLlamaExamplePrediction:
        """Async predict on a single example.

        NOTE: Subclasses need to implement this.

        Args:
            predictor (PredictorType): The predictor to make the prediction with.
            example (BaseLlamaDataExample): The example to predict on.
            sleep_time_in_seconds (int): Sleep time forwarded by
                `amake_predictions_with`; its use is up to the subclass.

        Returns:
            BaseLlamaExamplePrediction: The prediction.
        """

    def _batch_examples(
        self,
        batch_size: int = 20,
        start_position: int = 0,
    ) -> Generator[List[BaseLlamaDataExample], None, None]:
        """Yield consecutive slices of `examples` of at most `batch_size` items.

        Args:
            batch_size (int): Maximum number of examples per yielded batch.
            start_position (int): Index of the first example to include.
        """
        num_examples = len(self.examples)
        for ndx in range(start_position, num_examples, batch_size):
            yield self.examples[ndx : min(ndx + batch_size, num_examples)]

    async def amake_predictions_with(
        self,
        predictor: P,
        show_progress: bool = False,
        batch_size: int = 20,
        sleep_time_in_seconds: int = 1,
    ) -> BaseLlamaPredictionDataset:
        """Async predict on the examples with a given predictor.

        The examples of one batch are predicted concurrently; batches run one
        after another. Predictions are cached after each successful batch, so
        a call that follows a failed one continues with the examples that
        have not been predicted yet.

        Args:
            predictor (PredictorType): The predictor to make predictions with.
            show_progress (bool, optional): Show progress of making predictions.
            batch_size (int): Used to batch async calls, especially to reduce chances
                            of hitting RateLimitError from openai.
            sleep_time_in_seconds (int): Forwarded to `_apredict_example` for
                            every example, to reduce chance of hitting
                            RateLimitError from openai.

        Returns:
            BaseLlamaPredictionDataset: A dataset of predictions.

        Raises:
            ValueError: If a batch fails with ``RateLimitError``.
        """
        # Resume after the batches an earlier, failed call already completed.
        start_example_position = len(self._predictions_cache)

        for batch in self._batch_examples(
            batch_size=batch_size, start_position=start_example_position
        ):
            tasks = [
                self._apredict_example(predictor, example, sleep_time_in_seconds)
                for example in batch
            ]
            asyncio_mod = asyncio_module(show_progress=show_progress)

            try:
                if show_progress:
                    batch_predictions = await asyncio_mod.gather(
                        *tasks, desc="Batch processing of predictions"
                    )
                else:
                    batch_predictions = await asyncio_mod.gather(*tasks)
            except RateLimitError as err:
                if show_progress:
                    asyncio_mod.close()
                raise ValueError(
                    "You've hit rate limits on your OpenAI subscription. This"
                    " class caches previous predictions after each successful"
                    " batch execution. Based off this cache, when executing this"
                    " command again it will attempt to predict on only the examples "
                    "that have not yet been predicted. Try reducing your batch_size."
                ) from err
            # Only a fully successful batch reaches the cache.
            self._predictions_cache += batch_predictions

        prediction_dataset = self._construct_prediction_dataset(
            predictions=self._predictions_cache
        )
        self._predictions_cache = []  # clear cache
        return prediction_dataset

    @property
    @abstractmethod
    def class_name(self) -> str:
        """Class name."""
        return "BaseLlamaDataset"

class_name `abstractmethod` `property` #

class_name: str

Class name.

to_pandas `abstractmethod` #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

@abstractmethod
def to_pandas(self) -> PandasDataFrame:
    """Create pandas dataframe.

    NOTE: Abstract; subclasses need to implement this.

    Returns:
        PandasDataFrame: Tabular view of the dataset. The columns are
            decided by the implementing subclass.
    """

save_json #

save_json(path: str) -> None

Save json.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

def save_json(self, path: str) -> None:
    """Serialize the dataset's examples to a JSON file.

    The file holds a single object of the form ``{"examples": [...]}``,
    written with an indent of four spaces.

    Args:
        path (str): Destination file; opened in write mode.
    """
    with open(path, "w") as out_file:
        payload = {
            "examples": [
                self._example_type.dict(example) for example in self.examples
            ],
        }
        json.dump(payload, out_file, indent=4)

from_json `classmethod` #

from_json(path: str) -> BaseLlamaDataset

Load json.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

@classmethod
def from_json(cls, path: str) -> "BaseLlamaDataset":
    """Build a dataset from a JSON file.

    Args:
        path (str): File holding an object with an ``"examples"`` list.

    Returns:
        BaseLlamaDataset: An instance of ``cls`` whose examples were parsed
            with the class's ``_example_type``.
    """
    with open(path) as in_file:
        raw = json.load(in_file)

    example_type = cls._example_type
    return cls(examples=[example_type.parse_obj(item) for item in raw["examples"]])

make_predictions_with #

make_predictions_with(predictor: P, show_progress: bool = False, batch_size: int = 20, sleep_time_in_seconds: int = 0) -> BaseLlamaPredictionDataset

Predict with a given query engine.

Parameters:

Name	Type	Description	Default
`predictor`	`PredictorType`	The predictor to make predictions with.	required
`show_progress`	`bool`	Show progress of making predictions.	`False`
`batch_size`	`int`	Used to batch async calls, especially to reduce chances of hitting RateLimitError from openai.	`20`
`sleep_time_in_seconds`	`int`	Amount of time to sleep between batch call to reduce chance of hitting RateLimitError from openai.	`0`

Returns:

Name	Type	Description
`BaseLlamaPredictionDataset`	`BaseLlamaPredictionDataset`	A dataset of predictions.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

def make_predictions_with(
    self,
    predictor: P,
    show_progress: bool = False,
    batch_size: int = 20,
    sleep_time_in_seconds: int = 0,
) -> BaseLlamaPredictionDataset:
    """Predict on the examples with a given predictor.

    Examples are processed one at a time, batch by batch. If an earlier call
    was interrupted by an exception, the predictions it had already made are
    kept in the cache and this call continues after them.

    Args:
        predictor (PredictorType): The predictor to make predictions with.
        show_progress (bool, optional): Show progress of making predictions.
        batch_size (int): Number of examples per batch.
        sleep_time_in_seconds (int): Forwarded to `_predict_example` for
            every example.

    Returns:
        BaseLlamaPredictionDataset: A dataset of predictions.
    """
    # Resume after whatever an earlier, interrupted call already predicted.
    start_example_position = len(self._predictions_cache)

    for batch in self._batch_examples(
        batch_size=batch_size, start_position=start_example_position
    ):
        example_iterator = tqdm.tqdm(batch) if show_progress else batch
        for example in example_iterator:
            self._predictions_cache.append(
                self._predict_example(predictor, example, sleep_time_in_seconds)
            )

    prediction_dataset = self._construct_prediction_dataset(
        predictions=self._predictions_cache
    )
    # Reset the cache after a completed run (as amake_predictions_with does);
    # otherwise the next call would skip every example and hand back these
    # stale predictions, even for a different predictor.
    self._predictions_cache = []
    return prediction_dataset

amake_predictions_with `async` #

amake_predictions_with(predictor: P, show_progress: bool = False, batch_size: int = 20, sleep_time_in_seconds: int = 1) -> BaseLlamaPredictionDataset

Async predict with a given query engine.

Parameters:

Name	Type	Description	Default
`predictor`	`PredictorType`	The predictor to make predictions with.	required
`show_progress`	`bool`	Show progress of making predictions.	`False`
`batch_size`	`int`	Used to batch async calls, especially to reduce chances of hitting RateLimitError from openai.	`20`
`sleep_time_in_seconds`	`int`	Amount of time to sleep between batch call to reduce chance of hitting RateLimitError from openai.	`1`

Returns:

Name	Type	Description
`BaseLlamaPredictionDataset`	`BaseLlamaPredictionDataset`	A dataset of predictions.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

async def amake_predictions_with(
    self,
    predictor: P,
    show_progress: bool = False,
    batch_size: int = 20,
    sleep_time_in_seconds: int = 1,
) -> BaseLlamaPredictionDataset:
    """Asynchronously predict on the examples with a given predictor.

    The examples of one batch are predicted concurrently; batches run one
    after another. Predictions are cached after each successful batch, so a
    call that follows a failed one continues with the examples that have not
    been predicted yet. The cache is cleared once all batches have succeeded.

    Args:
        predictor (PredictorType): The predictor to make predictions with.
        show_progress (bool, optional): Show progress of making predictions.
        batch_size (int): Used to batch async calls, especially to reduce chances
                        of hitting RateLimitError from openai.
        sleep_time_in_seconds (int): Forwarded to `_apredict_example` for every
                        example, to reduce chance of hitting RateLimitError
                        from openai.

    Returns:
        BaseLlamaPredictionDataset: A dataset of predictions.

    Raises:
        ValueError: If a batch fails with ``RateLimitError``.
    """
    # An empty cache has length 0, so this also covers the fresh-start case.
    resume_from = len(self._predictions_cache)

    for batch in self._batch_examples(
        batch_size=batch_size, start_position=resume_from
    ):
        pending = [
            self._apredict_example(predictor, example, sleep_time_in_seconds)
            for example in batch
        ]
        asyncio_mod = asyncio_module(show_progress=show_progress)

        # The progress-reporting gather takes a description; the plain one
        # is called without extra keyword arguments.
        gather_kwargs = (
            {"desc": "Batch processing of predictions"} if show_progress else {}
        )
        try:
            batch_predictions = await asyncio_mod.gather(*pending, **gather_kwargs)
        except RateLimitError as err:
            if show_progress:
                asyncio_mod.close()
            raise ValueError(
                "You've hit rate limits on your OpenAI subscription. This"
                " class caches previous predictions after each successful"
                " batch execution. Based off this cache, when executing this"
                " command again it will attempt to predict on only the examples "
                "that have not yet been predicted. Try reducing your batch_size."
            ) from err
        # Only a fully successful batch reaches the cache.
        self._predictions_cache += batch_predictions

    result = self._construct_prediction_dataset(predictions=self._predictions_cache)
    self._predictions_cache = []  # run finished: start the next one from scratch
    return result

BaseLlamaExamplePrediction #

Bases: BaseModel

Base llama dataset example prediction class.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

class BaseLlamaExamplePrediction(BaseModel):
    """Base llama dataset example prediction class.

    Abstract base for the prediction made on a single data example:
    ``class_name`` is declared abstract, so concrete prediction types are
    expected to override it.
    """

    @property
    @abstractmethod
    def class_name(self) -> str:
        """Class name.

        NOTE(review): the returned identifier is ``"BaseLlamaPrediction"``,
        which differs from the Python class name — confirm this is intended.
        """
        return "BaseLlamaPrediction"

class_name `abstractmethod` `property` #

class_name: str

Class name.

BaseLlamaPredictionDataset #

Bases: BaseModel

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

class BaseLlamaPredictionDataset(BaseModel):
    """Abstract container for the predictions made on a dataset's examples.

    Subclasses supply ``to_pandas`` and ``class_name`` and may override
    ``_prediction_type``.
    """

    # Model class used to serialize / parse the entries of `predictions`.
    _prediction_type: Type[BaseLlamaExamplePrediction] = BaseLlamaExamplePrediction  # type: ignore[misc]
    # `default_factory=list` gives every instance its own empty list;
    # `default=list` would make the `list` type itself the default value.
    predictions: List[BaseLlamaExamplePrediction] = Field(
        default_factory=list, description="Predictions on train_examples."
    )

    def __getitem__(
        self, val: Union[slice, int]
    ) -> Union[BaseLlamaExamplePrediction, List[BaseLlamaExamplePrediction]]:
        """Enable slicing and indexing.

        Args:
            val (Union[slice, int]): Index or slice applied to `predictions`.

        Returns:
            A single prediction for an ``int`` index, or the list of
            predictions selected by a ``slice``.
        """
        return self.predictions[val]

    @abstractmethod
    def to_pandas(self) -> PandasDataFrame:
        """Create pandas dataframe.

        NOTE: Subclasses need to implement this.
        """

    def save_json(self, path: str) -> None:
        """Save the predictions to a JSON file.

        The file contains one object of the form ``{"predictions": [...]}``.
        When there are no predictions the value written is ``null``.

        Args:
            path (str): Destination file; opened in write mode.
        """
        with open(path, "w") as f:
            predictions = None
            if self.predictions:
                predictions = [
                    self._prediction_type.dict(el) for el in self.predictions
                ]
            data = {
                "predictions": predictions,
            }

            json.dump(data, f, indent=4)

    @classmethod
    def from_json(cls, path: str) -> "BaseLlamaPredictionDataset":
        """Load a prediction dataset from a JSON file.

        Args:
            path (str): File holding an object with a ``"predictions"`` entry.

        Returns:
            BaseLlamaPredictionDataset: An instance of ``cls`` whose
                predictions were parsed with the class's ``_prediction_type``.
        """
        with open(path) as f:
            data = json.load(f)

        # `save_json` writes null for an empty dataset; read it back as an
        # empty list instead of failing while iterating over None.
        raw_predictions = data["predictions"] or []
        predictions = [cls._prediction_type.parse_obj(el) for el in raw_predictions]

        return cls(
            predictions=predictions,
        )

    @property
    @abstractmethod
    def class_name(self) -> str:
        """Class name."""
        return "BaseLlamaPredictionDataset"

class_name `abstractmethod` `property` #

class_name: str

Class name.

to_pandas `abstractmethod` #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

@abstractmethod
def to_pandas(self) -> PandasDataFrame:
    """Create pandas dataframe.

    NOTE: Abstract; subclasses need to implement this.

    Returns:
        PandasDataFrame: Tabular view of the predictions. The columns are
            decided by the implementing subclass.
    """

save_json #

save_json(path: str) -> None

Save json.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

def save_json(self, path: str) -> None:
    """Serialize the predictions to a JSON file.

    The file holds a single object of the form ``{"predictions": [...]}``,
    written with an indent of four spaces. When there are no predictions the
    value written is ``null``.

    Args:
        path (str): Destination file; opened in write mode.
    """
    with open(path, "w") as out_file:
        serialized = (
            [self._prediction_type.dict(pred) for pred in self.predictions]
            if self.predictions
            else None
        )
        json.dump({"predictions": serialized}, out_file, indent=4)

from_json `classmethod` #

from_json(path: str) -> BaseLlamaPredictionDataset

Load json.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

@classmethod
def from_json(cls, path: str) -> "BaseLlamaPredictionDataset":
    """Load a prediction dataset from a JSON file.

    Args:
        path (str): File holding an object with a ``"predictions"`` entry.

    Returns:
        BaseLlamaPredictionDataset: An instance of ``cls`` whose predictions
            were parsed with the class's ``_prediction_type``.
    """
    with open(path) as f:
        data = json.load(f)

    # `save_json` writes null for an empty dataset; read it back as an empty
    # list instead of failing while iterating over None.
    raw_predictions = data["predictions"] or []
    predictions = [cls._prediction_type.parse_obj(el) for el in raw_predictions]

    return cls(
        predictions=predictions,
    )

CreatedByType #

Bases: str, Enum

The kinds of rag data examples.

Source code in llama-index-core/llama_index/core/llama_dataset/base.py

class CreatedByType(str, Enum):
    """Creator categories: ``"human"`` or ``"ai"``.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    HUMAN = "human"
    AI = "ai"

    def __str__(self) -> str:
        """Return the raw value (e.g. ``"human"``) instead of the member name."""
        return self._value_

EvaluatorExamplePrediction #

Bases: BaseLlamaExamplePrediction

Evaluation example prediction class.

Parameters:

Name	Type	Description	Default
`feedback`	`Optional[str]`	The evaluator's feedback.	required
`score`	`Optional[float]`	The evaluator's score.	required

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class EvaluatorExamplePrediction(BaseLlamaExamplePrediction):
    """Evaluation example prediction class.

    Args:
        feedback (str): The evaluator's feedback. Defaults to an empty string.
        score (Optional[float]): The evaluator's score.
        invalid_prediction (bool): Whether the prediction is flagged as
            invalid. Defaults to ``False``.
        invalid_reason (Optional[str]): Reason as to why the prediction is
            invalid.
    """

    # The two descriptions below previously repeated a sentence about a
    # generated response; they now describe the fields they belong to.
    feedback: str = Field(
        default_factory=str,
        description="The evaluator's feedback.",
    )
    score: Optional[float] = Field(
        default=None,
        description="The evaluator's score.",
    )
    invalid_prediction: bool = Field(
        default=False, description="Whether or not the prediction is a valid one."
    )
    invalid_reason: Optional[str] = Field(
        default=None, description="Reason as to why prediction is invalid."
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "EvaluatorExamplePrediction"

class_name `property` #

class_name: str

Data example class name.

EvaluatorPredictionDataset #

Bases: BaseLlamaPredictionDataset

Evaluation Prediction Dataset Class.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class EvaluatorPredictionDataset(BaseLlamaPredictionDataset):
    """Prediction dataset whose entries are ``EvaluatorExamplePrediction``."""

    _prediction_type = EvaluatorExamplePrediction

    def to_pandas(self) -> PandasDataFrame:
        """Return the predictions as a dataframe.

        Returns:
            PandasDataFrame: ``feedback`` and ``score`` columns with one row
                per prediction; built from an empty dict when there are no
                predictions.
        """
        predictions = self.predictions
        if not predictions:
            return PandasDataFrame({})

        return PandasDataFrame(
            {
                "feedback": [pred.feedback for pred in predictions],
                "score": [pred.score for pred in predictions],
            }
        )

    @property
    def class_name(self) -> str:
        """Name identifying this class."""
        return "EvaluatorPredictionDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

def to_pandas(self) -> PandasDataFrame:
    """Return the predictions as a dataframe.

    Returns:
        PandasDataFrame: ``feedback`` and ``score`` columns with one row per
            prediction; built from an empty dict when there are no
            predictions.
    """
    predictions = self.predictions
    if not predictions:
        return PandasDataFrame({})

    return PandasDataFrame(
        {
            "feedback": [pred.feedback for pred in predictions],
            "score": [pred.score for pred in predictions],
        }
    )

LabelledEvaluatorDataExample #

Bases: BaseLlamaDataExample

Evaluation example class.

This data class contains the ingredients to perform a new "prediction" i.e., evaluation. Here an evaluator is meant to evaluate a response against an associated query as well as optionally contexts.

Parameters:

Name	Type	Description	Default
`query`	`str`	The user query	required
`query_by`	`CreatedBy`	Query generated by human or ai (model-name)	required
`contexts`	`Optional[List[str]]`	The contexts used for response	required
`answer`	`str`	Answer to the query that is to be evaluated.	required
`answer_by`		The reference answer generated by human or ai (model-name).	required
`ground_truth_answer`	`Optional[str]`		required
`ground_truth_answer_by`	`Optional[CreatedBy]`		required
`reference_feedback`	`str`	The reference feedback evaluation.	required
`reference_score`	`float`	The reference score evaluation.	required
`reference_evaluation_by`	`CreatedBy`	Evaluation generated by human or ai (model-name)	required

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class LabelledEvaluatorDataExample(BaseLlamaDataExample):
    """Evaluation example class.

    This data class contains the ingredients to perform a new "prediction" i.e.,
    evaluation. Here an evaluator is meant to evaluate a response against an
    associated query as well as optionally contexts.

    Args:
        query (str): The user query
        query_by (Optional[CreatedBy]): Query generated by human or ai (model-name)
        contexts (Optional[List[str]]): The contexts used for response
        answer (str): Answer to the query that is to be evaluated.
        answer_by (Optional[CreatedBy]): Answer generated by human or ai (model-name).
        ground_truth_answer (Optional[str]): The ground truth answer used to
            evaluate the provided `answer`.
        ground_truth_answer_by (Optional[CreatedBy]): Ground truth answer
            generated by human or ai (model-name).
        reference_feedback (Optional[str]): The reference feedback evaluation.
        reference_score (float): The reference score evaluation.
        reference_evaluation_by (Optional[CreatedBy]): Evaluation generated by
            human or ai (model-name)
    """

    query: str = Field(
        default_factory=str, description="The user query for the example."
    )
    query_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the query."
    )
    # `default=None` (not `default_factory=None`, which supplies no default at
    # all) so that contexts are optional like the other Optional fields.
    contexts: Optional[List[str]] = Field(
        default=None,
        description="The contexts used to generate the answer.",
    )
    answer: str = Field(
        default_factory=str,
        description="The provided answer to the example that is to be evaluated.",
    )
    answer_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the answer."
    )
    ground_truth_answer: Optional[str] = Field(
        default=None,
        description="The ground truth answer to the example that is used to evaluate the provided `answer`.",
    )
    ground_truth_answer_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the ground-truth answer."
    )
    reference_feedback: Optional[str] = Field(
        default=None,
        description="The reference feedback (ground-truth).",
    )
    reference_score: float = Field(
        default_factory=float, description="The reference score (ground-truth)."
    )
    reference_evaluation_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the evaluation (feedback and score)."
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "LabelledEvaluatorDataExample"

class_name `property` #

class_name: str

Data example class name.

LabelledEvaluatorDataset #

Bases: BaseLlamaDataset[BaseEvaluator]

LabelledEvaluatorDataset class.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class LabelledEvaluatorDataset(BaseLlamaDataset[BaseEvaluator]):
    """LabelledEvaluatorDataset class.

    A dataset of ``LabelledEvaluatorDataExample`` items whose predictor is a
    ``BaseEvaluator``; predictions are ``EvaluatorExamplePrediction`` objects.
    """

    _example_type = LabelledEvaluatorDataExample

    def to_pandas(self) -> PandasDataFrame:
        """Return the examples as a dataframe with one row per example.

        The ``query_by``, ``answer_by`` and ``ground_truth_answer_by`` columns
        hold ``str(...)`` of the respective field; all other columns,
        including ``reference_evaluation_by``, hold the field values as-is.
        """
        examples = self.examples
        columns = {
            "query": [ex.query for ex in examples],
            "answer": [ex.answer for ex in examples],
            "contexts": [ex.contexts for ex in examples],
            "ground_truth_answer": [ex.ground_truth_answer for ex in examples],
            "query_by": [str(ex.query_by) for ex in examples],
            "answer_by": [str(ex.answer_by) for ex in examples],
            "ground_truth_answer_by": [
                str(ex.ground_truth_answer_by) for ex in examples
            ],
            "reference_feedback": [ex.reference_feedback for ex in examples],
            "reference_score": [ex.reference_score for ex in examples],
            "reference_evaluation_by": [
                ex.reference_evaluation_by for ex in examples
            ],
        }
        return PandasDataFrame(columns)

    async def _apredict_example(
        self,
        predictor: BaseEvaluator,
        example: LabelledEvaluatorDataExample,
        sleep_time_in_seconds: int,
    ) -> EvaluatorExamplePrediction:
        """Asynchronously evaluate a single example with the given evaluator.

        Sleeps for `sleep_time_in_seconds` first. Any exception raised by the
        evaluation is converted into an invalid prediction instead of being
        propagated.

        Args:
            predictor (BaseEvaluator): Evaluator whose `aevaluate` is awaited.
            example (LabelledEvaluatorDataExample): The example to evaluate.
            sleep_time_in_seconds (int): Sleep duration; also passed on to
                `aevaluate`.

        Returns:
            EvaluatorExamplePrediction: Feedback and score, or a prediction
                flagged invalid together with the reason.
        """
        await asyncio.sleep(sleep_time_in_seconds)
        try:
            eval_result: EvaluationResult = await predictor.aevaluate(
                query=example.query,
                response=example.answer,
                contexts=example.contexts,
                reference=example.ground_truth_answer,
                sleep_time_in_seconds=sleep_time_in_seconds,
            )
        except Exception as err:
            # TODO: raise warning here as well
            failure_reason = f"Caught error {err!s}"
            return EvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=failure_reason
            )

        if eval_result.invalid_result:
            return EvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=eval_result.invalid_reason
            )
        return EvaluatorExamplePrediction(
            feedback=eval_result.feedback, score=eval_result.score
        )

    def _predict_example(
        self,
        predictor: BaseEvaluator,
        example: LabelledEvaluatorDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> EvaluatorExamplePrediction:
        """Evaluate a single example with the given evaluator.

        Sleeps for `sleep_time_in_seconds` first. Any exception raised by the
        evaluation is converted into an invalid prediction instead of being
        propagated.

        Args:
            predictor (BaseEvaluator): Evaluator whose `evaluate` is called.
            example (LabelledEvaluatorDataExample): The example to evaluate.
            sleep_time_in_seconds (int): Sleep duration; also passed on to
                `evaluate`.

        Returns:
            EvaluatorExamplePrediction: Feedback and score, or a prediction
                flagged invalid together with the reason.
        """
        time.sleep(sleep_time_in_seconds)
        try:
            eval_result: EvaluationResult = predictor.evaluate(
                query=example.query,
                response=example.answer,
                contexts=example.contexts,
                reference=example.ground_truth_answer,
                sleep_time_in_seconds=sleep_time_in_seconds,
            )
        except Exception as err:
            # TODO: raise warning here as well
            failure_reason = f"Caught error {err!s}"
            return EvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=failure_reason
            )

        if eval_result.invalid_result:
            return EvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=eval_result.invalid_reason
            )
        return EvaluatorExamplePrediction(
            feedback=eval_result.feedback, score=eval_result.score
        )

    def _construct_prediction_dataset(
        self, predictions: List[EvaluatorExamplePrediction]
    ) -> EvaluatorPredictionDataset:
        """Wrap the given predictions in an ``EvaluatorPredictionDataset``."""
        return EvaluatorPredictionDataset(predictions=predictions)

    @property
    def class_name(self) -> str:
        """Name identifying this class."""
        return "LabelledEvaluatorDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

def to_pandas(self) -> PandasDataFrame:
    """Return the examples as a dataframe with one row per example.

    The ``query_by``, ``answer_by`` and ``ground_truth_answer_by`` columns
    hold ``str(...)`` of the respective field; all other columns, including
    ``reference_evaluation_by``, hold the field values as-is.
    """
    examples = self.examples
    columns = {
        "query": [ex.query for ex in examples],
        "answer": [ex.answer for ex in examples],
        "contexts": [ex.contexts for ex in examples],
        "ground_truth_answer": [ex.ground_truth_answer for ex in examples],
        "query_by": [str(ex.query_by) for ex in examples],
        "answer_by": [str(ex.answer_by) for ex in examples],
        "ground_truth_answer_by": [
            str(ex.ground_truth_answer_by) for ex in examples
        ],
        "reference_feedback": [ex.reference_feedback for ex in examples],
        "reference_score": [ex.reference_score for ex in examples],
        "reference_evaluation_by": [
            ex.reference_evaluation_by for ex in examples
        ],
    }
    return PandasDataFrame(columns)

LabelledPairwiseEvaluatorDataExample #

Bases: LabelledEvaluatorDataExample

Labelled pairwise evaluation data example class.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class LabelledPairwiseEvaluatorDataExample(LabelledEvaluatorDataExample):
    """Labelled data example for pairwise evaluation.

    Extends ``LabelledEvaluatorDataExample`` with a second answer, and what
    generated it, to be evaluated against ``answer``.
    """

    second_answer: str = Field(
        description="The second answer to the example that is to be evaluated along versus `answer`.",
        default_factory=str,
    )
    second_answer_by: Optional[CreatedBy] = Field(
        description="What generated the second answer.",
        default=None,
    )

    @property
    def class_name(self) -> str:
        """Name identifying this data example class."""
        return "LabelledPairwiseEvaluatorDataExample"

class_name `property` #

class_name: str

Data example class name.

LabelledPairwiseEvaluatorDataset #

Bases: BaseLlamaDataset[BaseEvaluator]

Labelled pairwise evaluation dataset. For evaluating the evaluator in performing pairwise evaluations.

Parameters:

Name	Type	Description	Default
`BaseLlamaDataset`	`_type_`	description	required

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class LabelledPairwiseEvaluatorDataset(BaseLlamaDataset[BaseEvaluator]):
    """Labelled pairwise evaluation dataset.

    For evaluating the evaluator in performing pairwise evaluations.
    """

    _example_type = LabelledPairwiseEvaluatorDataExample

    def to_pandas(self) -> PandasDataFrame:
        """Build a dataframe with one row per example.

        The ``query_by``, ``answer_by``, ``second_answer_by`` and
        ``ground_truth_answer_by`` columns are stringified;
        ``reference_evaluation_by`` is stored as-is.
        """
        examples = self.examples
        columns = {
            "query": [ex.query for ex in examples],
            "answer": [ex.answer for ex in examples],
            "second_answer": [ex.second_answer for ex in examples],
            "contexts": [ex.contexts for ex in examples],
            "ground_truth_answer": [ex.ground_truth_answer for ex in examples],
            "query_by": [str(ex.query_by) for ex in examples],
            "answer_by": [str(ex.answer_by) for ex in examples],
            "second_answer_by": [str(ex.second_answer_by) for ex in examples],
            "ground_truth_answer_by": [
                str(ex.ground_truth_answer_by) for ex in examples
            ],
            "reference_feedback": [ex.reference_feedback for ex in examples],
            "reference_score": [ex.reference_score for ex in examples],
            "reference_evaluation_by": [
                ex.reference_evaluation_by for ex in examples
            ],
        }
        return PandasDataFrame(columns)

    async def _apredict_example(
        self,
        predictor: BaseEvaluator,
        example: LabelledPairwiseEvaluatorDataExample,
        sleep_time_in_seconds: int,
    ) -> PairwiseEvaluatorExamplePrediction:
        """Asynchronously evaluate one pairwise example with an evaluator.

        Sleeps for ``sleep_time_in_seconds`` before calling the evaluator. An
        exception raised by the evaluator, or a result flagged as invalid, is
        reported as a prediction with ``invalid_prediction=True`` rather than
        being propagated.
        """
        await asyncio.sleep(sleep_time_in_seconds)
        try:
            result: EvaluationResult = await predictor.aevaluate(
                query=example.query,
                response=example.answer,
                second_response=example.second_answer,
                contexts=example.contexts,
                reference=example.ground_truth_answer,
                sleep_time_in_seconds=sleep_time_in_seconds,
            )
        except Exception as err:
            # TODO: raise warning here as well
            return PairwiseEvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=f"Caught error {err!s}"
            )

        # Guard clause: the evaluator ran but flagged its own result invalid.
        if result.invalid_result:
            return PairwiseEvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=result.invalid_reason
            )

        return PairwiseEvaluatorExamplePrediction(
            feedback=result.feedback,
            score=result.score,
            evaluation_source=result.pairwise_source,
        )

    def _predict_example(
        self,
        predictor: BaseEvaluator,
        example: LabelledPairwiseEvaluatorDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> PairwiseEvaluatorExamplePrediction:
        """Evaluate one pairwise example with an evaluator (blocking).

        Sleeps for ``sleep_time_in_seconds`` before calling the evaluator. An
        exception raised by the evaluator, or a result flagged as invalid, is
        reported as a prediction with ``invalid_prediction=True`` rather than
        being propagated.
        """
        time.sleep(sleep_time_in_seconds)
        try:
            result: EvaluationResult = predictor.evaluate(
                query=example.query,
                response=example.answer,
                second_response=example.second_answer,
                contexts=example.contexts,
                reference=example.ground_truth_answer,
                sleep_time_in_seconds=sleep_time_in_seconds,
            )
        except Exception as err:
            # TODO: raise warning here as well
            return PairwiseEvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=f"Caught error {err!s}"
            )

        # Guard clause: the evaluator ran but flagged its own result invalid.
        if result.invalid_result:
            return PairwiseEvaluatorExamplePrediction(
                invalid_prediction=True, invalid_reason=result.invalid_reason
            )

        return PairwiseEvaluatorExamplePrediction(
            feedback=result.feedback,
            score=result.score,
            evaluation_source=result.pairwise_source,
        )

    def _construct_prediction_dataset(
        self, predictions: List[PairwiseEvaluatorExamplePrediction]
    ) -> PairwiseEvaluatorPredictionDataset:
        """Wrap the given predictions in a pairwise prediction dataset."""
        return PairwiseEvaluatorPredictionDataset(predictions=predictions)

    @property
    def class_name(self) -> str:
        """Return the name of this dataset class."""
        return "LabelledPairwiseEvaluatorDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

def to_pandas(self) -> PandasDataFrame:
    """Build a dataframe with one row per example.

    The ``query_by``, ``answer_by``, ``second_answer_by`` and
    ``ground_truth_answer_by`` columns are stringified;
    ``reference_evaluation_by`` is stored as-is.
    """
    examples = self.examples
    columns = {
        "query": [ex.query for ex in examples],
        "answer": [ex.answer for ex in examples],
        "second_answer": [ex.second_answer for ex in examples],
        "contexts": [ex.contexts for ex in examples],
        "ground_truth_answer": [ex.ground_truth_answer for ex in examples],
        "query_by": [str(ex.query_by) for ex in examples],
        "answer_by": [str(ex.answer_by) for ex in examples],
        "second_answer_by": [str(ex.second_answer_by) for ex in examples],
        "ground_truth_answer_by": [
            str(ex.ground_truth_answer_by) for ex in examples
        ],
        "reference_feedback": [ex.reference_feedback for ex in examples],
        "reference_score": [ex.reference_score for ex in examples],
        "reference_evaluation_by": [
            ex.reference_evaluation_by for ex in examples
        ],
    }
    return PandasDataFrame(columns)

PairwiseEvaluatorExamplePrediction #

Bases: BaseLlamaExamplePrediction

Pairwise evaluation example prediction class.

Parameters:

Name	Type	Description	Default
`feedback`	`Optional[str]`	The evaluator's feedback.	required
`score`	`Optional[float]`	The evaluator's score.	required
`evaluation_source`	`EvaluationSource`	If the evaluation came from original order or flipped; or inconclusive.	required

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class PairwiseEvaluatorExamplePrediction(BaseLlamaExamplePrediction):
    """Pairwise evaluation example prediction class.

    Args:
        feedback (str): The evaluator's feedback.
        score (Optional[float]): The evaluator's score.
        evaluation_source (Optional[EvaluationSource]): If the evaluation came
            from original order or flipped; or inconclusive.
        invalid_prediction (bool): True when the prediction is invalid.
        invalid_reason (Optional[str]): Reason as to why prediction is invalid.
    """

    # The descriptions of `feedback` and `score` previously repeated the RAG
    # "generated response" text; they now describe what the fields hold.
    feedback: str = Field(
        default_factory=str,
        description="The evaluator's feedback.",
    )
    score: Optional[float] = Field(
        default=None,
        description="The evaluator's score.",
    )
    evaluation_source: Optional[EvaluationSource] = Field(
        default=None,
        description=(
            "Whether the evaluation comes from original, or flipped ordering. Can also be neither here indicating inconclusive judgement."
        ),
    )
    # True marks the prediction as INVALID (the flag name is the source of truth).
    invalid_prediction: bool = Field(
        default=False, description="Whether or not the prediction is invalid."
    )
    invalid_reason: Optional[str] = Field(
        default=None, description="Reason as to why prediction is invalid."
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "PairwiseEvaluatorExamplePrediction"

class_name `property` #

class_name: str

Data example class name.

PairwiseEvaluatorPredictionDataset #

Bases: BaseLlamaPredictionDataset

Pairwise evaluation predictions dataset class.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

class PairwiseEvaluatorPredictionDataset(BaseLlamaPredictionDataset):
    """Pairwise evaluation predictions dataset class."""

    _prediction_type = PairwiseEvaluatorExamplePrediction

    def to_pandas(self) -> PandasDataFrame:
        """Create pandas dataframe.

        Returns:
            A dataframe with ``feedback``, ``score`` and ``ordering`` columns,
            one row per prediction, or an empty dataframe when there are no
            predictions. ``ordering`` is None for predictions that carry no
            ``evaluation_source``.
        """
        data = {}
        if self.predictions:
            data = {
                "feedback": [t.feedback for t in self.predictions],
                "score": [t.score for t in self.predictions],
                # `evaluation_source` is optional and defaults to None, so
                # guard before reading `.value` instead of raising
                # AttributeError on predictions that never set it.
                "ordering": [
                    t.evaluation_source.value
                    if t.evaluation_source is not None
                    else None
                    for t in self.predictions
                ],
            }

        return PandasDataFrame(data)

    @property
    def class_name(self) -> str:
        """Class name."""
        return "PairwiseEvaluatorPredictionDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/evaluator_evaluation.py

def to_pandas(self) -> PandasDataFrame:
    """Create pandas dataframe.

    Returns:
        A dataframe with ``feedback``, ``score`` and ``ordering`` columns,
        one row per prediction, or an empty dataframe when there are no
        predictions. ``ordering`` is None for predictions that carry no
        ``evaluation_source``.
    """
    data = {}
    if self.predictions:
        data = {
            "feedback": [t.feedback for t in self.predictions],
            "score": [t.score for t in self.predictions],
            # `evaluation_source` may be None (e.g. a prediction that never
            # set it), so guard before reading `.value` instead of raising
            # AttributeError.
            "ordering": [
                t.evaluation_source.value
                if t.evaluation_source is not None
                else None
                for t in self.predictions
            ],
        }

    return PandasDataFrame(data)

LabelledRagDataExample #

Bases: BaseLlamaDataExample

RAG example class. Analogous to traditional ML datasets, this dataset contains the "features" (i.e., query + context) to make a prediction and the "label" (i.e., response) to evaluate the prediction.

Parameters:

Name	Type	Description	Default
`query`	`str`	The user query	required
`query_by`	`CreatedBy`	Query generated by human or ai (model-name)	required
`reference_contexts`	`Optional[List[str]]`	The contexts used for response	required
`reference_answer`	`str`	Reference answer to the query. An answer that would receive full marks upon evaluation.	required
`reference_answer_by`		The reference answer generated by human or ai (model-name).	required

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

class LabelledRagDataExample(BaseLlamaDataExample):
    """RAG example class. Analogous to traditional ML datasets, this dataset contains
    the "features" (i.e., query + context) to make a prediction and the "label" (i.e., response)
    to evaluate the prediction.

    Args:
        query (str): The user query
        query_by (Optional[CreatedBy]): Query generated by human or ai (model-name)
        reference_contexts (Optional[List[str]]): The contexts used for response
        reference_answer (str): Reference answer to the query. An answer
                                    that would receive full marks upon evaluation.
        reference_answer_by (Optional[CreatedBy]): The reference answer generated
                                    by human or ai (model-name).
    """

    query: str = Field(
        default_factory=str, description="The user query for the example."
    )
    query_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the query."
    )
    # `default=None` (not `default_factory=None`): a None factory supplies no
    # default at all, which leaves this Optional field without an explicit
    # default value.
    reference_contexts: Optional[List[str]] = Field(
        default=None,
        description="The contexts used to generate the reference answer.",
    )
    reference_answer: str = Field(
        default_factory=str,
        description="The reference (ground-truth) answer to the example.",
    )
    reference_answer_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the reference answer."
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "LabelledRagDataExample"

class_name `property` #

class_name: str

Data example class name.

LabelledRagDataset #

Bases: BaseLlamaDataset[BaseQueryEngine]

RagDataset class.

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

class LabelledRagDataset(BaseLlamaDataset[BaseQueryEngine]):
    """Dataset of labelled RAG examples, predicted on with a query engine."""

    _example_type = LabelledRagDataExample

    def to_pandas(self) -> PandasDataFrame:
        """Build a dataframe with one row per example.

        The ``reference_answer_by`` and ``query_by`` columns are stringified.
        """
        examples = self.examples
        columns = {
            "query": [ex.query for ex in examples],
            "reference_contexts": [ex.reference_contexts for ex in examples],
            "reference_answer": [ex.reference_answer for ex in examples],
            "reference_answer_by": [str(ex.reference_answer_by) for ex in examples],
            "query_by": [str(ex.query_by) for ex in examples],
        }
        return PandasDataFrame(columns)

    async def _apredict_example(
        self,
        predictor: BaseQueryEngine,
        example: LabelledRagDataExample,
        sleep_time_in_seconds: int,
    ) -> RagExamplePrediction:
        """Asynchronously query the engine with the example's query.

        Sleeps for ``sleep_time_in_seconds`` first; the prediction holds the
        stringified response and the text of each of its source nodes.
        """
        await asyncio.sleep(sleep_time_in_seconds)
        response = await predictor.aquery(example.query)
        response_text = str(response)
        node_texts = [node.text for node in response.source_nodes]
        return RagExamplePrediction(response=response_text, contexts=node_texts)

    def _predict_example(
        self,
        predictor: BaseQueryEngine,
        example: LabelledRagDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> RagExamplePrediction:
        """Query the engine with the example's query (blocking).

        Sleeps for ``sleep_time_in_seconds`` first; the prediction holds the
        stringified response and the text of each of its source nodes.
        """
        time.sleep(sleep_time_in_seconds)
        response = predictor.query(example.query)
        response_text = str(response)
        node_texts = [node.text for node in response.source_nodes]
        return RagExamplePrediction(response=response_text, contexts=node_texts)

    def _construct_prediction_dataset(
        self, predictions: List[RagExamplePrediction]
    ) -> RagPredictionDataset:
        """Wrap the given predictions in a RAG prediction dataset."""
        return RagPredictionDataset(predictions=predictions)

    @property
    def class_name(self) -> str:
        """Return the name of this dataset class."""
        return "LabelledRagDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

def to_pandas(self) -> PandasDataFrame:
    """Build a dataframe with one row per example.

    The ``reference_answer_by`` and ``query_by`` columns are stringified.
    """
    examples = self.examples
    columns = {
        "query": [ex.query for ex in examples],
        "reference_contexts": [ex.reference_contexts for ex in examples],
        "reference_answer": [ex.reference_answer for ex in examples],
        "reference_answer_by": [str(ex.reference_answer_by) for ex in examples],
        "query_by": [str(ex.query_by) for ex in examples],
    }
    return PandasDataFrame(columns)

RagExamplePrediction #

Bases: BaseLlamaExamplePrediction

RAG example prediction class.

Parameters:

Name	Type	Description	Default
`response`	`str`	The response generated by the LLM.	required
`contexts`	`Optional[List[str]]`	The retrieved context (text) for generating response.	required

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

class RagExamplePrediction(BaseLlamaExamplePrediction):
    """RAG example prediction class.

    Args:
        response (str): The response generated by the LLM.
        contexts (Optional[List[str]]): The retrieved context (text) for generating
                                        response.
    """

    response: str = Field(
        default_factory=str,
        description="The generated (predicted) response that can be compared to a reference (ground-truth) answer.",
    )
    # `default=None` (not `default_factory=None`): a None factory supplies no
    # default at all, which leaves this Optional field without an explicit
    # default value.
    contexts: Optional[List[str]] = Field(
        default=None,
        description="The contexts in raw text form used to generate the response.",
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "RagExamplePrediction"

class_name `property` #

class_name: str

Data example class name.

RagPredictionDataset #

Bases: BaseLlamaPredictionDataset

RAG prediction dataset class.

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

class RagPredictionDataset(BaseLlamaPredictionDataset):
    """Dataset of RAG example predictions."""

    _prediction_type = RagExamplePrediction

    def to_pandas(self) -> PandasDataFrame:
        """Build a dataframe with ``response`` and ``contexts`` columns.

        Returns an empty dataframe when there are no predictions.
        """
        if not self.predictions:
            return PandasDataFrame({})

        columns = {
            "response": [pred.response for pred in self.predictions],
            "contexts": [pred.contexts for pred in self.predictions],
        }
        return PandasDataFrame(columns)

    @property
    def class_name(self) -> str:
        """Return the name of this dataset class."""
        return "RagPredictionDataset"

class_name `property` #

class_name: str

Class name.

to_pandas #

to_pandas() -> DataFrame

Create pandas dataframe.

Source code in llama-index-core/llama_index/core/llama_dataset/rag.py

def to_pandas(self) -> PandasDataFrame:
    """Build a dataframe with ``response`` and ``contexts`` columns.

    Returns an empty dataframe when there are no predictions.
    """
    if not self.predictions:
        return PandasDataFrame({})

    columns = {
        "response": [pred.response for pred in self.predictions],
        "contexts": [pred.contexts for pred in self.predictions],
    }
    return PandasDataFrame(columns)

download_llama_dataset #

download_llama_dataset(llama_dataset_class: str, download_dir: str, llama_datasets_url: str = LLAMA_DATASETS_URL, llama_datasets_lfs_url: str = LLAMA_DATASETS_LFS_URL, llama_datasets_source_files_tree_url: str = LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL, show_progress: bool = False, load_documents: bool = True) -> Tuple[Type[BaseLlamaDataset], List[Document]]

Download dataset from datasets-LFS and llamahub.

Parameters:

Name	Type	Description	Default
`llama_dataset_class`	`str`	The name of the llamadataset class you want to download, such as `PaulGrahamEssayDataset`.	required
`custom_dir`		Custom dir name to download loader into (under parent folder).	required
`download_dir`	`str`	Custom dirpath to download the dataset into (forwarded to `download` as `custom_path`).	required
`llama_datasets_url`	`str`	Url for getting ordinary files from llama_datasets repo	`LLAMA_DATASETS_URL`
`llama_datasets_lfs_url`	`str`	Url for getting LFS-tracked files from the llama_datasets repo	`LLAMA_DATASETS_LFS_URL`
`llama_datasets_source_files_tree_url`	`str`	Url for listing source_files contents	`LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL`
`refresh_cache`		If true, the local cache will be skipped and the loader will be fetched directly from the remote repo.	required
`source_files_dirpath`		The directory for storing source files	required
`library_path`		File name of the library file.	required
`base_file_name`		The rag dataset json file	required
`disable_library_cache`		Boolean to control library cache	required
`override_path`		Boolean to control overriding path	required
`show_progress`	`bool`	Boolean for showing progress on downloading source files	`False`
`load_documents`	`bool`	Boolean for whether or not source_files for LabelledRagDataset should be loaded.	`True`

Returns:

Type	Description
`Tuple[Type[BaseLlamaDataset], List[Document]]`	a `BaseLlamaDataset` and a `List[Document]`

Source code in llama-index-core/llama_index/core/llama_dataset/download.py

def download_llama_dataset(
    llama_dataset_class: str,
    download_dir: str,
    llama_datasets_url: str = LLAMA_DATASETS_URL,
    llama_datasets_lfs_url: str = LLAMA_DATASETS_LFS_URL,
    llama_datasets_source_files_tree_url: str = LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL,
    show_progress: bool = False,
    load_documents: bool = True,
) -> Tuple[Type[BaseLlamaDataset], List[Document]]:
    """Download a llama dataset and, for RAG datasets, load its source documents.

    Args:
        llama_dataset_class: The name of the llamadataset class you want to
            download, such as `PaulGrahamEssayDataset`.
        download_dir: Directory to download into; forwarded to `download` as
            its `custom_path` argument.
        llama_datasets_url: Url for getting ordinary files from llama_datasets repo
        llama_datasets_lfs_url: Url for LFS-tracked files from the
            llama_datasets repo
        llama_datasets_source_files_tree_url: Url for listing source_files contents
        show_progress: Boolean for showing progress on downloading source files
        load_documents: Boolean for whether or not source_files for
            LabelledRagDataset should be loaded.

    Returns:
        A tuple of the dataset loaded from the downloaded json file and a list
        of documents. The list is empty unless the dataset filename contains
        `rag_dataset.json` and `load_documents` is true.
    """
    # `download` is always invoked with refresh_cache=True,
    # disable_library_cache=True and override_path=True, using the fixed
    # library file name "library.json".
    dataset_filename, source_files_dir = download(
        llama_dataset_class,
        llama_datasets_url=llama_datasets_url,
        llama_datasets_lfs_url=llama_datasets_lfs_url,
        llama_datasets_source_files_tree_url=llama_datasets_source_files_tree_url,
        refresh_cache=True,
        custom_path=download_dir,
        library_path="library.json",
        disable_library_cache=True,
        override_path=True,
        show_progress=show_progress,
    )
    track_download(llama_dataset_class, MODULE_TYPE.DATASETS)

    dataset_cls = _resolve_dataset_class(dataset_filename)
    dataset = dataset_cls.from_json(dataset_filename)

    # for now only rag datasets need to provide the documents
    # in order to build an index over them
    documents = []
    if "rag_dataset.json" in dataset_filename and load_documents:
        reader = SimpleDirectoryReader(input_dir=source_files_dir)
        documents = reader.load_data(show_progress=show_progress)

    return (dataset, documents)

Index

BaseLlamaDataExample #

class_name abstractmethod property #

BaseLlamaDataset #

class_name abstractmethod property #

to_pandas abstractmethod #

save_json #

from_json classmethod #

make_predictions_with #

amake_predictions_with async #

BaseLlamaExamplePrediction #

class_name abstractmethod property #

BaseLlamaPredictionDataset #

class_name abstractmethod property #

to_pandas abstractmethod #

save_json #

from_json classmethod #

CreatedByType #

EvaluatorExamplePrediction #

class_name property #

EvaluatorPredictionDataset #

class_name property #

to_pandas #

LabelledEvaluatorDataExample #

class_name property #

LabelledEvaluatorDataset #

class_name property #

to_pandas #

LabelledPairwiseEvaluatorDataExample #

class_name property #

LabelledPairwiseEvaluatorDataset #

class_name property #

to_pandas #

PairwiseEvaluatorExamplePrediction #

class_name property #

PairwiseEvaluatorPredictionDataset #

class_name property #

to_pandas #

LabelledRagDataExample #

class_name property #

LabelledRagDataset #

class_name property #

to_pandas #

RagExamplePrediction #

class_name property #

RagPredictionDataset #

class_name property #

to_pandas #

download_llama_dataset #

class_name `abstractmethod` `property` #

class_name `abstractmethod` `property` #

to_pandas `abstractmethod` #

from_json `classmethod` #

amake_predictions_with `async` #

class_name `abstractmethod` `property` #

class_name `abstractmethod` `property` #

to_pandas `abstractmethod` #

from_json `classmethod` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #

class_name `property` #