Metrics

Evaluation modules.

MRR #

Bases: BaseRetrievalMetric

Mean reciprocal rank (MRR) metric: the reciprocal of the rank of the first retrieved ID that appears in the expected IDs, or 0.0 if none match.

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics.py
class MRR(BaseRetrievalMetric):
    """MRR metric."""

    metric_name: str = "mrr"

    def compute(
        self,
        query: Optional[str] = None,
        expected_ids: Optional[List[str]] = None,
        retrieved_ids: Optional[List[str]] = None,
        expected_texts: Optional[List[str]] = None,
        retrieved_texts: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> RetrievalMetricResult:
        """Compute metric."""
        if retrieved_ids is None or expected_ids is None:
            raise ValueError("Retrieved ids and expected ids must be provided")
        for i, id in enumerate(retrieved_ids):
            if id in expected_ids:
                return RetrievalMetricResult(
                    score=1.0 / (i + 1),
                )
        return RetrievalMetricResult(
            score=0.0,
        )

compute #

compute(query: Optional[str] = None, expected_ids: Optional[List[str]] = None, retrieved_ids: Optional[List[str]] = None, expected_texts: Optional[List[str]] = None, retrieved_texts: Optional[List[str]] = None, **kwargs: Any) -> RetrievalMetricResult

Compute metric.

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics.py
def compute(
    self,
    query: Optional[str] = None,
    expected_ids: Optional[List[str]] = None,
    retrieved_ids: Optional[List[str]] = None,
    expected_texts: Optional[List[str]] = None,
    retrieved_texts: Optional[List[str]] = None,
    **kwargs: Any,
) -> RetrievalMetricResult:
    """Compute metric."""
    if retrieved_ids is None or expected_ids is None:
        raise ValueError("Retrieved ids and expected ids must be provided")
    for i, id in enumerate(retrieved_ids):
        if id in expected_ids:
            return RetrievalMetricResult(
                score=1.0 / (i + 1),
            )
    return RetrievalMetricResult(
        score=0.0,
    )
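
A minimal usage sketch (the node IDs and query below are illustrative, assuming llama-index-core is installed):

from llama_index.core.evaluation.retrieval.metrics import MRR

mrr = MRR()
# The first relevant ID ("node-2") is retrieved at rank 2, so the score is 1 / 2.
result = mrr.compute(
    query="example query",  # not used by this metric; shown for completeness
    expected_ids=["node-2", "node-7"],
    retrieved_ids=["node-5", "node-2", "node-9"],
)
print(result.score)  # 0.5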

HitRate #

Bases: BaseRetrievalMetric

Hit rate metric: 1.0 if any retrieved ID appears in the expected IDs, else 0.0.

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics.py
class HitRate(BaseRetrievalMetric):
    """Hit rate metric."""

    metric_name: str = "hit_rate"

    def compute(
        self,
        query: Optional[str] = None,
        expected_ids: Optional[List[str]] = None,
        retrieved_ids: Optional[List[str]] = None,
        expected_texts: Optional[List[str]] = None,
        retrieved_texts: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> RetrievalMetricResult:
        """Compute metric."""
        if retrieved_ids is None or expected_ids is None:
            raise ValueError("Retrieved ids and expected ids must be provided")
        is_hit = any(id in expected_ids for id in retrieved_ids)
        return RetrievalMetricResult(
            score=1.0 if is_hit else 0.0,
        )

compute #

compute(query: Optional[str] = None, expected_ids: Optional[List[str]] = None, retrieved_ids: Optional[List[str]] = None, expected_texts: Optional[List[str]] = None, retrieved_texts: Optional[List[str]] = None, **kwargs: Any) -> RetrievalMetricResult

Compute metric.

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics.py
def compute(
    self,
    query: Optional[str] = None,
    expected_ids: Optional[List[str]] = None,
    retrieved_ids: Optional[List[str]] = None,
    expected_texts: Optional[List[str]] = None,
    retrieved_texts: Optional[List[str]] = None,
    **kwargs: Any,
) -> RetrievalMetricResult:
    """Compute metric."""
    if retrieved_ids is None or expected_ids is None:
        raise ValueError("Retrieved ids and expected ids must be provided")
    is_hit = any(id in expected_ids for id in retrieved_ids)
    return RetrievalMetricResult(
        score=1.0 if is_hit else 0.0,
    )
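
A minimal usage sketch (illustrative node IDs, assuming llama-index-core is installed):

from llama_index.core.evaluation.retrieval.metrics import HitRate

hit_rate = HitRate()
# At least one expected ID ("node-7") appears among the retrieved IDs, so the score is 1.0.
result = hit_rate.compute(
    expected_ids=["node-3", "node-7"],
    retrieved_ids=["node-1", "node-7"],
)
print(result.score)  # 1.0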

RetrievalMetricResult #

Bases: BaseModel

Metric result.

Attributes:

score (float): Score for the metric
metadata (Dict[str, Any]): Metadata for the metric result

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics_base.py
class RetrievalMetricResult(BaseModel):
    """Metric result.

    Attributes:
        score (float): Score for the metric
        metadata (Dict[str, Any]): Metadata for the metric result

    """

    score: float = Field(..., description="Score for the metric")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Metadata for the metric result"
    )

    def __str__(self) -> str:
        """String representation."""
        return f"Score: {self.score}\nMetadata: {self.metadata}"

    def __float__(self) -> float:
        """Float representation."""
        return self.score
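
A brief sketch of constructing and reading a result directly (the metadata values are illustrative):

from llama_index.core.evaluation.retrieval.metrics_base import RetrievalMetricResult

result = RetrievalMetricResult(score=0.5, metadata={"retriever": "vector"})
print(str(result))    # "Score: 0.5" followed by "Metadata: {'retriever': 'vector'}" on the next line
print(float(result))  # 0.5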

resolve_metrics #

resolve_metrics(metrics: List[str]) -> List[Type[BaseRetrievalMetric]]

Resolve metrics from list of metric names.

Source code in llama-index-core/llama_index/core/evaluation/retrieval/metrics.py
def resolve_metrics(metrics: List[str]) -> List[Type[BaseRetrievalMetric]]:
    """Resolve metrics from list of metric names."""
    for metric in metrics:
        if metric not in METRIC_REGISTRY:
            raise ValueError(f"Invalid metric name: {metric}")

    return [METRIC_REGISTRY[metric] for metric in metrics]
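
A minimal usage sketch, assuming the registry keys match the metric_name values shown above ("mrr" and "hit_rate"):

from llama_index.core.evaluation.retrieval.metrics import resolve_metrics

# Resolve metric classes by name, then instantiate them before computing scores.
metric_classes = resolve_metrics(["mrr", "hit_rate"])
metrics = [metric_cls() for metric_cls in metric_classes]
print([m.metric_name for m in metrics])  # ['mrr', 'hit_rate']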