# Source code for y0.examples.utils

"""Utilities for generating examples."""

from __future__ import annotations

from collections.abc import Sequence
from dataclasses import dataclass
from typing import Protocol, cast

import pandas as pd

from y0.algorithm.identify import Identification, Query
from y0.dsl import Variable
from y0.graph import NxMixedGraph
from y0.struct import DSeparationJudgement, VermaConstraint

__all__ = [
    "DataGenerator",
    "Example",
]


class DataGenerator(Protocol):
    """Structural type for callables that produce synthetic data frames.

    Any callable matching the signature of :meth:`__call__` satisfies
    this protocol; no subclassing is required.
    """

    def __call__(
        self,
        num_samples: int,
        *,
        treatments: dict[Variable, float] | None = None,
        seed: int | None = None,
    ) -> pd.DataFrame:
        """Generate synthetic data.

        :param num_samples: The number of samples requested from the generator.
        :param treatments: An optional mapping from variables to float values
            (keyword-only). Presumably the values at which those variables
            are fixed while sampling -- confirm against the concrete generators.
        :param seed: An optional integer seed (keyword-only).
        :returns: A pandas data frame holding the generated data.
        """
        ...


@dataclass
class Example:
    """An example graph packaged with certain pre-calculated data structures.

    Only ``name``, ``reference``, and ``graph`` are required. Every other
    field defaults to ``None`` when it is not supplied for the example.
    """

    #: Name of the example (also reported in errors raised by this class)
    name: str
    reference: str
    graph: NxMixedGraph
    description: str | None = None
    verma_constraints: Sequence[VermaConstraint] | None = None
    conditional_independencies: Sequence[DSeparationJudgement] | None = None
    data: pd.DataFrame | None = None
    identifications: list[dict[str, list[Identification]]] | None = None
    #: Example queries are just to give an idea to a new user
    #: what might be interesting to use in the ID algorithm
    example_queries: list[Query] | None = None
    #: Callable used by :meth:`generate_ate` to sample synthetic data
    generate_data: DataGenerator | None = None

    def generate_ate(
        self,
        *,
        num_samples: int,
        treatment: Variable,
        outcome: Variable,
        treatment_0: float = 0.0,
        treatment_1: float = 1.0,
    ) -> float:
        """Calculate the ATE for a single treatment/outcome pair.

        Two data sets of ``num_samples`` rows each are drawn from
        :attr:`generate_data`, first with ``treatment`` set to
        ``treatment_1`` and then with it set to ``treatment_0``. The result
        is the mean of the outcome column in the first data set minus the
        mean of the outcome column in the second.

        No seed is forwarded to the generator, so reproducibility depends
        on the generator's own default seeding.

        :param num_samples: The number of samples to draw for each treatment value.
        :param treatment: The variable passed to the generator as the treatment.
        :param outcome: The variable whose ``name`` selects the outcome column.
        :param treatment_0: The baseline treatment value.
        :param treatment_1: The treatment value compared against the baseline.
        :returns: The difference of the outcome means as a built-in ``float``.
        :raises TypeError: If this example has no ``generate_data`` callable.
        """
        if self.generate_data is None:
            raise TypeError(f"no generation method provided in example: {self.name}")
        data_1 = self.generate_data(num_samples, treatments={treatment: treatment_1})
        data_0 = self.generate_data(num_samples, treatments={treatment: treatment_0})
        # Average only the outcome column: taking DataFrame.mean() over every
        # column is wasted work and fails on frames with non-numeric columns.
        mean_1 = data_1[outcome.name].mean()
        mean_0 = data_0[outcome.name].mean()
        # float() turns the NumPy scalar into the annotated built-in float;
        # typing.cast() would leave the runtime value unchanged.
        return float(mean_1 - mean_0)