"""Utilities for generating examples."""
from __future__ import annotations
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Protocol, cast
import pandas as pd
from y0.algorithm.identify import Identification, Query
from y0.dsl import Variable
from y0.graph import NxMixedGraph
from y0.struct import DSeparationJudgement, VermaConstraint
__all__ = [
"DataGenerator",
"Example",
]
class DataGenerator(Protocol):
    """Structural type for callables that produce synthetic data as a data frame."""

    def __call__(
        self,
        num_samples: int,
        *,
        treatments: dict[Variable, float] | None = None,
        seed: int | None = None,
    ) -> pd.DataFrame:
        """Generate synthetic data.

        :param num_samples: The requested sample count (presumably the number
            of rows in the result; the protocol itself does not enforce this).
        :param treatments: An optional mapping from variables to the float
            values assigned to them. Keyword-only.
        :param seed: An optional integer seed (presumably for the random
            number generator of the implementation). Keyword-only.
        :returns: A pandas data frame holding the generated data.
        """
        ...
@dataclass
class Example:
    """An example graph packaged with certain pre-calculated data structures.

    Only ``name``, ``reference``, and ``graph`` are required; every other
    field is optional and defaults to ``None``.
    """

    # Required fields: a name for the example, a reference for it, and its graph
    name: str
    reference: str
    graph: NxMixedGraph
    # Optional free-text description of the example
    description: str | None = None
    # Optional pre-calculated structures associated with the graph
    verma_constraints: Sequence[VermaConstraint] | None = None
    conditional_independencies: Sequence[DSeparationJudgement] | None = None
    # Optional data frame shipped with the example
    data: pd.DataFrame | None = None
    identifications: list[dict[str, list[Identification]]] | None = None
    #: Example queries are just to give an idea to a new user
    #: what might be interesting to use in the ID algorithm
    example_queries: list[Query] | None = None
    # Optional callable used by :meth:`generate_ate` to produce synthetic data
    generate_data: DataGenerator | None = None

    def generate_ate(
        self,
        *,
        num_samples: int,
        treatment: Variable,
        outcome: Variable,
        treatment_0: float = 0.0,
        treatment_1: float = 1.0,
    ) -> float:
        """Calculate the ATE for a single treatment/outcome pair.

        Two datasets are produced with :attr:`generate_data`, one where
        ``treatment`` is mapped to ``treatment_1`` and one where it is mapped
        to ``treatment_0``. The result is the mean of the outcome column in
        the first dataset minus its mean in the second.

        :param num_samples: The sample count passed to the data generator
            for each of the two datasets.
        :param treatment: The treatment variable, used as the key of the
            ``treatments`` mapping given to the data generator.
        :param outcome: The outcome variable; its ``name`` selects the
            column of the generated data frames that gets averaged.
        :param treatment_0: The treatment value for the baseline dataset.
        :param treatment_1: The treatment value for the comparison dataset.
        :returns: The difference of the outcome means as a Python float.
        :raises TypeError: If this example has no ``generate_data`` callable.
        """
        if self.generate_data is None:
            raise TypeError(f"no generation method provided in example: {self.name}")

        # Same call order as before (treatment_1 first) so that generators
        # relying on global random state consume it identically.
        data_1 = self.generate_data(num_samples, treatments={treatment: treatment_1})
        data_0 = self.generate_data(num_samples, treatments={treatment: treatment_0})

        # Select the outcome column *before* averaging: this skips the means
        # of unrelated columns and does not fail when the generated frame
        # also contains non-numeric columns.
        column = outcome.name
        return float(data_1[column].mean() - data_0[column].mean())