# Source code for y0.examples

# -*- coding: utf-8 -*-
# type: ignore

"""Examples from CausalFusion."""

from __future__ import annotations

import numpy as np
import pandas as pd

from .backdoor import generate_data_for_backdoor
from .frontdoor import generate_data_for_frontdoor
from .frontdoor_backdoor import generate_data_for_frontdoor_backdoor
from .sars import generate_data_for_covid_case_study
from .smoke_cancer import generate_data_for_smoke_cancer
from .utils import Example
from ..algorithm.identify import Identification, Query
from ..dsl import (
    AA,
    W0,
    W1,
    W2,
    X1,
    X2,
    Y1,
    Y2,
    Z1,
    Z2,
    Z3,
    Z4,
    Z5,
    A,
    B,
    C,
    D,
    E,
    F,
    G,
    M,
    P,
    Q,
    S,
    Sum,
    T,
    Variable,
    W,
    X,
    Y,
    Z,
)
from ..graph import NxMixedGraph
from ..resources import ASIA_PATH
from ..struct import DSeparationJudgement, VermaConstraint

# Lower-case shorthands for the negated DSL variables; used further down when
# indexing counterfactual nodes, e.g. ``Y @ -x``.
# NOTE(review): presumably ``-X`` denotes an intervention value on X -- confirm in y0.dsl.
x, y, z, w = -X, -Y, -Z, -W

# Helper variables that only appear inside the Verma-constraint expressions below.
u_2 = Variable("u_2")
u_3 = Variable("u_3")

#: The backdoor graph: Z points to both X and Y, and X points to Y.
#: Treatment: X
#: Outcome: Y
#: Adjusted: N/A
backdoor = NxMixedGraph.from_edges(directed=[(Z, X), (Z, Y), (X, Y)])

#: The backdoor graph bundled with its data generator and the X-on-Y query.
backdoor_example = Example(
    name="Backdoor",
    reference=(
        'J. Pearl. 2009. "Causality: Models, Reasoning and Inference.'
        ' 2nd ed." Cambridge University Press, p. 178.'
    ),
    graph=backdoor,
    generate_data=generate_data_for_backdoor,
    example_queries=[Query.from_str(treatments="X", outcomes="Y")],
)

#: The frontdoor graph: X -> Z -> Y plus an undirected edge between X and Y.
#: Treatment: X
#: Outcome: Y
#: Adjusted: N/A
frontdoor = NxMixedGraph.from_edges(directed=[(X, Z), (Z, Y)], undirected=[(X, Y)])
#: The frontdoor graph bundled with its data generator and the X-on-Y query.
frontdoor_example = Example(
    name="Frontdoor",
    reference=(
        'J. Pearl. 2009. "Causality: Models, Reasoning and Inference.'
        ' 2nd ed." Cambridge University Press, p. 81.'
    ),
    graph=frontdoor,
    generate_data=generate_data_for_frontdoor,
    example_queries=[Query.from_str(treatments="X", outcomes="Y")],
)

#: A graph with a mediator and an observed common parent: X -> Z -> Y, W -> X, W -> Y.
#: Treatment: X
#: Outcome: Y
#: Adjusted: N/A
frontdoor_backdoor = NxMixedGraph.from_edges(directed=[(X, Z), (Z, Y), (W, X), (W, Y)])
#: The frontdoor/backdoor graph bundled with its data generator and the X-on-Y query.
frontdoor_backdoor_example = Example(
    name="Frontdoor / Backdoor",
    reference="https://github.com/y0-causal-inference/y0/pull/183",
    graph=frontdoor_backdoor,
    generate_data=generate_data_for_frontdoor_backdoor,
    example_queries=[
        Query.from_str(treatments="X", outcomes="Y"),
    ],
)

#: The instrumental variable graph: Z -> X -> Y plus an undirected edge between X and Y.
#: Treatment: X
#: Outcome: Y
instrumental_variable = NxMixedGraph.from_edges(
    directed=[(Z, X), (X, Y)],
    undirected=[(X, Y)],
)
#: The instrumental variable graph with its literature reference (no data generator or queries).
instrumental_variable_example = Example(
    name="Instrument Variable",
    reference=(
        'J. Pearl. 2009. "Causality: Models, Reasoning and Inference.'
        ' 2nd ed." Cambridge University Press, p. 153.'
    ),
    graph=instrumental_variable,
)

#: The napkin graph: Z2 -> Z1 -> X -> Y, with undirected edges joining Z2 to X and Z2 to Y.
#: Treatment: X
#: Outcome: Y
napkin = NxMixedGraph.from_edges(
    directed=[(Z2, Z1), (Z1, X), (X, Y)],
    undirected=[(Z2, X), (Z2, Y)],
)



# [docs]  (link residue from the rendered documentation page; not part of the module)
def generate_napkin_data(
    num_samples: int, treatments: dict[Variable, float] | None = None, *, seed: int | None = None
) -> pd.DataFrame:
    """Generate testing data for the napkin graph.

    :param num_samples: The number of samples to generate. Try 1000.
    :param treatments: An optional dictionary of the values to fix each variable
        to. The keys in this dictionary must correspond to variables in the
        napkin graph as defined in :data:`y0.examples.napkin` (i.e.,
        with :data:`y0.dsl.Z1`, :data:`y0.dsl.Z2`, :data:`y0.dsl.X`,
        and :data:`y0.dsl.Y`).
    :param seed: An optional random seed for reproducibility purposes
    :returns: A pandas Dataframe with columns corresponding to the four
        variable names in the Napkin graph (i.e., ``Z2``, ``Z1``, ``X``,
        and ``Y``, in that column order)

    The two latent confounders are always drawn first. Each observed variable
    is then either filled with its treatment value (when it is a key of
    ``treatments``) or sampled conditionally on the variables generated before it.

    Generate *observational* data with the following:

    >>> from y0.examples import napkin_example
    >>> napkin_example.generate_data(1000)

    Generate interventional data on :math:`X=1` with the following:

    >>> from y0.dsl import X
    >>> napkin_example.generate_data(1000, treatments={X: 1})

    Multiple treatments can be specified:

    >>> from y0.dsl import X, Z1
    >>> napkin_example.generate_data(1000, treatments={X: 1, Z1: 0})
    """
    if treatments is None:
        treatments = {}
    generator = np.random.default_rng(seed)
    # U1 is the latent variable that is a common cause of Z2 and X
    # (the undirected Z2 -- X edge of the napkin graph)
    u1 = generator.normal(loc=3, scale=1, size=num_samples)
    # U2 is the latent variable that is a common cause of Z2 and Y
    # (the undirected Z2 -- Y edge of the napkin graph)
    u2 = generator.normal(loc=5, scale=1, size=num_samples)

    # Z2 depends on both latent variables
    if Z2 in treatments:
        z2_values = np.full(num_samples, treatments[Z2])
    else:
        u_linear_combination = 0.3 * u1 + 0.5 * u2
        z2_values = generator.gamma(
            shape=u_linear_combination**-2,
            scale=5 * u_linear_combination,
            size=num_samples,
        )

    # Z1 depends on Z2 only
    if Z1 in treatments:
        z1_values = np.full(num_samples, treatments[Z1])
    else:
        z1_values = generator.normal(loc=z2_values * 0.7, scale=6, size=num_samples)

    # X is binary; its success probability is a logistic function of U1 and Z1
    if X in treatments:
        x_values = np.full(num_samples, treatments[X])
    else:
        x_probability = 1 / (1 + np.exp(-2 - 0.23 * u1 - 0.1 * z1_values))
        x_values = generator.binomial(n=1, p=x_probability, size=num_samples)

    # Y depends on U2 and X; ``size`` is passed explicitly like in every other draw
    # (``loc`` already has ``num_samples`` entries, so the drawn values are unchanged)
    if Y in treatments:
        y_values = np.full(num_samples, treatments[Y])
    else:
        y_values = generator.normal(loc=u2 * 0.5 + x_values * 3, scale=6, size=num_samples)

    return pd.DataFrame({Z2.name: z2_values, Z1.name: z1_values, X.name: x_values, Y.name: y_values})



# The napkin graph bundled with its synthetic-data generator, the X-on-Y query,
# and one Verma constraint. The constraint gives each side twice: once as a
# ``Q[...]`` c-factor (``*_cfactor``) and once as a probability expression (``*_expr``).
napkin_example = Example(
    name="Napkin",
    reference='J. Pearl and D. Mackenzie. 2018. "The Book of Why: The New Science of Cause and Effect."'
    " Basic Books, p. 240.",
    graph=napkin,
    generate_data=generate_napkin_data,
    example_queries=[Query.from_str(treatments="X", outcomes="Y")],
    verma_constraints=[
        VermaConstraint(
            lhs_cfactor=Q[X, Y](Z1, X, Y) / Sum[Y](Q[X, Y](Z1, X, Y)),
            lhs_expr=(
                Sum[Z2](P(Y | (Z1, Z2, X)) * P(X | (Z2, Z1)) * P(Z2))
                / Sum[Z2, Y](P(Y | (Z2, Z1, X)) * P(X | (Z2, Z1)) * P(Z2))
            ),
            rhs_cfactor=Q[Y](X, Y),
            # u_2 is the module-level helper ``Variable("u_2")``
            rhs_expr=Sum[u_2, X](P(Y | u_2 | X) * P(X) * P(u_2)),
            variables=(Z1,),
        ),
    ],
)

#: The M-graph: X -> Y, with Z joined to both X and Y by undirected edges.
#: Treatment: X
#: Outcome: Y
#: Reference: S. Greenland, J. Pearl, and J.M. Robins. 1999. "Causal Diagrams for Epidemiologic
#: Research." Epidemiology, Volume 10, No. 1, pp. 37-48.
m_graph = NxMixedGraph.from_edges(
    directed=[
        (X, Y),
    ],
    undirected=[
        (X, Z),
        (Y, Z),
    ],
)
#: The M-graph with its literature reference.
m_graph_example = Example(
    name="M-Graph",
    # The paper appeared in issue 1 of volume 10 (the citation previously said "No. 10").
    reference='S. Greenland, J. Pearl, and J.M. Robins. 1999. "Causal Diagrams for Epidemiologic Research."'
    " Epidemiology Journal, Volume 10, No. 1, pp. 37-48, 1999.",
    graph=m_graph,
)

# Example whose graph is given as adjacency mappings and contains a vertex (W)
# that takes part in no edge at all.
vertices_without_edges = Example(
    name="Vertices-without-Edges",
    reference="out of the mind of JZ (patent pending). See NFT for details",
    graph=NxMixedGraph.from_adj(
        directed={
            W: [],
            X: [Y],
            Y: [Z],
            Z: [],
        },
        undirected={
            W: [],
            X: [Z],
            Y: [],
            Z: [],
        },
    ),
)

# Fixture for line 1 of the ID algorithm on the single-edge graph Z -> Y.
# Each entry of ``identifications`` pairs input identifications (``id_in``)
# with the expected output identifications (``id_out``).
line_1_example = Example(
    name="Line 1 of ID algorithm",
    reference="out of the mind of JZ",
    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y),
                    estimand=P(Y, Z),
                    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y),
                    estimand=Sum[Z](P(Y, Z)),
                    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
                ),
            ],
        },
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y, Z),
                    estimand=P(Y, Z),
                    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y, Z),
                    estimand=P(Y, Z),
                    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
                ),
            ],
        },
    ],
)

# Fixture for line 2 of the ID algorithm: in this graph X is a child of Y,
# so the expected output drops X from both the query and the graph.
line_2_example = Example(
    name="intervention not ancestral to outcome",
    reference="out of the mind of JZ",
    graph=NxMixedGraph.from_edges(
        directed=[(Z, Y), (Y, X)],
        undirected=[(Z, X)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(Z, Y), (Y, X)],
                        undirected=[(Z, X)],
                    ),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y),
                    estimand=Sum[X](P(Y, X, Z)),
                    graph=NxMixedGraph.from_edges(directed=[(Z, Y)]),
                ),
            ],
        },
    ],
)

# Fixture named for line 3 of the ID algorithm: the expected output adds Z to
# the intervention set of the query while estimand and graph stay the same.
line_3_example = Example(
    name="node has no effect on outcome",
    reference="out of the mind of JZ",
    graph=NxMixedGraph.from_edges(
        directed=[(Z, X), (X, Y)],
        undirected=[(Z, X)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(Z, X), (X, Y)],
                        undirected=[(Z, X)],
                    ),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y @ {X, Z}),
                    estimand=P(X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(Z, X), (X, Y)],
                        undirected=[(Z, X)],
                    ),
                ),
            ],
        },
    ],
)

# Fixture named for line 4 of the ID algorithm: one input identification is
# expected to yield two output identifications over the same graph.
line_4_example = Example(
    name="graph without X decomposes into multiple C components",
    reference="out of the mind of JZ",
    graph=NxMixedGraph.from_edges(
        directed=[(X, M), (Z, X), (Z, Y), (M, Y)],
        undirected=[(Z, X), (M, Y)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(M, X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, M), (Z, X), (Z, Y), (M, Y)],
                        undirected=[(Z, X), (M, Y)],
                    ),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(M @ {X, Z}, Y @ {X, Z}),
                    estimand=P(M, X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, M), (Z, X), (Z, Y), (M, Y)],
                        undirected=[(Z, X), (M, Y)],
                    ),
                ),
                Identification.from_expression(
                    query=P(Z @ {M, X, Y}),
                    estimand=P(M, X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, M), (Z, X), (Z, Y), (M, Y)],
                        undirected=[(Z, X), (M, Y)],
                    ),
                ),
            ],
        },
    ],
)

# Fixture named for line 5 of the ID algorithm: X -> Y together with an
# undirected X -- Y edge. Only an input identification is given (no ``id_out``).
line_5_example = Example(
    name="graph containing a hedge",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[(X, Y)],
        undirected=[(X, Y)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(X, Y),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Y)],
                        undirected=[(X, Y)],
                    ),
                ),
            ],
        },
    ],
)

# Fixture for line 6 of the ID algorithm: in both cases the expected estimand
# is the outcome conditioned on the intervened variables.
line_6_example = Example(
    name="ID Line 6 Example",
    description=(
        "If there are no bidirected arcs from X to the other nodes in the"
        " current subproblem under consideration, then we can replace acting"
        " on X by conditioning, and thus solve the subproblem."
    ),
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[(X, Y), (X, Z), (Z, Y)],
        undirected=[(X, Z)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ [X, Z]),
                    estimand=P(X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Y), (X, Z), (Z, Y)],
                        undirected=[(X, Z)],
                    ),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y @ {X, Z}),
                    estimand=P(Y | [X, Z]),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Y), (X, Z), (Z, Y)],
                        undirected=[(X, Z)],
                    ),
                ),
            ],
        },
        {
            "id_in": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(X, Y),
                    graph=NxMixedGraph.from_edges(directed=[(X, Y)]),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y @ X),
                    estimand=P(Y | X),
                    graph=NxMixedGraph.from_edges(directed=[(X, Y)]),
                ),
            ],
        },
    ],
)

# Fixture for line 7 of the ID algorithm on the graph W1 -> X -> Y1 with an
# undirected W1 -- Y1 edge.
line_7_example = Example(
    name="ID Line 7 example, figure 5a and b",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    # The example's graph must be the same graph that ``id_in`` starts from
    # (W1 -> X, not W1 -> Y1): the expected ``id_out`` graph keeps only the
    # undirected W1 -- Y1 edge, which would not hold if W1 -> Y1 were an edge.
    graph=NxMixedGraph.from_edges(directed=[(X, Y1), (W1, X)], undirected=[(W1, Y1)]),
    identifications=[
        dict(
            id_in=[
                Identification.from_expression(
                    query=P(Y1 @ [X, W1]),
                    estimand=P(X, Y1, W1),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Y1), (W1, X)], undirected=[(W1, Y1)]
                    ),
                )
            ],
            id_out=[
                # Expected result: X is dropped from the intervention set and from the graph.
                Identification.from_expression(
                    query=P(Y1 @ W1),
                    estimand=P(Y1 | [X, W1]) * P(W1),
                    graph=NxMixedGraph.from_edges(undirected=[(W1, Y1)]),
                )
            ],
        )
    ],
)

# Fixture for a conditional query: the input is built from explicit outcome,
# treatment and condition sets; the expected output graph has no undirected edges.
figure_6a = Example(
    name="Causal graph with identifiable conditional effect P(y|do(x),z)",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[(X, Z), (Z, Y)],
        undirected=[(X, Z)],
    ),
    identifications=[
        {
            "id_in": [
                Identification.from_parts(
                    outcomes={Y},
                    treatments={X},
                    conditions={Z},
                    estimand=P(X, Y, Z),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Z), (Z, Y)],
                        undirected=[(X, Z)],
                    ),
                ),
            ],
            "id_out": [
                Identification.from_expression(
                    query=P(Y @ (X, Z)),
                    estimand=P(Y | (X, Z)) / Sum.safe(expression=P(Y | (X, Z)), ranges=(Y,)),
                    graph=NxMixedGraph.from_edges(
                        directed=[(X, Z), (Z, Y)],
                        undirected=[],
                    ),
                ),
            ],
        },
    ],
)

# Graph with X -> W -> Y, D -> Z -> Y, X -> Y and an undirected X -- Y edge.
tikka_unidentifiable_graph = Example(
    name="Tikka's unidentifiable example",
    reference="Tikka, S. (2020). Identifying Counterfactual Queries with the R Package cfid",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, W),
            (W, Y),
            (D, Z),
            (Z, Y),
            (X, Y),
        ],
        undirected=[(X, Y)],
    ),
)

# Variant of the graph above in which several nodes carry the ``@ -x`` index.
tikka_unidentifiable_cfgraph = Example(
    name="Tikka's unidentifiable example",
    reference="Tikka, S. (2020). Identifying Counterfactual Queries with the R Package cfid",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X @ -x, W @ -x),
            (W @ -x, Y @ -x),
            (D, Z),
            (Z, Y @ -x),
            (X, Y),
        ],
        undirected=[
            (X, Y @ -x),
        ],
    ),
)


# Base diagram for the figure 9 / figure 11 series: X -> W -> Y, D -> Z -> Y, undirected X -- Y.
figure_9a = Example(
    name="Original causal diagram",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, W),
            (W, Y),
            (D, Z),
            (Z, Y),
        ],
        undirected=[
            (X, Y),
        ],
    ),
)

# Parallel-worlds graph: the four directed edges of figure_9a appear three
# times (nodes indexed by ``@ -X``, un-indexed nodes, nodes indexed by ``@ D``),
# and undirected edges tie the copies together.
figure_9b = Example(
    name="Parallel worlds graph for :math:`P(y_x|x', x_d, d)`",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            # copy indexed by ``@ -X``
            (X @ -X, W @ -X),
            (W @ -X, Y @ -X),
            (D @ -X, Z @ -X),
            (Z @ -X, Y @ -X),
            # un-indexed copy
            (X, W),
            (W, Y),
            (D, Z),
            (Z, Y),
            # copy indexed by ``@ D``
            (X @ D, W @ D),
            (W @ D, Y @ D),
            (D @ D, Z @ D),
            (Z @ D, Y @ D),
        ],
        undirected=[
            # edges among the copies of X and Y
            (X, Y),
            (X @ D, X),
            (Y @ -X, Y),
            (Y, Y @ D),
            (Y @ D, Y @ -X),
            (X, Y @ -X),
            (X @ D, Y),
            (X, Y @ D),
            (X @ D, Y @ -X),
            (X @ D, Y @ D),
            # edge between copies of D
            (D @ -X, D),
            # edges among the copies of W
            (W @ -X, W),
            (W, W @ D),
            (W @ D, W @ -X),
            # edges among the copies of Z
            (Z @ -X, Z),
            (Z, Z @ D),
            (Z @ -X, Z @ D),
        ],
    ),
)

# Counterfactual graph in which X, W and Y carry the ``@ -X`` index while D and Z do not.
figure_9c = Example(
    name="Counterfactual graph for :math:`P(y_x | x', z_d, d)`",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X @ -X, W @ -X),
            (W @ -X, Y @ -X),
            (D, Z),
            (Z, Y @ -X),
        ],
        undirected=[
            (X, Y @ -X),
        ],
    ),
)

# Graph with X -> Z -> Y, X -> Y and an undirected X -- Z edge.
tikka_figure_2 = Example(
    name=(
        r"Figure 2: A graph for the example on identifiability of a conditional counterfacual "
        r"query :math:`P(y_x|z_x\wedge x')`"
    ),
    reference="Tikka, S (2022) Identifiying Counterfactual Queries with the R package cfid",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, Z),
            (X, Y),
            (Z, Y),
        ],
        undirected=[
            (X, Z),
        ],
    ),
)

# Counterfactual graph whose node set is passed explicitly through ``nodes``.
tikka_figure_5 = Example(
    name=r"Figure 5: Counterfactual graph :math:`G'` for :math:`y_x\wedge x'\wedge z_d\wedge d`",
    reference="Tikka, S (2022) Identifiying Counterfactual Queries with the R package cfid",
    graph=NxMixedGraph.from_edges(
        nodes=(X, Y @ -x, D, Z, X @ -x, W @ -x),
        directed=[
            (D, Z),
            (Z, Y @ -x),
            (X @ -x, W @ -x),
            (W @ -x, Y @ -x),
        ],
        undirected=[
            (X, Y @ -x),
        ],
    ),
)


# Two copies of the X -> Z -> Y, X -> Y triangle (un-indexed and ``@ -x``),
# joined by undirected edges.
tikka_figure_6a = Example(
    name=r"Figure 6a: Parallel worlds graph for :math:`y_x\wedge z_x\wedge x'` (the counterfactual graph)",
    reference="Tikka, S (2022) Identifiying Counterfactual Queries with the R package cfid",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, Z),
            (Z, Y),
            (X, Y),
            (X @ -x, Z @ -x),
            (Z @ -x, Y @ -x),
            (X @ -x, Y @ -x),
        ],
        undirected=[
            (X, Z),
            (X, Z @ -x),
            (Z, Z @ -x),
            (Y, Y @ -x),
        ],
    ),
)

# The un-indexed triangle plus a copy of Y indexed by ``@ (-x, -z)`` with its two parents.
tikka_figure_6b = Example(
    name=r"Figure 6b: Parallel worlds graph for :math:`y_{x,z}\wedge x'` (the counterfactual graph)",
    reference="Tikka, S (2022) Identifiying Counterfactual Queries with the R package cfid",
    graph=NxMixedGraph.from_edges(
        directed=[(X, Z), (Z, Y), (X, Y), (Z @ (-x, -z), Y @ (-x, -z)), (X @ (-x, -z), Y @ (-x, -z))],
        undirected=[
            (X, Z),
            (Y, Y @ (-x, -z)),
        ],
    ),
)

# Counterfactual graph over nodes indexed by ``@ (-X, -Z)`` plus the un-indexed X;
# the node set is passed explicitly through ``nodes``.
figure_9d = Example(
    name=(
        "Counterfactual graph resulting from application of make_counterfactual_graph() with"
        " joint distribution from which :math:`P(y_{x,z}|x')` is derived, namely  :math:`P(y_{x,z}, x')`"
    ),
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        nodes=(X, X @ (-X, -Z), Z @ (-X, -Z), W @ (-X, -Z), Y @ (-X, -Z)),
        directed=[(X @ (-X, -Z), W @ (-X, -Z)), (Z @ (-X, -Z), Y @ (-X, -Z)), (W @ (-X, -Z), Y @ (-X, -Z))],
        undirected=[
            (X, Y @ (-X, -Z)),
        ],
    ),
)

# Counterfactual graph over nodes indexed by ``@ (~X, Z)`` plus the un-indexed D, X and Z.
figure_9e = Example(
    name="Counterfactual graph for :math:`P(Y @ (~X, Z) | X)`",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        nodes=(D, X, X @ (~X, Z), Z @ (~X, Z), W @ (~X, Z), Y @ (~X, Z)),
        directed=[
            (D, Z),
            (X @ (~X, Z), W @ (~X, Z)),
            (Z, Y @ (~X, Z)),
            (W @ (~X, Z), Y @ (~X, Z)),
        ],
        undirected=[
            (X, Y @ (~X, Z)),
        ],
    ),
)

# First intermediate graph of the figure 11 series. Relative to the edge lists
# of figure_9b, the nodes ``D @ -X`` and ``X @ D`` no longer occur: the edges
# into ``Z @ -X`` and ``W @ D`` start from the un-indexed D and X instead.
figure_11a = Example(
    name="Intermediate graph obtained by **make-cg** in constructing the"
    " counterfactual graph for for :math:`P(y_x|x', z_d, d)` from Figure 9b",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            # copy indexed by ``@ -X`` (its Z has the un-indexed D as parent)
            (X @ -X, W @ -X),
            (W @ -X, Y @ -X),
            (D, Z @ -X),
            (Z @ -X, Y @ -X),
            # un-indexed copy
            (X, W),
            (W, Y),
            (D, Z),
            (Z, Y),
            # copy indexed by ``@ D`` (its W has the un-indexed X as parent)
            (X, W @ D),
            (W @ D, Y @ D),
            (D @ D, Z @ D),
            (Z @ D, Y @ D),
        ],
        undirected=[
            # edges among X and the copies of Y
            (X, Y),
            (Y @ -X, Y),
            (Y, Y @ D),
            (Y @ D, X),
            (X, Y @ -X),
            (Y @ D, Y @ -X),
            # edges among the copies of W
            (W @ -X, W),
            (W, W @ D),
            (W @ D, W @ -X),
            # edges among the copies of Z
            (Z @ -X, Z),
            (Z, Z @ D),
            (Z @ -X, Z @ D),
        ],
    ),
)

# Second intermediate graph of the figure 11 series. Here only un-indexed
# D, Z and W occur for those variables (W additionally as ``W @ -X``), and the
# un-indexed Z and W are parents of several copies of Y.
figure_11b = Example(
    name="Intermediate graph obtained by **make-cg** in constructing the"
    " counterfactual graph for for :math:`P(y_x|x', z_d, d)` from Figure 9b",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X @ -X, W @ -X),
            (W @ -X, Y @ -X),
            (D, Z),
            # Z is a parent of all three copies of Y
            (Z, Y @ -X),
            (Z, Y @ D),
            (Z, Y),
            (X, W),
            # the un-indexed W is a parent of Y and of ``Y @ D``
            (W, Y),
            (W, Y @ D),
        ],
        undirected=[
            # edges among X and the copies of Y
            (X, Y),
            (Y @ -X, Y),
            (Y, Y @ D),
            (Y @ D, X),
            (Y @ D, Y @ -X),
            (X, Y @ -X),
            (X, Y @ D),
            # edge between the two copies of W
            (W @ -X, W),
        ],
    ),
)

# Third intermediate graph of the figure 11 series; unlike figure_11b it
# contains no node indexed by ``@ D``.
figure_11c = Example(
    name="Intermediate graph obtained by **make-cg** in constructing the counterfactual"
    " graph for for :math:`P(y_x|x', z_d, d)` from Figure 9b",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy.",
    graph=NxMixedGraph.from_edges(
        directed=[
            (X @ -X, W @ -X),
            (W @ -X, Y @ -X),
            (D, Z),
            # Z is a parent of both remaining copies of Y
            (Z, Y @ -X),
            (Z, Y),
            (X, W),
            (W, Y),
        ],
        undirected=[
            # edges among X and the copies of Y
            (X, Y),
            (Y @ -X, Y),
            (X, Y @ -X),
            # edge between the two copies of W
            (W @ -X, W),
        ],
    ),
)

# Example whose directed edges contain the cycle A -> B -> A.
cyclic_directed_example = Example(
    name="Cyclic directed graph",
    reference="out of the mind of JZ and ZW",
    graph=NxMixedGraph.from_edges(
        directed=[
            (A, B),
            (A, C),
            (B, A),
        ],
    ),
)
#: The Identifiability 1 graph (directed edges only).
#: Treatment: X
#: Outcome: Y
identifiability_1 = NxMixedGraph.from_edges(
    directed=[
        (Z1, Z2),
        (Z1, Z3),
        (Z2, X),
        (Z3, X),
        (Z4, X),
        (Z4, Z5),
        # (Z3, Y) used to be listed a second time after (X, Y); the repeat is dropped.
        # NOTE(review): assumes from_edges treats a repeated edge as a no-op -- confirm.
        (Z3, Y),
        (X, Y),
    ],
)
#: The Identifiability 1 graph together with d-separation judgements of the form
#: ``create(left, right, [conditions])``; the conditions list is omitted for some pairs.
identifiability_1_example = Example(
    name="Identifiability 1",
    reference='J. Pearl. 2009. "Causality: Models, Reasoning and Inference.'
    ' 2nd ed." Cambridge University Press, p. 80.',
    graph=identifiability_1,
    conditional_independencies=(
        DSeparationJudgement.create(X, Z1, [Z2, Z3]),
        DSeparationJudgement.create(X, Z5, [Z4]),
        DSeparationJudgement.create(Y, Z1, [X, Z3, Z4]),
        DSeparationJudgement.create(Y, Z2, [X, Z1, Z3]),
        DSeparationJudgement.create(Y, Z4, [X, Z3, Z5]),
        DSeparationJudgement.create(Z1, Z4),
        DSeparationJudgement.create(Z1, Z5),
        DSeparationJudgement.create(Z2, Z3, [Z1]),
        DSeparationJudgement.create(Z2, Z4),
        DSeparationJudgement.create(Z2, Z5),
        DSeparationJudgement.create(Z3, Z5),
        DSeparationJudgement.create(Y, Z5, [X, Z3]),
        DSeparationJudgement.create(Z3, Z4),
    ),
)

#: The Identifiability 2 graph (directed and undirected edges).
#: Treatment: X
#: Outcome: Y
identifiability_2 = NxMixedGraph.from_edges(
    directed=[
        (Z1, Z2),
        (Z1, Z3),
        (Z2, X),
        (Z3, X),
        (X, W0),
        (W0, Y),
        # (Z4, Z3) used to be listed a second time further down; the repeat is dropped.
        # NOTE(review): assumes from_edges treats a repeated edge as a no-op -- confirm.
        (Z4, Z3),
        (Z4, Z5),
        (Z5, Y),
        (X, W1),
        (W1, W2),
        (W2, Y),
        (Z3, Y),
    ],
    undirected=[
        (Z1, X),
        (Z2, Z3),
        (Z3, Z5),
        (Z4, Y),
    ],
)

#: The Identifiability 2 graph together with two Verma constraints (both share the
#: same right-hand side) and a list of d-separation judgements.
identifiability_2_example = Example(
    name="Identifiability 2",
    reference="E. Bareinboim modification of Identifiability 1.",
    graph=identifiability_2,
    verma_constraints=[
        VermaConstraint(
            rhs_cfactor=Q[Z5](Z4, Z5),
            rhs_expr=Sum[u_3, Z4](P(Z5 | (u_3, Z4)) * P(Z4) * P(u_3)),
            lhs_cfactor=Sum[Z3](Q[Z3, Z5](Z1, Z4, Z3, Z5)),
            lhs_expr=Sum[Z3](P(Z5 | (Z1, Z2, Z3, Z4)) * P(Z3 | (Z1, Z2, Z4))),
            variables=(Z1,),
        ),
        VermaConstraint(
            rhs_cfactor=Q[Z5](Z4, Z5),
            rhs_expr=Sum[u_3, Z4](P(Z5 | (u_3, Z4)) * P(Z4) * P(u_3)),
            lhs_cfactor=(Q[Z2, Z5](Z1, Z4, Z2, Z5) / Sum[Z5](Q[Z2, Z5](Z1, Z4, Z2, Z5))),
            lhs_expr=(
                Sum[Z3](P(Z5 | (Z1, Z2, Z3, Z4)) * P(Z3 | (Z1, Z4, Z2)) * P(Z2 | (Z1, Z4)))
                / Sum[Z3, Z5](P(Z5 | (Z1, Z4, Z2, Z3)) * P(Z3 | (Z1, Z4, Z2)) * P(Z2 | (Z1, Z4)))
            ),
            variables=(Z1, Z2),
        ),
    ],
    conditional_independencies=[
        DSeparationJudgement.create(W0, W1, [X]),
        DSeparationJudgement.create(W0, W2, [X]),
        DSeparationJudgement.create(W0, Z1, [X]),
        DSeparationJudgement.create(W0, Z2, [X]),
        DSeparationJudgement.create(W0, Z3, [X]),
        DSeparationJudgement.create(W0, Z4, [X]),
        DSeparationJudgement.create(W0, Z5, [X]),
        DSeparationJudgement.create(W1, Y, [W0, W2, Z3, Z4, Z5]),
        DSeparationJudgement.create(W1, Z1, [X]),
        DSeparationJudgement.create(W1, Z2, [X]),
        DSeparationJudgement.create(W1, Z3, [X]),
        DSeparationJudgement.create(W1, Z4, [X]),
        DSeparationJudgement.create(W1, Z5, [X]),
        DSeparationJudgement.create(W2, X, [W1]),
        DSeparationJudgement.create(W2, Z1, [W1]),
        DSeparationJudgement.create(W2, Z2, [W1]),
        DSeparationJudgement.create(W2, Z3, [W1]),
        DSeparationJudgement.create(W2, Z4, [W1]),
        DSeparationJudgement.create(W2, Z5, [W1]),
        DSeparationJudgement.create(X, Y, [W0, W2, Z3, Z4, Z5]),
        DSeparationJudgement.create(X, Z4, [Z1, Z2, Z3]),
        DSeparationJudgement.create(X, Z5, [Z1, Z2, Z3]),
        DSeparationJudgement.create(Y, Z1, [W0, W2, Z3, Z4, Z5]),
        DSeparationJudgement.create(Y, Z2, [W0, W2, Z3, Z4, Z5]),
        DSeparationJudgement.create(Z1, Z4),
        DSeparationJudgement.create(Z1, Z5),
        DSeparationJudgement.create(Z2, Z4),
        DSeparationJudgement.create(Z2, Z5),
    ],
)

#: The Identifiability 3 example (six directed and four undirected edges)
#: Treatment: X
#: Outcome: Y
#: Reference: J. Pearl. 2009. "Causality: Models, Reasoning and Inference. 2nd ed." Cambridge University Press, p. 92.
identifiability_3 = NxMixedGraph.from_edges(
    directed=[(Z2, X), (Z2, Z1), (Z2, Z3), (X, Z1), (Z3, Y), (Z1, Y)],
    undirected=[(Z2, X), (Z2, Y), (X, Z3), (X, Y)],
)

#: The Identifiability 4 example (five directed and two undirected edges)
#: Treatment: X
#: Outcome: Y
#: Reference: J. Pearl. 2009. "Causality: Models, Reasoning and Inference. 2nd ed." Cambridge University Press, p. 92.
identifiability_4 = NxMixedGraph.from_edges(
    directed=[(X, Z1), (X, Y), (Z1, Z2), (Z1, Y), (Z2, Y)],
    undirected=[(X, Z2), (Z1, Y)],
)

#: The Identifiability 5 example
#: Treatment: X1, X2
#: Outcome: Y
#: Reference: J. Pearl. 2009. "Causality: Models, Reasoning and Inference. 2nd ed." Cambridge University Press, p. 119.
# The treatments are given as the DSL variables X1 and X2 rather than as the
# strings "X1"/"X2", so that every node of the graph is a Variable (string
# nodes belong to ``from_str_edges``).
# NOTE(review): assumes y0.dsl.X1 / X2 are named "X1" / "X2" -- confirm.
identifiability_5 = NxMixedGraph.from_edges(
    directed=[
        (X1, Z),
        (X1, Y),
        (X1, X2),
        (Z, X2),
        (X2, Y),
    ],
    undirected=[
        (X1, Z),
        (Z, Y),
    ],
)

#: The Identifiability 6 example
#: Treatment: X1, X2
#: Outcome: Y
#: Reference: J. Pearl. 2009. "Causality: Models, Reasoning and Inference. 2nd ed." Cambridge University Press, p. 125.
# The treatments are given as the DSL variables X1 and X2 rather than as the
# strings "X1"/"X2", so that every node of the graph is a Variable (string
# nodes belong to ``from_str_edges``).
# NOTE(review): assumes y0.dsl.X1 / X2 are named "X1" / "X2" -- confirm.
identifiability_6 = NxMixedGraph.from_edges(
    directed=[
        (Z1, X1),
        (X1, X2),
        (X2, Y),
        (Z2, Y),
    ],
    undirected=[
        (Z1, Z2),
        (Z1, X2),
        (Z2, X2),
    ],
)

# W3, W4 and W5 are not among the names this module imports from the DSL, so
# private variables are created here; this keeps every node of the graph below
# a Variable instead of mixing Variables with plain strings.
_W3, _W4, _W5 = Variable("W3"), Variable("W4"), Variable("W5")

#: The Identifiability 7 example
#: Treatment: X
#: Outcome: Y
#: Reference: J. Tian. 2002. "Studies in Causal Reasoning and Learning." p. 90.
identifiability_7 = NxMixedGraph.from_edges(
    directed=[
        (W1, W2),
        (_W3, _W4),
        (W2, X),
        (_W4, X),
        (X, Y),
    ],
    undirected=[
        (W1, X),
        (W1, Y),
        (W1, _W3),
        (_W3, W2),
        (_W3, _W5),
        (_W5, _W4),
    ],
)

# TODO Recoverability 1/2 - what is the S node?
# TODO Transportability 1/2 - what are the box nodes?
# TODO g-Identifiability examples
# TODO g-Transportability examples


#: The Verma 1 example: the chain V1 -> V2 -> V3 -> V4 plus an undirected V2 -- V4 edge
#: Treatment: V3
#: Outcome: V4
#: Reference: T. Verma and J. Pearl. 1990. "Equivalence and Synthesis of Causal Models." In P. Bonissone et al., eds.,
#: Proceedings of the 6th Conference on Uncertainty in Artificial Intelligence. Cambridge, MA: AUAI Press, p. 257.
verma_1 = NxMixedGraph.from_str_edges(
    directed=[("V1", "V2"), ("V2", "V3"), ("V3", "V4")],
    undirected=[("V2", "V4")],
)

#: The Verma 2 example: the chain V1 -> ... -> V5 plus three undirected edges
#: Treatment: V1
#: Outcome: V5
#: Reference: J. Tian. 2002. "Studies in Causal Reasoning and Learning." p. 70.
verma_2 = NxMixedGraph.from_str_edges(
    directed=[("V1", "V2"), ("V2", "V3"), ("V3", "V4"), ("V4", "V5")],
    undirected=[("V1", "V3"), ("V2", "V4"), ("V3", "V5")],
)

#: The Verma 3 example: the chain V1 -> ... -> V5 plus three undirected edges
#: Treatment: V1
#: Outcome: V5
#: Reference: J. Tian. 2002. "Studies in Causal Reasoning and Learning." p. 59.
verma_3 = NxMixedGraph.from_str_edges(
    directed=[("V1", "V2"), ("V2", "V3"), ("V3", "V4"), ("V4", "V5")],
    undirected=[("V1", "V5"), ("V1", "V3"), ("V2", "V4")],
)

#: The Verma 4 example: the chain V1 -> ... -> V5 plus four undirected edges
#: Treatment: V1
#: Outcome: V5
#: Reference: E. Bareinboim modification of Verma 2.
verma_4 = NxMixedGraph.from_str_edges(
    directed=[("V1", "V2"), ("V2", "V3"), ("V3", "V4"), ("V4", "V5")],
    undirected=[("V1", "V5"), ("V1", "V3"), ("V2", "V4"), ("V3", "V5")],
)

#: The Verma 5 example: the chain V1 -> ... -> V6 plus five undirected edges,
#: two of which involve the extra node V0
#: Treatment: V1
#: Outcome: V5
#: Reference: E. Bareinboim modification of Verma 2.
verma_5 = NxMixedGraph.from_str_edges(
    directed=[("V1", "V2"), ("V2", "V3"), ("V3", "V4"), ("V4", "V5"), ("V5", "V6")],
    undirected=[("V0", "V1"), ("V0", "V6"), ("V1", "V5"), ("V1", "V3"), ("V2", "V4")],
)

#: The z-Identifiability 1 example: Z -> X -> Y with undirected edges Z -- X and Z -- Y
#: Treatment: X
#: Outcome: Y
#: Z*: Z
#: Reference: E. Bareinboim and J. Pearl. 2012. "Causal Inference by Surrogate Experiments: z-Identifiability." In
#: Nando de Freitas and K. Murphy., eds., Proceedings of the 28th Conference on Uncertainty in Artificial Intelligence.
#: Corvallis, OR: AUAI Press, p. 114.
z_identifiability_1 = NxMixedGraph.from_edges(
    directed=[(Z, X), (X, Y)],
    undirected=[(Z, X), (Z, Y)],
)

#: The z-Identifiability 2 example
#: Treatment: X
#: Outcome: Y
#: Z*: Z
#: Reference: E. Bareinboim and J. Pearl. 2012. "Causal Inference by Surrogate Experiments: z-Identifiability." In
#: Nando de Freitas and K. Murphy, eds., Proceedings of the 28th Conference on Uncertainty in Artificial Intelligence.
#: Corvallis, OR: AUAI Press, p. 114.
z_identifiability_2 = NxMixedGraph.from_edges(
    # Directed chain: Z -> X -> Y
    directed=[(Z, X), (X, Y)],
    # Y shares an undirected edge with both X and Z
    undirected=[(X, Y), (Z, Y)],
)

#: The z-Identifiability 3 example
#: Treatment: X
#: Outcome: Y
#: Z*: Z
#: Reference: E. Bareinboim and J. Pearl. 2012. "Causal Inference by Surrogate Experiments: z-Identifiability." In
#: Nando de Freitas and K. Murphy, eds., Proceedings of the 28th Conference on Uncertainty in Artificial Intelligence.
#: Corvallis, OR: AUAI Press, p. 114.
z_identifiability_3 = NxMixedGraph.from_edges(
    # Both Z and X point directly into Y
    directed=[(Z, Y), (X, Y)],
    # Z shares an undirected edge with both X and Y
    undirected=[(X, Z), (Z, Y)],
)

#: The Identifiability (Linear) 1 example
#: Treatment: X
#: Outcome: Y
#: Reference: J. Pearl. 2009. "Causality: Models, Reasoning and Inference. 2nd ed." Cambridge University Press, p. 153.
identifiability_linear_1 = NxMixedGraph.from_edges(
    # Two directed routes from X to Y: one through Z and one through W
    directed=[(X, Z), (X, W), (W, Y), (Z, Y)],
    # Each undirected edge doubles up an existing directed edge (X-Z and W-Y)
    undirected=[(X, Z), (W, Y)],
)

d_separation_example = Example(
    name="D-separation example",
    reference="http://web.mit.edu/jmn/www/6.034/d-separation.pdf",
    # Purely directed graph: AA and B both point into C, C points to D and E,
    # and D continues along the chain D -> F -> G.
    graph=NxMixedGraph.from_edges(
        directed=[(AA, C), (B, C), (C, D), (C, E), (D, F), (F, G)],
    ),
    conditional_independencies=[
        # The two parents of C, with no conditioning set
        DSeparationJudgement.create(AA, B),
        # Each parent of C paired with each of D, E, F, G, conditioning on C
        # (emitted in the order AA-D, AA-E, AA-F, AA-G, B-D, B-E, B-F, B-G)
        *(
            DSeparationJudgement.create(parent, other, [C])
            for parent in (AA, B)
            for other in (D, E, F, G)
        ),
        # Remaining judgements among C and the nodes after it
        DSeparationJudgement.create(C, F, [D]),
        DSeparationJudgement.create(C, G, [D]),
        DSeparationJudgement.create(D, E, [C]),
        DSeparationJudgement.create(D, G, [F]),
        DSeparationJudgement.create(E, F, [C]),
        DSeparationJudgement.create(E, G, [C]),
    ],
)


# Load the Asia table at import time and recode "yes"/"no" values as 1/-1
asia_df = pd.read_csv(ASIA_PATH)
asia_df = asia_df.replace({"yes": 1, "no": -1})
# Discard the first column (presumably a row index stored in the CSV -- TODO confirm)
del asia_df[asia_df.columns[0]]

asia_example = Example(
    name="Asia dataset",
    reference="https://www.bnlearn.com/documentation/man/asia.html",
    graph=NxMixedGraph.from_edges(
        # Wrap each pair of node names in Variable objects to build the directed edges
        directed=[
            (Variable(source), Variable(target))
            for source, target in (
                ("Asia", "Tub"),
                ("Smoke", "Lung"),
                ("Smoke", "Bronc"),
                ("Tub", "Either"),
                ("Lung", "Either"),
                ("Either", "Xray"),
                ("Either", "Dysp"),
                ("Bronc", "Dysp"),
            )
        ],
    ),
    data=asia_df,
)

# Smallest example in the file: a single directed edge X -> Y
figure_2a_example = Example(
    name="Shpitser et al. (2008), Figure 2A",
    reference=(
        "Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
        "Journal of Machine Learning Research."
    ),
    graph=NxMixedGraph.from_edges(directed=[(X, Y)]),
)

# X points to Y both directly and through Z; Y and Z also share an undirected edge
figure_2b_example = Example(
    name="Shpitser et al. (2008), Figure 2B",
    reference=(
        "Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
        "Journal of Machine Learning Research."
    ),
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, Y),
            (X, Z),
            (Z, Y),
        ],
        undirected=[
            (Y, Z),
        ],
    ),
)

# Example for figure 2c of Shpitser & Pearl (2008).
# The display name previously read "figure 2d" (copied from the neighbouring
# example); it now matches the variable it is bound to.
# NOTE(review): the edge lists below are identical to those of
# complete_hierarchy_figure_2d_example -- confirm against the paper whether
# figure 2c really has the same structure or whether the graph was copied too.
complete_hierarchy_figure_2c_example = Example(
    name="Shpitser et al (2008) figure 2c",
    reference="Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
    "Journal of Machine Learning Research.",
    graph=NxMixedGraph.from_edges(
        # Z points into both X and Y, and X points into Y
        directed=[
            (X, Y),
            (Z, X),
            (Z, Y),
        ],
        # X and Z additionally share an undirected edge
        undirected=[(X, Z)],
    ),
)

# Z points into both X and Y, X points into Y, and X-Z share an undirected edge
complete_hierarchy_figure_2d_example = Example(
    name="Shpitser et al (2008) figure 2d",
    reference=(
        "Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
        "Journal of Machine Learning Research."
    ),
    graph=NxMixedGraph.from_edges(
        directed=[(X, Y), (Z, X), (Z, Y)],
        undirected=[(X, Z)],
    ),
)

# Directed chain X -> Z -> Y with an undirected edge between X and Y
complete_hierarchy_figure_2e_example = Example(
    name="Shpitser et al (2008) figure 2e",
    reference=(
        "Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
        "Journal of Machine Learning Research."
    ),
    graph=NxMixedGraph.from_edges(
        directed=[(X, Z), (Z, Y)],
        undirected=[(X, Y)],
    ),
)

# Five-node example over X, W1, W2, Y1 and Y2
complete_hierarchy_figure_3a_example = Example(
    name="Shpitser et al 2008 figure 3a",
    reference=(
        "Shpitser, I., & Pearl, J. (2008). Complete Identification Methods for the Causal Hierarchy. "
        "Journal of Machine Learning Research."
    ),
    graph=NxMixedGraph.from_edges(
        directed=[
            (X, Y1),
            (W1, X),
            (W2, Y2),
        ],
        # W1 has an undirected edge to W2, Y1 and Y2; X has one to W2
        undirected=[
            (W1, W2),
            (W1, Y1),
            (W1, Y2),
            (X, W2),
        ],
    ),
)

# Directed chain Infected -> Hospitalized -> Died, plus an undirected
# edge between the two ends of the chain.
id_sir_example = Example(
    name="Identifiable SIR",
    reference="ASKEM",
    graph=NxMixedGraph.from_str_edges(
        directed=[("Infected", "Hospitalized"), ("Hospitalized", "Died")],
        undirected=[
            ("Infected", "Died"),
        ],
    ),
)

# Two nodes only: Infected -> Died, with an undirected edge over the same pair
nonid_sir_example = Example(
    name="Non-Identifiable SIR",
    reference="ASKEM",
    graph=NxMixedGraph.from_str_edges(
        directed=[("Infected", "Died")],
        undirected=[("Infected", "Died")],
    ),
)

# IGF signalling example with a single example query (treatment SOS, outcome Erk).
igf_example = Example(
    name="IGF Graph",
    # Adjacent string literals are concatenated verbatim, so each fragment must
    # carry its own separating whitespace; the fragments previously ran together
    # as "2021.Leveraging" and "Pathogenesis- IEEE".
    reference="Jeremy Zucker, Sara Mohammad-Taheri, Kaushal Paneri, Somya Bhargava, Pallavi Kolambkar"
    ", Craig Bakker, Jeremy Teuton, Charles Tapley Hoyt, Kristie Oxford, Robert Ness, and Olga Vitek. 2021. "
    "Leveraging Structured Biological Knowledge for Counterfactual Inference: a Case Study of Viral Pathogenesis "
    "- IEEE Journals & Magazine. IEEE Transactions on Big Data (January 2021).",
    graph=NxMixedGraph.from_str_edges(
        # Nodes are listed explicitly in addition to being named by the edges
        nodes=["SOS", "Ras", "Raf", "AKT", "Mek", "Erk", "PI3K"],
        directed=[
            ("SOS", "Ras"),
            ("Ras", "PI3K"),
            ("Ras", "Raf"),
            ("PI3K", "AKT"),
            ("AKT", "Raf"),
            ("Raf", "Mek"),
            ("Mek", "Erk"),
        ],
        undirected=[("SOS", "PI3K")],
    ),
    example_queries=[Query.from_str(treatments="SOS", outcomes="Erk")],
)

# Large SARS-CoV-2 example with two example queries, both with outcome "cytok"
# (treatments "Sil6r" and "EGFR" respectively).
sars_large_example = Example(
    name="SARS-CoV-2 Graph",
    # Adjacent string literals are concatenated verbatim, so each fragment must
    # carry its own separating whitespace; the fragments previously ran together
    # as "2021.Leveraging" and "Pathogenesis- IEEE".
    reference="Jeremy Zucker, Sara Mohammad-Taheri, Kaushal Paneri, Somya Bhargava, Pallavi Kolambkar"
    ", Craig Bakker, Jeremy Teuton, Charles Tapley Hoyt, Kristie Oxford, Robert Ness, and Olga Vitek. 2021. "
    "Leveraging Structured Biological Knowledge for Counterfactual Inference: a Case Study of Viral Pathogenesis "
    "- IEEE Journals & Magazine. IEEE Transactions on Big Data (January 2021).",
    graph=NxMixedGraph.from_str_edges(
        # Nodes are listed explicitly in addition to being named by the edges
        nodes=[
            "SARS_COV2",
            "ACE2",
            "Ang",
            "AGTR1",
            "ADAM17",
            "Toci",
            "Sil6r",
            "EGF",
            "TNF",
            "EGFR",
            "PRR",
            "NFKB",
            "IL6STAT3",
            "IL6AMP",
            "cytok",
            "Gefi",
        ],
        directed=[
            ("SARS_COV2", "ACE2"),
            ("ACE2", "Ang"),
            ("Ang", "AGTR1"),
            ("AGTR1", "ADAM17"),
            ("ADAM17", "EGF"),
            ("ADAM17", "TNF"),
            ("ADAM17", "Sil6r"),
            ("SARS_COV2", "PRR"),
            ("PRR", "NFKB"),
            ("EGFR", "NFKB"),
            ("TNF", "NFKB"),
            ("Sil6r", "IL6STAT3"),
            ("Toci", "Sil6r"),
            ("NFKB", "IL6AMP"),
            ("IL6AMP", "cytok"),
            ("IL6STAT3", "IL6AMP"),
            ("EGF", "EGFR"),
            ("Gefi", "EGFR"),
        ],
        undirected=[
            ("SARS_COV2", "Ang"),
            ("ADAM17", "Sil6r"),
            ("PRR", "NFKB"),
            ("EGF", "EGFR"),
            ("EGFR", "TNF"),
            ("EGFR", "IL6STAT3"),
        ],
    ),
    example_queries=[
        Query.from_str(treatments="Sil6r", outcomes="cytok"),
        Query.from_str(treatments="EGFR", outcomes="cytok"),
    ],
)

# Six-node SARS-CoV-2 graph; it is the graph of ``sars_small_example``.
SARS_SMALL_GRAPH = NxMixedGraph.from_str_edges(
    directed=[
        # ADAM17 points into EGFR, TNF and Sil6r
        ("ADAM17", "EGFR"),
        ("ADAM17", "TNF"),
        ("ADAM17", "Sil6r"),
        # Every directed route ends in cytok
        ("EGFR", "cytok"),
        ("TNF", "cytok"),
        ("Sil6r", "IL6STAT3"),
        ("IL6STAT3", "cytok"),
    ],
    undirected=[("ADAM17", "cytok"), ("ADAM17", "Sil6r"), ("EGFR", "TNF"), ("EGFR", "IL6STAT3")],
)

# Small SARS-CoV-2 example: pairs SARS_SMALL_GRAPH with the COVID case-study
# data generator and one example query (treatment EGFR, outcome cytok).
sars_small_example = Example(
    name="SARS-CoV-2 Small Graph",
    reference="Sara!",  # FIXME: placeholder, still needs a real citation
    graph=SARS_SMALL_GRAPH,
    generate_data=generate_data_for_covid_case_study,
    example_queries=[
        Query.from_str(treatments="EGFR", outcomes="cytok"),
    ],
)

# Graph for the "Tikka TRSO Figure 8" example, over X1, X2, W, Z, Y1 and Y2
tikka_trso_figure_8_graph = NxMixedGraph.from_edges(
    undirected=[
        (X1, Y1),
        (Z, W),
        (Z, X2),
    ],
    # X1, W and Z each point into both Y1 and Y2; Z also points into X2,
    # which in turn points into Y2.
    directed=[(X1, Y1), (X1, Y2), (W, Y1), (W, Y2), (Z, Y1), (Z, X2), (X2, Y2), (Z, Y2)],
)
# NOTE(review): this name does not end in "_example", so the suffix-based
# ``examples`` list at the bottom of the module does not pick it up -- confirm
# whether that is intended.
tikka_trso_figure_8 = Example(
    graph=tikka_trso_figure_8_graph,
    name="Tikka TRSO Figure 8",
    reference="https://arxiv.org/abs/1806.07172",
)


# Smoking/cancer example with a data generator and one example query
# (treatment S, outcome C).
cancer_example = Example(
    name="Smoking and Cancer",
    reference="https://github.com/y0-causal-inference/y0/pull/183",
    graph=NxMixedGraph.from_edges(
        # S points into C directly and through T
        directed=[
            (S, T),
            (T, C),
            (S, C),
        ],
        # S and T additionally share an undirected edge
        undirected=[
            (S, T),
        ],
    ),
    generate_data=generate_data_for_smoke_cancer,
    example_queries=[
        Query.from_str(treatments="S", outcomes="C"),
    ],
)


# Collect every object in the module namespace whose name ends in "_example",
# in the order the names were bound. Selection is purely by name suffix, so
# anything named differently (e.g. the bare graphs) is left out.
examples = [
    candidate
    for identifier, candidate in locals().items()
    if identifier.endswith("_example")
]