Source code for ananke.estimation.counterfactual_mean

"""
Class that provides an interface to estimation strategies for the counterfactual mean E[Y(t)]
"""

import copy

import numpy as np
import statsmodels.api as sm
from scipy.stats import norm

from ananke.identification import OneLineID


[docs]class CausalEffect:
    """
    Provides an interface to various estimation strategies for the ACE: E[Y(1) - Y(0)].
    """

    def __init__(self, graph, treatment, outcome):
        """
        Constructor.

        :param graph: ADMG corresponding to substantive knowledge/model.
        :param treatment: name of vertex corresponding to the treatment.
        :param outcome: name of vertex corresponding to the outcome.
        """

        self.graph = copy.deepcopy(graph)
        self.treatment = treatment
        self.outcome = outcome
        self.strategy = None
        self.is_mb_shielded = self.graph.mb_shielded()

        # a dictionary of names for available estimators
        self.estimators = {
            "ipw": self._ipw,
            "gformula": self._gformula,
            "aipw": self._aipw,
            "eff-aipw": self._eff_augmented_ipw,
            "p-ipw": self._primal_ipw,
            "d-ipw": self._dual_ipw,
            "apipw": self._augmented_primal_ipw,
            "eff-apipw": self._eff_augmented_primal_ipw,
            "n-ipw": self._nested_ipw,
            "anipw": self._augmented_nested_ipw,
        }

        # a dictionary of names for available modeling strategies
        self.models = {
            "glm-binary": self._fit_binary_glm,
            "glm-continuous": self._fit_continuous_glm,
        }

        # check if the query is ID
        self.one_id = OneLineID(graph, [treatment], [outcome])

        # check the fixability criteria for the treatment
        if (
            len(
                self.graph.district(treatment).intersection(
                    self.graph.descendants([treatment])
                )
            )
            == 1
        ):
            self.strategy = "a-fixable"
            if self.is_mb_shielded:
                print(
                    "\n Treatment is a-fixable and graph is mb-shielded. \n\n Available estimators are:\n \n"
                    + "1. IPW (ipw)\n"
                    + "2. Outcome regression (gformula)\n"
                    + "3. Generalized AIPW (aipw)\n"
                    + "4. Efficient Generalized AIPW (eff-aipw) \n \n"
                    + "Suggested estimator is Efficient Generalized AIPW \n"
                )
            else:
                print(
                    "\n Treatment is a-fixable.\n\n Available estimators are :\n \n"
                    + "1. IPW (ipw)\n"
                    + "2. Outcome regression (gformula)\n"
                    + "3. Generalized AIPW (aipw)\n \n"
                    + "Suggested estimator is Generalized AIPW \n"
                )

        elif (
            len(
                self.graph.district(treatment).intersection(
                    self.graph.children([treatment])
                )
            )
            == 0
        ):
            self.strategy = "p-fixable"
            if self.is_mb_shielded:
                print(
                    "\n Treatment is p-fixable and graph is mb-shielded. \n\n Available estimators are:\n\n"
                    + "1. Primal IPW (p-ipw)\n"
                    + "2. Dual IPW (d-ipw)\n"
                    + "3. APIPW (apipw)\n"
                    + "4. Efficient APIPW (eff-apipw) \n \n"
                    + "Suggested estimator is Efficient APIPW \n"
                )
            else:
                print(
                    "\n Treatment is p-fixable. \n\n Available estimators are:\n \n"
                    + "1. Primal IPW (p-ipw)\n"
                    + "2. Dual IPW (d-ipw)\n"
                    + "3. APIPW (apipw) \n\n"
                    + "Suggested estimator is APIPW \n"
                )

        elif self.one_id.id():
            self.strategy = "nested-fixable"
            print(
                "\n Effect is identified. \n \n Available estimators:\n \n"
                + "1. Nested IPW (n-ipw)\n"
                + "2. Augmented NIPW (anipw) \n\n"
                + "Suggested estimator is Augmented NIPW \n"
            )

        else:
            self.strategy = "Not ID"
            print("Effect is not identified! \n")

        # finally, fix a valid topological order for estimation queries
        self.p_order = self._find_valid_order(
            "p-fixable"
        )  # used for a-fixable/p-fixable strategies
        self.n_order = self._find_valid_order(
            "nested-fixable"
        )  # used for nested-fixable strategies
        self.state_space_map_ = (
            {}
        )  # maps from variable names to state space of the variable (binary/continuous)

    def _find_valid_order(self, order_type):
        """
        Utility function to find a valid order that satisfies the requirements in
        Semiparametric Inference (Bhattacharya, Nabi, & Shpitser 2020) paper.

        :param order_type: string specifying if the topological order is required for p-fixable or n-fixable problems.
        :return: list corresponding to a valid topological order.
        """

        # if we are a-fixing or p-fixing, just ensure that the
        # treatment appears after all its non-descendants
        if order_type != "nested-fixable":
            focal_vertices = {self.treatment}

        # otherwise we ensure the vertices in Y* that intersect with district
        # of the treatment appear after all their non-descendants
        else:
            focal_vertices = self.one_id.ystar.intersection(
                self.graph.district(self.treatment)
            )

        # perform a sort on the nondescendants first
        nondescendants = set(self.graph.vertices) - self.graph.descendants(
            focal_vertices
        )
        order = self.graph.subgraph(nondescendants).topological_sort()

        # then sort the focal vertices and its descendants
        order += self.graph.subgraph(
            self.graph.descendants(focal_vertices)
        ).topological_sort()

        return order

    def _fit_binary_glm(self, data, formula, weights=None):
        """
        Fit a binary GLM to data given a formula.

        :param data: pandas data frame containing the data.
        :param formula: string encoding an R-style formula e.g: Y ~ X1 + X2.
        :return: the fitted model.
        """

        return sm.GLM.from_formula(
            formula,
            data=data,
            family=sm.families.Binomial(),
            freq_weights=weights,
        ).fit()

    def _fit_continuous_glm(self, data, formula, weights=None):
        """
        Fit a linear GLM to data given a formula.

        :param data: pandas data frame containing the data.
        :param formula: string encoding an R-style formula: e.g. Y ~ X1 + X2.
        :return: the fitted model.
        """

        return sm.GLM.from_formula(
            formula,
            data=data,
            family=sm.families.Gaussian(),
            freq_weights=weights,
        ).fit()

    def _ipw(self, data, assignment, model_binary=None, model_continuous=None):
        """
        IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for propensity score: e.g. glm-binary.
        :param model_continuous: this argument is ignored for IPW.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "a-fixable":
            raise RuntimeError(
                "IPW will not return valid estimates as treatment is not a-fixable"
            )

        # extract outcome from data frame and compute Markov pillow of treatment
        Y = data[self.outcome]
        mp_T = self.graph.markov_pillow([self.treatment], self.p_order)

        if len(mp_T) != 0:
            # fit T | mp(T) and compute probability of treatment for each sample
            formula = self.treatment + " ~ " + "+".join(mp_T)  # + "+ ones"
            model = model_binary(data, formula)
            prob_T = model.predict(data)
        else:
            prob_T = np.ones(len(data)) * np.mean(data[self.treatment])

        indices_T0 = data.index[data[self.treatment] == 0]
        prob_T[indices_T0] = 1 - prob_T[indices_T0]

        # compute IPW estimate
        indices = data[self.treatment] == assignment
        return np.mean((indices / prob_T) * Y)

    def _gformula(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Outcome regression estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for outcome regression: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "a-fixable":
            raise RuntimeError(
                "g-formula will not return valid estimates as treatment is not a-fixable"
            )

        # create a dataset where T=t
        data_assign = data.copy()
        data_assign[self.treatment] = assignment

        # fit Y | T=t, mp(T)
        mp_T = self.graph.markov_pillow([self.treatment], self.p_order)
        if len(mp_T) != 0:
            formula = (
                self.outcome + " ~ " + self.treatment + "+" + "+".join(mp_T)
            )
            # predict outcome appropriately depending on binary vs continuous
        else:
            formula = self.outcome + " ~ " + self.treatment

        if self.state_space_map_[self.outcome] == "binary":
            model = model_binary(data, formula)
        else:
            model = model_continuous(data, formula)

        return np.mean(model.predict(data_assign))

    def _aipw(self, data, assignment, model_binary=None, model_continuous=None):
        """
        Augmented IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "a-fixable":
            raise RuntimeError(
                "Augmented IPW will not return valid estimates as treatment is not a-fixable"
            )

        # extract the outcome and get Markov pillow of the treatment
        Y = data[self.outcome]
        mp_T = self.graph.markov_pillow([self.treatment], self.p_order)

        if len(mp_T) != 0:
            # fit T | mp(T) and predict treatment probabilities
            formula_T = self.treatment + " ~ " + "+".join(mp_T)  # + "+ ones"
            model = model_binary(data, formula_T)
            prob_T = model.predict(data)
            formula_Y = (
                self.outcome + " ~ " + self.treatment + "+" + "+".join(mp_T)
            )
        else:
            prob_T = np.ones(len(data)) * np.mean(data[self.treatment])
            formula_Y = self.outcome + " ~ " + self.treatment

        indices_T0 = data.index[data[self.treatment] == 0]
        prob_T[indices_T0] = 1 - prob_T[indices_T0]
        indices = data[self.treatment] == assignment

        # fit Y | T=t, mp(T) and predict outcomes under assignment T=t
        data_assign = data.copy()
        data_assign[self.treatment] = assignment
        if self.state_space_map_[self.outcome] == "binary":
            model = model_binary(data, formula_Y)
        else:
            model = model_continuous(data, formula_Y)
        Yhat_vec = model.predict(data_assign)

        # return AIPW estimate
        return np.mean((indices / prob_T) * (Y - Yhat_vec) + Yhat_vec)

    def _eff_augmented_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Efficient augmented IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "a-fixable":
            raise RuntimeError(
                "Augmented IPW will not return valid estimates as treatment is not a-fixable"
            )
        if not self.is_mb_shielded:
            raise RuntimeError(
                "EIF will not return valid estimates as graph is not mb-shielded"
            )

        # extract the outcome and get Markov pillow of treatment
        Y = data[self.outcome]
        mp_T = self.graph.markov_pillow([self.treatment], self.p_order)

        # fit T | mp(T) and compute treatment probabilities
        if len(mp_T) != 0:
            formula = self.treatment + " ~ " + "+".join(mp_T)  # + "+ ones"
            model = model_binary(data, formula)
            prob_T = model.predict(data)
        else:
            prob_T = np.ones(len(data)) * np.mean(data[self.treatment])

        indices_T0 = data.index[data[self.treatment] == 0]
        prob_T[indices_T0] = 1 - prob_T[indices_T0]
        indices = data[self.treatment] == assignment

        # compute the primal estimates that we use to compute projections
        primal = (indices / prob_T) * Y
        data["primal"] = primal
        eif_vec = 0

        # get the variables that are actually involved in the efficient influence function
        # TODO: prune extra variables
        eif_vars = [V for V in self.graph.vertices]
        eif_vars.remove(self.treatment)

        # iterate over variables
        for V in eif_vars:

            # get the Markov pillow of the variable
            mpV = self.graph.markov_pillow([V], self.p_order)

            # compute E[primal | mp(V)]
            formula = "primal ~ " + "+".join(mpV)

            # special case if mp(V) is empty, then E[primal | mp(V)] = E[primal]
            if len(mpV) != 0:
                model_mpV = model_continuous(
                    data, formula
                )  # primal is a continuous r.v.
                primal_mpV = model_mpV.predict(data)
            else:
                primal_mpV = np.mean(primal)

            # compute E[primal | V, mp(V)]
            formula = formula + "+" + V
            model_VmpV = model_continuous(data, formula)
            primal_VmpV = model_VmpV.predict(data)

            # add contribution of current variable: E[primal | V, mp(V)] - E[primal | mp(V)]
            eif_vec += primal_VmpV - primal_mpV

        # re-add the primal so final result is not mean zero
        eif_vec = eif_vec + np.mean(primal)

        # return efficient AIPW estimate
        return np.mean(eif_vec)

    def _beta_primal(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Utility function to compute primal estimates for a dataset.

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: numpy array of floats corresponding to primal estimates for each sample.
        """

        # extract the outcome
        Y = data[self.outcome]

        # C := pre-treatment vars and L := post-treatment vars in district of treatment
        C = self.graph.pre([self.treatment], self.p_order)
        post = set(self.graph.vertices).difference(C)
        L = post.intersection(self.graph.district(self.treatment))

        # create copies of the data with treatment assignments T=0 and T=1
        data_T1 = data.copy()
        data_T1[self.treatment] = 1
        data_T0 = data.copy()
        data_T0[self.treatment] = 0

        indices = data[self.treatment] == assignment

        # prob: stores \prod_{Li in L} p(Li | mp(Li))
        # prob_T1: stores \prod_{Li in L} p(Li | mp(Li)) at T=1
        # prob_T0: stores \prod_{Li in L} p(Li | mp(Li)) at T=0

        mp_T = self.graph.markov_pillow([self.treatment], self.p_order)
        indices_T0 = data.index[data[self.treatment] == 0]

        if len(mp_T) != 0:
            formula = self.treatment + " ~ " + "+".join(mp_T)
            model = model_binary(data, formula)
            prob = model.predict(data)
            prob[indices_T0] = 1 - prob[indices_T0]
            prob_T1 = model.predict(data)
            prob_T0 = 1 - prob_T1
        else:
            prob = np.ones(len(data)) * np.mean(data[self.treatment])
            prob[indices_T0] = 1 - prob[indices_T0]
            prob_T1 = np.ones(len(data)) * np.mean(data[self.treatment])
            prob_T0 = 1 - prob_T1

        # iterate over vertices in L (except the outcome)
        for V in L.difference([self.treatment, self.outcome]):

            # fit V | mp(V)
            mp_V = self.graph.markov_pillow([V], self.p_order)
            formula = V + " ~ " + "+".join(mp_V)

            # p(V =v | .), p(V = v | . , T=1), p(V = v | ., T=0)
            if self.state_space_map_[V] == "binary":
                model = model_binary(data, formula)
                prob_V = model.predict(data)
                prob_V_T1 = model.predict(data_T1)
                prob_V_T0 = model.predict(data_T0)

                indices_V0 = data.index[data[V] == 0]

                # p(V | .), p(V | ., T=t)
                prob_V[indices_V0] = 1 - prob_V[indices_V0]
                prob_V_T1[indices_V0] = 1 - prob_V_T1[indices_V0]
                prob_V_T0[indices_V0] = 1 - prob_V_T0[indices_V0]

            else:
                model = model_continuous(data, formula)
                E_V = model.predict(data)
                E_V_T1 = model.predict(data_T1)
                E_V_T0 = model.predict(data_T0)

                std = np.std(data[V] - E_V)
                prob_V = norm.pdf(data[V], loc=E_V, scale=std)
                prob_V_T1 = norm.pdf(data[V], loc=E_V_T1, scale=std)
                prob_V_T0 = norm.pdf(data[V], loc=E_V_T0, scale=std)

            prob *= prob_V
            prob_T1 *= prob_V_T1
            prob_T0 *= prob_V_T0

        # special case when the outcome is in L
        if self.outcome in L:

            # fit a binary/continuous model for Y | mp(Y)
            mp_Y = self.graph.markov_pillow([self.outcome], self.p_order)
            formula = self.outcome + " ~ " + "+".join(mp_Y)
            if self.state_space_map_[self.outcome] == "binary":
                model = model_binary(data, formula)
            else:
                model = model_continuous(data, formula)

            # predict the outcome and adjust numerator of primal accordingly
            Yhat_T1 = model.predict(data_T1)
            Yhat_T0 = model.predict(data_T0)
            prob_sumT = prob_T1 * Yhat_T1 + prob_T0 * Yhat_T0
            beta_primal = indices * (prob_sumT / prob)
        else:
            prob_sumT = prob_T1 + prob_T0
            beta_primal = indices * (prob_sumT / prob) * Y

        return beta_primal

    def _primal_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Primal IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "p-fixable" and self.strategy != "a-fixable":
            raise RuntimeError(
                "Primal IPW will not return valid estimates as treatment is not p-fixable"
            )

        # return primal IPW estimate
        return np.mean(
            self._beta_primal(data, assignment, model_binary, model_continuous)
        )

    def _beta_dual(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Utility function to compute dual estimates for a dataset.

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: numpy array of floats corresponding to dual estimates for each sample.
        """

        # extract the outcome
        Y = data[self.outcome]

        # M := inverse Markov pillow of the treatment
        M = set(
            [
                m
                for m in self.graph.vertices
                if self.treatment in self.graph.markov_pillow([m], self.p_order)
            ]
        )
        M = M.difference(self.graph.district(self.treatment))

        # create a dataset where T=t
        data_assigned = data.copy()
        data_assigned[self.treatment] = assignment

        prob = 1  # stores \prod_{Mi in M} p(Mi | mp(Mi))|T=t / p(Mi | mp(Mi))
        for V in M.difference([self.outcome]):

            # Fit V | mp(V)
            mp_V = self.graph.markov_pillow([V], self.p_order)
            formula = V + " ~ " + "+".join(mp_V)

            # p(V = 1 | .), p(V = 1 | . , T=assigned)
            if self.state_space_map_[V] == "binary":
                model = model_binary(data, formula)
                prob_V = model.predict(data)
                prob_V_assigned = model.predict(data_assigned)

                indices_V0 = data.index[data[V] == 0]

                # p(V | .) and p(V | ., T=assignment)
                prob_V[indices_V0] = 1 - prob_V[indices_V0]
                prob_V_assigned[indices_V0] = 1 - prob_V_assigned[indices_V0]

            else:
                model = model_continuous(data, formula)
                E_V = model.predict(data)
                E_V_assigned = model.predict(data_assigned)

                std = np.std(data[V] - E_V)
                prob_V = norm.pdf(data[V], loc=E_V, scale=std)
                prob_V_assigned = norm.pdf(data[V], loc=E_V_assigned, scale=std)

            prob *= prob_V_assigned / prob_V

        # special case for if the outcome is in M
        if self.outcome in M:
            mp_Y = self.graph.markov_pillow([self.outcome], self.p_order)
            formula = self.outcome + " ~ " + "+".join(mp_Y)
            if self.state_space_map_[self.outcome] == "binary":
                model = model_binary(data, formula)
            else:
                model = model_continuous(data, formula)
            Yhat_assigned = model.predict(data_assigned)
        else:
            Yhat_assigned = Y

        return prob * Yhat_assigned

    def _dual_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Dual IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "p-fixable" and self.strategy != "a-fixable":
            raise RuntimeError(
                "Dual IPW will not return valid estimates as treatment is not p-fixable"
            )

        # return primal dual IPW estimate
        return np.mean(
            self._beta_dual(data, assignment, model_binary, model_continuous)
        )

    def _augmented_primal_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Augmented primal IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "p-fixable" and self.strategy != "a-fixable":
            raise RuntimeError(
                "Augmented primal IPW will not return valid estimates as treatment is not p-fixable"
            )

        # compute the primal and dual estimates and them to the data frame
        beta_primal = self._beta_primal(
            data, assignment, model_binary, model_continuous
        )
        beta_dual = self._beta_dual(
            data, assignment, model_binary, model_continuous
        )
        data.loc[:, "beta_primal"] = beta_primal
        data.loc[:, "beta_dual"] = beta_dual

        # C := pre-treatment vars
        # L := post-treatment vars in the district of T
        # M := post treatment vars not in L (a.k.a. the rest)
        C = self.graph.pre([self.treatment], self.p_order)
        post = set(self.graph.vertices).difference(C)
        L = post.intersection(self.graph.district(self.treatment))
        M = post - L

        IF = 0

        # iterate over all post-treatment variables
        for V in post:

            # compute all predecessors according to the topological order
            pre_V = self.graph.pre([V], self.p_order)

            # if the variables is in M, project using the primal otherwise use the dual
            # to fit E[beta | pre(V)]
            if V in M:
                formula = "beta_primal" + " ~ " + "+".join(pre_V)
            elif V in L:
                formula = "beta_dual" + " ~ " + "+".join(pre_V)

            # special logic for if there are no predecessors for the variable (which could only happen to T)
            if len(pre_V) != 0:
                model_preV = model_continuous(
                    data, formula
                )  # primal/dual is a continuous r.v.
                pred_preV = model_preV.predict(data)
            else:
                pred_preV = 0

            # fit E[beta | V, pre(V)]
            formula = formula + " + " + V
            model_VpreV = model_continuous(data, formula)
            pred_VpreV = model_VpreV.predict(data)

            # add contribution of current variable as E[beta | V, pre(V)] - E[beta | pre(V)]
            IF += pred_VpreV - pred_preV

        # final contribution from E[beta | C] (if C is not empty)
        if len(C) != 0:
            formula = "beta_dual" + " ~ " + "+".join(C)
            model_C = model_continuous(
                data, formula
            )  # dual is a continuous r.v.
            IF += model_C.predict(data)

        # return APIPW estimate
        return np.mean(IF)

    def _eff_augmented_primal_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Efficient augmented primal IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy != "p-fixable" and self.strategy != "a-fixable":
            raise RuntimeError(
                "Augmented primal IPW will not return valid estimates as treatment is not p-fixable"
            )
        if not self.is_mb_shielded:
            raise RuntimeError(
                "EIF will not return valid estimates as graph is not mb-shielded"
            )

        # compute primal and dual estimates and add them to the data frame
        beta_primal = self._beta_primal(
            data, assignment, model_binary, model_continuous
        )
        beta_dual = self._beta_dual(
            data, assignment, model_binary, model_continuous
        )
        data.loc[:, "beta_primal"] = beta_primal
        data.loc[:, "beta_dual"] = beta_dual

        # C := pre-treatment vars
        # L := post-treatment vars in the district of T
        # M := post treatment vars not in L (a.k.a. the rest)
        C = self.graph.pre([self.treatment], self.p_order)
        post = set(self.graph.vertices).difference(C)
        L = post.intersection(self.graph.district(self.treatment))
        M = post - L

        IF = 0

        # iterate over all variables
        for V in self.graph.vertices:

            # get the Markov pillow
            mp_V = self.graph.markov_pillow([V], self.p_order)

            # if the variables is in M, project using the primal otherwise use the dual
            # to fit E[beta | mp(V)]
            if V in M:
                formula = "beta_primal" + " ~ " + "+".join(mp_V)
            else:
                formula = "beta_dual" + " ~ " + "+".join(mp_V)

            # special logic for if there the Markov pillow is empty
            if len(mp_V) != 0:
                model_mpV = model_continuous(data, formula)
                pred_mpV = model_mpV.predict(data)
            else:
                pred_mpV = np.mean(beta_dual)

            # fit E[beta | V, mp(V)]
            formula = formula + " + " + V
            model_VmpV = model_continuous(data, formula)
            pred_VmpV = model_VmpV.predict(data)

            # add contribution of current variable as E[beta | V, mp(V)] - E[beta | mp(V)]
            IF += pred_VmpV - pred_mpV

        # add final contribution so that estimator is not mean-zero
        IF += np.mean(beta_dual)

        # return efficient APIPW estimate
        return np.mean(IF)

    def _fit_intrinsic_kernel(
        self, data, district, model_binary=None, model_continuous=None
    ):
        """
        Get estimates of an intrinsic kernel q_D(D|pa(D)) for each sample.

        :param data: pandas data frame containing the data.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: numpy array of estimated probabilities of the kernel for each sample.
        """

        fixing_prob = np.ones(len(data))

        # get parents of the district
        parents = self.graph.parents(district) - district
        # initialize with ancestral margin as fixing descendants are just sums
        G = self.graph.subgraph(self.graph.ancestors(district))

        # we know D is intrinsic because of prior checks
        remaining_vertices = (
            set(G.vertices) - district - parents
        )  # is subtracting parents here ok? think so..

        while not G.fixable(parents)[0]:

            fixed = False

            # at every step try to simplify as much as possible by fixing
            # childless vertices as these correspond to just sums
            # also find a backup fixable vertex in case there are no childless ones
            iter_gen = iter(remaining_vertices)
            i = 0

            while not fixed and i < len(remaining_vertices):
                i += 1
                V = next(iter_gen)
                if len(G.children([V])) == 0:
                    G.fix([V])
                    fixed = True
                    remaining_vertices.remove(V)
                elif G.fixable([V])[0]:
                    fixable_V = V

            # if we fixed something go back and see if we can fix another childless vertex
            # or if the requirement that parents are fixable is satisfied
            if fixed:
                continue

            # otherwise do a true fix corresponding to reweighting
            # using the fixable V that we found
            V = fixable_V
            mb_V = G.markov_blanket([V])
            formula = V + " ~ " + "+".join(mb_V)

            # p(V = 1 | .)
            if self.state_space_map_[V] == "binary":
                model = model_binary(data, formula, 1 / fixing_prob)
                prob_V = model.predict(data)
                indices_V0 = data.index[data[V] == 0]

                # p(V | .)
                prob_V[indices_V0] = 1 - prob_V[indices_V0]

            # handling continuous data
            else:
                model = model_continuous(data, formula, 1 / fixing_prob)
                E_V = model.predict(data)
                std = np.std(data[V] - E_V)
                prob_V = norm.pdf(data[V], loc=E_V, scale=std)

            fixing_prob *= prob_V
            G.fix([V])
            remaining_vertices.remove(V)

        # get the Markov blanket of the parents in this final graph
        mb_parents = G.markov_blanket(parents)

        # iterate over each parent and get weights required to fix them
        for V in parents:

            # p(V = 1 | .)
            mp_V = mb_parents.intersection(self.graph.pre([V], self.n_order))
            if len(mp_V) != 0:
                formula = V + " ~ " + "+".join(mp_V)
            else:
                formula = V + " ~ -1 + 1"

            if self.state_space_map_[V] == "binary":
                model = model_binary(data, formula, 1 / fixing_prob)
                prob_V = model.predict(data)
                indices_V0 = data.index[data[V] == 0]

                # p(V | .)
                prob_V[indices_V0] = 1 - prob_V[indices_V0]

            # handling continuous data
            else:
                model = model_continuous(data, formula, 1 / fixing_prob)
                E_V = model.predict(data)
                std = np.std(data[V] - E_V)
                prob_V = norm.pdf(data[V], loc=E_V, scale=std)

            fixing_prob *= prob_V

        kernel_prob = np.ones(len(data))

        # iterate over member of the intrinsic set and get the final kernel weights
        for V in district:

            # p(V = 1 | .)
            mp_V = parents.union(
                district.intersection(self.graph.pre(V, self.n_order))
            )
            if len(mp_V) != 0:
                formula = V + " ~ " + "+".join(mp_V)
            else:
                formula = V + " ~ -1 + 1"

            if self.state_space_map_[V] == "binary":
                model = model_binary(data, formula, 1 / fixing_prob)
                prob_V = model.predict(data)
                indices_V0 = data.index[data[V] == 0]

                # p(V | .)
                prob_V[indices_V0] = 1 - prob_V[indices_V0]

            # handling continuous data
            else:
                model = model_continuous(data, formula, 1 / fixing_prob)
                E_V = model.predict(data)
                std = np.std(data[V] - E_V)
                prob_V = norm.pdf(data[V], loc=E_V, scale=std)

            kernel_prob *= prob_V

        return kernel_prob

    def _get_nested_rebalanced_weights(
        self, data, model_binary=None, model_continuous=None
    ):
        """
        Get the rebalancing weights required for nested IPW and augmented nested IPW

        :param data: pandas data frame containing the data.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: numpy array corresponding to rebalancing weights.
        """

        # first get all districts of GY* that intersect with district of T
        district_T = self.graph.district(self.treatment)
        modified_districts = [
            district
            for district in self.one_id.Gystar.districts
            if len(district.intersection(district_T)) != 0
        ]

        # placeholder for the rebalancing probability
        rebalance_prob = np.ones(len(data))

        # iterate over each modified district
        for district in modified_districts:

            # first compute weights in the denominator of \prod_{Vi in district} p(Vi | mp(Vi))
            for V in district:

                # Fit V | mp(V)
                mp_V = self.graph.markov_pillow([V], self.n_order)
                formula = V + " ~ " + "+".join(mp_V)

                # p(V = 1 | .)
                if self.state_space_map_[V] == "binary":
                    model = model_binary(data, formula)
                    prob_V = model.predict(data)
                    indices_V0 = data.index[data[V] == 0]

                    # p(V | .)
                    prob_V[indices_V0] = 1 - prob_V[indices_V0]

                # handling continuous data
                else:
                    model = model_continuous(data, formula)
                    E_V = model.predict(data)
                    std = np.std(data[V] - E_V)
                    prob_V = norm.pdf(data[V], loc=E_V, scale=std)

                rebalance_prob *= 1 / prob_V

            # now compute the q_D(D | pa(D))
            rebalance_prob *= self._fit_intrinsic_kernel(
                data, district, model_binary, model_continuous
            )

        return 1 / rebalance_prob

    def _nested_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Nested IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        # pedantic checks to make sure the method returns valid estimates
        if self.strategy == "Not ID":
            raise RuntimeError(
                "Nested IPW will not return valid estimates as causal effect is not identified"
            )

        # fit T | mp(T) with the rebalanced weights and compute the nested IPW
        rebalance_weights = self._get_nested_rebalanced_weights(
            data, model_binary, model_continuous
        )
        # extract outcome from data frame and compute Markov pillow of treatment
        Y = data[self.outcome]
        mp_T = self.graph.markov_pillow([self.treatment], self.n_order)

        if len(mp_T) != 0:
            # fit T | mp(T) and compute probability of treatment for each sample
            formula = self.treatment + " ~ " + "+".join(mp_T)
            model = model_binary(data, formula, weights=rebalance_weights)
            prob_T = model.predict(data)
        else:
            prob_T = np.ones(len(data)) * np.average(
                data[self.treatment], weights=rebalance_weights
            )

        indices_T0 = data.index[data[self.treatment] == 0]
        prob_T[indices_T0] = 1 - prob_T[indices_T0]

        # compute nested IPW estimate
        indices = data[self.treatment] == assignment
        return np.mean((indices / prob_T) * Y)

    def _augmented_nested_ipw(
        self, data, assignment, model_binary=None, model_continuous=None
    ):
        """
        Augmented nested IPW estimator for the counterfactual mean E[Y(t)].

        :param data: pandas data frame containing the data.
        :param assignment: assignment value for treatment.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :return: float corresponding to computed E[Y(t)].
        """

        if self.strategy == "Not ID":
            raise RuntimeError(
                "Nested IPW will not return valid estimates as causal effect is not identified"
            )

        # get the rebalancing weights
        rebalance_weights = self._get_nested_rebalanced_weights(
            data, model_binary, model_continuous
        )

        # extract the outcome and get Markov pillow of the treatment
        Y = data[self.outcome]
        mp_T = self.graph.markov_pillow([self.treatment], self.n_order)

        if len(mp_T) != 0:
            # fit T | mp(T) and predict treatment probabilities
            formula_T = self.treatment + " ~ " + "+".join(mp_T)  # + "+ ones"
            model = model_binary(data, formula_T, weights=rebalance_weights)
            prob_T = model.predict(data)
            formula_Y = (
                self.outcome + " ~ " + self.treatment + "+" + "+".join(mp_T)
            )
        else:
            prob_T = np.ones(len(data)) * np.average(
                data[self.treatment], weights=rebalance_weights
            )
            formula_Y = self.outcome + " ~ " + self.treatment

        indices_T0 = data.index[data[self.treatment] == 0]
        prob_T[indices_T0] = 1 - prob_T[indices_T0]
        indices = data[self.treatment] == assignment

        # fit Y | T=t, mp(T) and predict outcomes under assignment T=t
        data_assign = data.copy()
        data_assign[self.treatment] = assignment
        if self.state_space_map_[self.outcome] == "binary":
            model = model_binary(data, formula_Y, weights=rebalance_weights)
        else:
            model = model_continuous(data, formula_Y, weights=rebalance_weights)
        Yhat_vec = model.predict(data_assign)

        # return ANIPW estimate
        return np.mean((indices / prob_T) * (Y - Yhat_vec) + Yhat_vec)

[docs]    def compute_effect(
        self,
        data,
        estimator,
        model_binary=None,
        model_continuous=None,
        n_bootstraps=0,
        alpha=0.05,
        report_log_odds=True,
    ):
        """
        Bootstrap functionality to compute the Average Causal Effect if the outcome is continuous
        or the Causal Odds Ratio if the outcome is binary. Returns the point estimate
        as well as lower and upper quantiles for a user specified confidence level.

        :param data: pandas data frame containing the data.
        :param estimator: string indicating what estimator to use: e.g. eff-apipw.
        :param model_binary: string specifying modeling strategy to use for binary variables: e.g. glm-binary.
        :param model_continuous: string specifying modeling strategy to use for continuous variables: e.g. glm-continuous.
        :param n_bootstraps: number of bootstraps.
        :param alpha: the significance level with the default value of 0.05.
        :param report_log_odds: if set to True, and the outcome variable is binary, then reports the log odds ratio.
        :return: one float corresponding to ACE/OR if n_bootstraps=0, else three floats corresponding to ACE/OR, lower quantile, upper quantile.
        """
        df = data.copy()

        # instantiate modeling strategy with defaults
        if not model_binary:
            model_binary = self._fit_binary_glm
        if not model_continuous:
            model_continuous = self._fit_continuous_glm

        # add a column of ones to fit intercept terms
        df.loc[:, "ones"] = np.ones(len(df))

        # get state space of all variables
        for colname, colvalues in df.iteritems():
            if set([0, 1]).issuperset(colvalues.unique()):
                self.state_space_map_[colname] = "binary"
            else:
                self.state_space_map_[colname] = "continuous"

        # instantiate estimator and get point estimate of ACE
        method = self.estimators[estimator]
        point_estimate_T1 = method(df, 1, model_binary, model_continuous)
        point_estimate_T0 = method(df, 0, model_binary, model_continuous)

        # if Y is binary report log of odds ration, if Y is continuous report ACE
        if report_log_odds and self.state_space_map_[self.outcome] == "binary":
            ace = np.log(
                (point_estimate_T1 / (1 - point_estimate_T1))
                / (point_estimate_T0 / (1 - point_estimate_T0))
            )
        else:
            ace = point_estimate_T1 - point_estimate_T0

        if n_bootstraps > 0:
            ace_vec = []

            Ql = alpha / 2
            Qu = 1 - alpha / 2

            # iterate over bootstraps
            for iter in range(n_bootstraps):

                # resample the data with replacement
                df_sampled = df.sample(len(df), replace=True)
                df_sampled.reset_index(drop=True, inplace=True)

                # estimate ACE in resampled data
                estimate_T1 = method(
                    df_sampled, 1, model_binary, model_continuous
                )
                estimate_T0 = method(
                    df_sampled, 0, model_binary, model_continuous
                )

                # if Y is binary report log of odds ration, if Y is continuous report ACE
                if self.state_space_map_[self.outcome] == "binary":
                    ace_vec.append(
                        np.log(
                            (estimate_T1 / (1 - estimate_T1))
                            / (estimate_T0 / (1 - estimate_T0))
                        )
                    )
                else:
                    ace_vec.append(estimate_T1 - estimate_T0)

            # calculate the quantiles
            quantiles = np.quantile(ace_vec, q=[Ql, Qu])
            q_low = quantiles[0]
            q_up = quantiles[1]
            return ace, q_low, q_up

        return ace