# Source code for ananke.datasets.helpers

"""
Helper functions for loading datasets in Ananke.
"""

import os

import pandas as pd

# Directory containing this module; the dataset loaders in this file join
# their relative CSV paths onto it.
MODULE_PATH = os.path.dirname(__file__)


def load_conditionally_ignorable_data():
    """
    Read the toy dataset for the conditionally ignorable model.

    In this dataset the confounder is Viral Load, T is the treatment and
    the outcome is CD4 counts.

    :return: pandas dataframe.
    """
    # Path is relative to the directory that holds this module.
    relative_name = "simulated/conditionally_ignorable.csv"
    return pd.read_csv(os.path.join(MODULE_PATH, relative_name))
def load_afixable_data():
    """
    Read the toy dataset for an adjustment fixable setting.

    In this dataset T is the treatment and the outcome is CD4 counts.

    :return: pandas dataframe.
    """
    # Path is relative to the directory that holds this module.
    relative_name = "simulated/a_fixable.csv"
    return pd.read_csv(os.path.join(MODULE_PATH, relative_name))
def load_frontdoor_data():
    """
    Read the toy dataset for the frontdoor setting.

    In this dataset T is the treatment and the outcome is CD4 counts.

    :return: pandas dataframe.
    """
    # Path is relative to the directory that holds this module.
    relative_name = "simulated/frontdoor.csv"
    return pd.read_csv(os.path.join(MODULE_PATH, relative_name))
def load_wisconsin_health_study():
    # Raw docstring: the text contains ``\$``, which is an invalid escape
    # sequence in a regular string literal and triggers a warning at compile
    # time. The raw prefix keeps the backslash literally, so the documented
    # text is unchanged.
    r"""
    Load the dataset extract from the Wisconsin Health Study presented in [1].

    Columns are defined as follows:

    X: an indicator of whether family income in 1957 was above \$5k;

    Y: an indicator of whether the respondent's income in 1992 was above \$37k;

    M: an indicator of whether the respondent was drafted into the military;

    E: an indicator of whether the respondent had education beyond high school.

    count: the count of each event in (X, Y, M, E)

    [1] R. J. Evans and T. S. Richardson, “Smooth, identifiable supermodels
    of discrete DAG models with latent variables,” Bernoulli, vol. 25, no. 2,
    pp. 848–876, May 2019, doi: 10.3150/17-BEJ1005.

    :return: pandas dataframe.
    """
    # Path is relative to the directory that holds this module.
    path = os.path.join(MODULE_PATH, "real/evans_richardson_wisconsin.csv")
    return pd.read_csv(path)