Source code for pycausal_explorer.meta._doubleML

import numpy as np
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn.utils.validation import check_is_fitted, check_X_y

from pycausal_explorer.base import BaseCausalModel

from sklearn.base import clone


class DoubleMLLinear(BaseCausalModel):
    """
    Double Machine Learning model, linear version.

    Estimates a causal effect using two different models: one models the
    outcome and another models the treatment. Should be used when you believe
    the treatment effect is linear and the treatment variable is continuous.

    Parameters
    ----------
    outcome_learner : estimator object
        Base learner to use when predicting outcome. Should implement fit and
        predict methods. A clone is stored; the object passed in is not
        modified.
    treatment_learner : estimator object
        Base learner to use when predicting treatment. Should implement fit
        and predict methods. A clone is stored; the object passed in is not
        modified.
    score : str, default="partial-out"
        Which score function to use. One of "partial-out" and "orthogonal".
    k_fold : int, default=5
        Forwarded as ``cv`` to ``cross_val_predict`` when producing the
        out-of-fold outcome and treatment predictions.

    Raises
    ------
    ValueError
        If ``score`` is not one of the supported values.
    """

    def __init__(
        self, outcome_learner, treatment_learner, score="partial-out", k_fold=5
    ):
        valid_scores = ["partial-out", "orthogonal"]
        if score not in valid_scores:
            raise ValueError("Score has to be one of " + str(valid_scores))
        self.outcome_learner = clone(outcome_learner)
        self.treatment_learner = clone(treatment_learner)
        self.k_fold = k_fold
        self.score = score
        self.is_fitted_ = False

    @property
    def outcome_leaner(self):
        """Misspelled alias of ``outcome_learner``, kept for backward compatibility."""
        return self.outcome_learner

    @outcome_leaner.setter
    def outcome_leaner(self, value):
        self.outcome_learner = value

    def fit(self, X, y, *, treatment):
        """
        Estimate the (constant) treatment effect from data.

        Parameters
        ----------
        X : array-like
            Covariates. Validated together with ``y`` and ``treatment`` through
            ``check_X_y``.
        y : array-like
            Observed outcome.
        treatment : array-like
            Observed treatment, keyword-only.

        Returns
        -------
        self
            The fitted model.
        """
        X, y = check_X_y(X, y)
        X, w = check_X_y(X, treatment)

        # Out-of-fold predictions: each sample is predicted by a model that
        # was not trained on it.
        pred_outcome = cross_val_predict(self.outcome_learner, X, y, cv=self.k_fold)
        pred_treatment = cross_val_predict(
            self.treatment_learner, X, w, cv=self.k_fold
        )
        treatment_residual = w - pred_treatment

        # The two scores differ only in psi_a: treatment times its residual
        # versus the squared residual.
        # NOTE(review): other DML references appear to call the squared-residual
        # form "partialling out" — confirm that the labels here are intended.
        if self.score == "partial-out":
            self._psi_a = (-w * treatment_residual).mean()
        elif self.score == "orthogonal":
            self._psi_a = (-np.square(treatment_residual)).mean()
        self._psi_b = ((y - pred_outcome) * treatment_residual).mean()

        self.is_fitted_ = True
        return self

    def predict_ite(self, X):
        """
        Return the estimated treatment effect for every row of ``X``.

        The estimate is the single value ``-psi_b / psi_a``; it does not depend
        on the contents of ``X``, only on its number of rows.

        Parameters
        ----------
        X : array-like
            Samples to predict for.

        Returns
        -------
        numpy.ndarray
            1-D array with one (identical) effect estimate per row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # ``is_fitted_`` already exists after __init__, so a bare
        # check_is_fitted(self) would always pass; check the values that
        # ``fit`` actually produces instead.
        check_is_fitted(self, attributes=["_psi_a", "_psi_b"])
        return np.full(np.shape(X)[0], -self._psi_b / self._psi_a)
class DoubleMLBinaryTreatment(BaseCausalModel):
    """
    Double Machine Learning model, binary treatment version.

    Estimates a causal effect using two different models: one models the
    outcome and another models the treatment. Should be used when treatment
    is a binary variable.

    Parameters
    ----------
    outcome_learner : estimator object
        Base learner to use when predicting outcome. Should implement fit and
        predict methods. A clone is stored; the object passed in is not
        modified.
    treatment_learner : estimator object
        Base learner to use when predicting treatment probability. Should be
        a classifier learner that implements fit and predict_proba methods.
        A clone is stored; the object passed in is not modified.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``train_test_split`` when the data is split in ``fit``.
        Pass an int for a reproducible split (and therefore a reproducible
        estimate, given deterministic learners).
    """

    def __init__(self, outcome_learner, treatment_learner, random_state=None):
        self.outcome_learner = clone(outcome_learner)
        self.treatment_learner = clone(treatment_learner)
        self.random_state = random_state
        self.is_fitted_ = False

    @property
    def outcome_leaner(self):
        """Misspelled alias of ``outcome_learner``, kept for backward compatibility."""
        return self.outcome_learner

    @outcome_leaner.setter
    def outcome_leaner(self, value):
        self.outcome_learner = value

    def fit(self, X, y, *, treatment):
        """
        Estimate the average treatment effect from data.

        Both learners are trained on one random half of the data, and the
        effect is then estimated on the other half.

        Parameters
        ----------
        X : array-like
            Covariates. Validated together with ``y`` and ``treatment`` through
            ``check_X_y``.
        y : array-like
            Observed outcome.
        treatment : array-like
            Observed treatment, keyword-only.
            NOTE(review): the code assumes a 0/1 coding (column 1 of
            ``predict_proba`` is used as P(treated)) — confirm with callers.

        Returns
        -------
        self
            The fitted model.
        """
        X, y = check_X_y(X, y)
        X, w = check_X_y(X, treatment)

        # *_t: half used to train the learners; *_r: half used for the estimate.
        X_t, X_r, y_t, y_r, w_t, w_r = train_test_split(
            X, y, w, train_size=0.5, random_state=self.random_state
        )
        reg_size = X_r.shape[0]

        # The outcome model sees the treatment as its first feature column, so
        # both potential outcomes can be predicted by forcing that column to
        # 1 (treated) or 0 (control).
        self.outcome_learner.fit(np.column_stack([w_t, X_t]), y_t)
        pred_y_treat = self.outcome_learner.predict(
            np.column_stack([np.ones(reg_size), X_r])
        )
        pred_y_cont = self.outcome_learner.predict(
            np.column_stack([np.zeros(reg_size), X_r])
        )

        self.treatment_learner.fit(X_t, w_t)
        pred_w = self.treatment_learner.predict_proba(X_r)[:, 1]

        # Doubly-robust (AIPW) estimate: the difference of the outcome-model
        # predictions plus inverse-propensity-weighted residual corrections.
        # Predicted propensities of exactly 0 or 1 lead to a division by zero
        # here; they are not clipped.
        self._ate = (
            pred_y_treat
            - pred_y_cont
            + w_r * (y_r - pred_y_treat) / pred_w
            - (1 - w_r) * (y_r - pred_y_cont) / (1 - pred_w)
        ).mean()

        self.is_fitted_ = True
        return self

    def predict_ite(self, X):
        """
        Return the estimated average treatment effect for every row of ``X``.

        The estimate is a single value; it does not depend on the contents of
        ``X``, only on its number of rows.

        Parameters
        ----------
        X : array-like
            Samples to predict for.

        Returns
        -------
        numpy.ndarray
            1-D array with one (identical) effect estimate per row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # ``is_fitted_`` already exists after __init__, so a bare
        # check_is_fitted(self) would always pass; check the value that
        # ``fit`` actually produces instead.
        check_is_fitted(self, attributes=["_ate"])
        return np.full(np.shape(X)[0], self._ate)