dswah · dswah · Oct 3, 2018 · Oct 18, 2018 · Oct 24, 2018
diff --git a/pygam/pygam.py b/pygam/pygam.py
@@ -2436,7 +2436,7 @@ def predict(self, X):
 
     def predict_proba(self, X):
         """
-        preduct targets given model and input X
+        predict target probabilities given model and input X
 
         Parameters
         ---------
@@ -2445,10 +2445,31 @@ def predict_proba(self, X):
 
         Returns
         -------
-        y : np.array of shape (n_samples,)
+        y : np.array of shape (n_samples, 2)
             containing expected values under the model
         """
-        return self.predict_mu(X)
+        p = self.predict_mu(X).ravel()
+        return np.c_[1-p, p]
+
+    def decision_function(self, X, *args, **kwargs):
+        """
+        minimal alias of the linear prediction, for compatibility with sklearn
+        multiclass classification classes.
+
+        Parameters
+        ---------
+        X : array-like of shape (n_samples, m_features)
+            containing the input dataset
+
+        Returns
+        -------
+        lp : np.array of shape (n_samples,)
+
+        See Also
+        --------
+        _linear_predictor : computes the linear prediction returned here
+        """
+        return self._linear_predictor(X, *args, **kwargs)
 
 
 class PoissonGAM(GAM):

diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py
@@ -29,6 +29,17 @@ def test_LogisticGAM_accuracy(default_X_y):
     acc1 = gam.accuracy(X, y)
     assert(acc0 == acc1)
 
+def test_LogisticGAM_decision_function(default_X_y):
+    """
+    check that we can compute sklearn's decision function
+    """
+    X, y = default_X_y
+    gam = LogisticGAM().fit(X, y)
+
+    lin_pred = gam._linear_predictor(X)
+    dec_func = gam.decision_function(X)
+    assert(lin_pred == dec_func).all()
+
 def test_PoissonGAM_exposure(coal_X_y):
     """
     check that we can fit a Poisson GAM with exposure, and it scales predictions
@@ -532,3 +543,27 @@ def test_r_squared_for_new_dataset(self, mcycle_gam, mcycle_X_y):
         """
         X, y = mcycle_X_y
         mcycle_gam._estimate_r2(X, y)
+
+    def test_predict_proba(self, toy_classification_X_y):
+        """
+        regression test
+
+        in order to conform to sklearn's API, predict_proba should emit a vector
+        for each prediction, where the first value is the probability of belonging to class 0,
+        and the second is the probability of belonging to class 1
+        """
+        X, y = toy_classification_X_y
+        X = X[:500]
+        y = y[:500]
+
+        gam = LogisticGAM().fit(X, y)
+        ps = gam.predict_proba(X)
+
+        # check shape
+        assert ps.shape == (500, 2)
+
+        # check ordering of probabilities
+        assert (np.argmax(ps, axis=1) == y).mean() > 0.5
+
+        # check sum to 1
+        assert np.allclose(ps.sum(axis=1), 1)