Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions pygam/pygam.py
Original file line number Diff line number Diff line change
Expand Up @@ -2436,7 +2436,7 @@ def predict(self, X):

def predict_proba(self, X):
"""
preduct targets given model and input X
predict target probabilities given model and input X

Parameters
---------
Expand All @@ -2445,10 +2445,31 @@ def predict_proba(self, X):

Returns
-------
y : np.array of shape (n_samples,)
y : np.array of shape (n_samples, 2)
containing the predicted probabilities of class 0 and class 1 under the model
"""
return self.predict_mu(X)
p = self.predict_mu(X).ravel()
return np.c_[1-p, p]

def decision_function(self, X, *args, **kwargs):
    """
    return the linear prediction for X

    minimal alias of ``_linear_predictor``, for compatibility with sklearn
    multiclass classification classes.

    Parameters
    ---------
    X : array-like of shape (n_samples, m_features)
        containing the input dataset
    *args, **kwargs
        forwarded unchanged to ``_linear_predictor``

    Returns
    -------
    lp : np.array of shape (n_samples,)
        whatever ``_linear_predictor`` returns for X

    See Also
    --------
    _linear_predictor : for clarification
    """
    # pure delegation: no transformation is applied to the arguments or result
    linear_predictor = self._linear_predictor
    return linear_predictor(X, *args, **kwargs)


class PoissonGAM(GAM):
Expand Down
35 changes: 35 additions & 0 deletions pygam/tests/test_GAM_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ def test_LogisticGAM_accuracy(default_X_y):
acc1 = gam.accuracy(X, y)
assert(acc0 == acc1)

def test_LogisticGAM_decision_function(default_X_y):
    """
    decision_function should return exactly what _linear_predictor returns,
    so that sklearn's decision function can be computed
    """
    features, target = default_X_y
    model = LogisticGAM().fit(features, target)

    expected = model._linear_predictor(features)
    observed = model.decision_function(features)

    # element-wise equality must hold for every sample
    assert (expected == observed).all()

def test_PoissonGAM_exposure(coal_X_y):
"""
check that we can fit a Poisson GAM with exposure, and it scales predictions
Expand Down Expand Up @@ -532,3 +543,27 @@ def test_r_squared_for_new_dataset(self, mcycle_gam, mcycle_X_y):
"""
X, y = mcycle_X_y
mcycle_gam._estimate_r2(X, y)

def test_predict_proba(self, toy_classification_X_y):
    """
    regression test

    to conform to sklearn's API, predict_proba should emit one row per
    prediction, where column 0 is the probability of belonging to class 0
    and column 1 is the probability of belonging to class 1
    """
    n_samples = 500
    X, y = toy_classification_X_y
    X, y = X[:n_samples], y[:n_samples]

    probs = LogisticGAM().fit(X, y).predict_proba(X)

    # one row per sample, one column per class
    assert probs.shape == (n_samples, 2)

    # the more probable column should agree with the label more often than not
    predicted_class = np.argmax(probs, axis=1)
    assert (predicted_class == y).mean() > 0.5

    # every row must sum to 1
    assert np.allclose(probs.sum(axis=1), 1)