Skip to content
16 changes: 14 additions & 2 deletions discretesampling/base/algorithms/mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import copy
from tqdm.auto import tqdm
from discretesampling.base.random import RNG
from discretesampling.base.output import MCMCOutput


class DiscreteVariableMCMC():
Expand All @@ -12,15 +13,20 @@ def __init__(self, variableType, target, initialProposal):
self.initialProposal = initialProposal
self.target = target

def sample(self, N, seed=0, verbose=True):
def sample(self, N, N_warmup=None, seed=0, include_warmup=True, verbose=True):
rng = RNG(seed)

if N_warmup is None:
N_warmup = int(N/2)

initialSample = self.initialProposal.sample(rng)
current = initialSample

samples = []

display_progress_bar = verbose
progress_bar = tqdm(total=N, desc="MCMC sampling", disable=not display_progress_bar)
acceptance_probabilities = []

for i in range(N):
forward_proposal = self.proposalType(current, rng)
Expand Down Expand Up @@ -49,7 +55,13 @@ def sample(self, N, seed=0, verbose=True):
pass

samples.append(copy.copy(current))
acceptance_probabilities.append(acceptance_probability)
progress_bar.update(1)

if not include_warmup:
samples = samples[(N_warmup-1):N]
acceptance_probabilities = acceptance_probabilities[(N_warmup-1):N]

results = MCMCOutput(samples, acceptance_probabilities, include_warmup, N, N_warmup)
progress_bar.close()
return samples
return results
10 changes: 8 additions & 2 deletions discretesampling/base/algorithms/smc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from discretesampling.base.algorithms.smc_components.normalisation import normalise
from discretesampling.base.algorithms.smc_components.effective_sample_size import ess
from discretesampling.base.algorithms.smc_components.resampling import systematic_resampling
from discretesampling.base.output import SMCOutput


class DiscreteVariableSMC():
Expand All @@ -29,7 +30,7 @@ def __init__(self, variableType, target, initialProposal,
self.initialProposal = initialProposal
self.target = target

def sample(self, Tsmc, N, seed=0, verbose=True):
def sample(self, Tsmc, N, seed=0, gather_results=True, verbose=True):
loc_n = int(N/self.exec.P)
rank = self.exec.rank
mvrs_rng = RNG(seed)
Expand Down Expand Up @@ -79,5 +80,10 @@ def sample(self, Tsmc, N, seed=0, verbose=True):
current_particles = new_particles
progress_bar.update(1)

if gather_results:
current_particles = self.exec.gather_all(current_particles)
logWeights = self.exec.gather(logWeights, [N])

results = SMCOutput(current_particles, logWeights)
progress_bar.close()
return current_particles
return results
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np
from discretesampling.base.executor import Executor


def ess(logw, exec):
def ess(logw, exec: Executor = Executor()):
"""
Description
-----------
Expand Down
3 changes: 3 additions & 0 deletions discretesampling/base/executor/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ def redistribute(self, particles, ncopies):
[[particles[i]]*ncopies[i] for i in range(len(particles))]
))
return particles

def gather_all(self, particles):
return particles
5 changes: 5 additions & 0 deletions discretesampling/base/executor/executor_MPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from scipy.special import logsumexp
from discretesampling.base.executor import Executor
from discretesampling.base.util import gather_all
from discretesampling.base.executor.MPI.distributed_fixed_size_redistribution.prefix_sum import (
inclusive_prefix_sum
)
Expand Down Expand Up @@ -61,3 +62,7 @@ def cumsum(self, x):

def redistribute(self, particles, ncopies):
return variable_size_redistribution(particles, ncopies, self)

def gather_all(self, particles):
particles = gather_all(particles, exec=self)
return particles
3 changes: 3 additions & 0 deletions discretesampling/base/output/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .base import BaseOutput # noqa
from .smc_output import SMCOutput # noqa
from .mcmc_output import MCMCOutput # noqa
13 changes: 13 additions & 0 deletions discretesampling/base/output/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from abc import ABC
import numpy as np


class BaseOutput(ABC):
def __init__(self, samples):
self.samples = samples
self.log_weights = np.repeat([-len(samples)], len(samples))

def __eq__(self, other):
samples_eq = np.array_equal(self.samples, other.samples)
weights_eq = np.array_equal(self.log_weights, other.log_weights)
return samples_eq and weights_eq
24 changes: 24 additions & 0 deletions discretesampling/base/output/mcmc_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
from discretesampling.base.types import DiscreteVariable
from discretesampling.base.output import BaseOutput


class MCMCOutput(BaseOutput):
def __init__(
self,
samples: list[DiscreteVariable],
acceptance_probabilities=None,
include_warmup=None,
N=None, N_warmup=None
):
super().__init__(samples)
self.acceptance_probabilities = acceptance_probabilities
self.include_warmup = include_warmup
if N is None:
N = len(samples)
self.N = N
self.N_warmup = N_warmup
acceptance_rate = None
if acceptance_probabilities is not None:
acceptance_rate = np.mean(acceptance_probabilities)
self.acceptance_rate = acceptance_rate
17 changes: 17 additions & 0 deletions discretesampling/base/output/smc_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
from typing import Union
from discretesampling.base.types import DiscreteVariable
from discretesampling.base.output import BaseOutput
from discretesampling.base.algorithms.smc_components import ess


class SMCOutput(BaseOutput):
def __init__(
self,
samples: list[DiscreteVariable],
log_weights: Union[np.ndarray, None] = None
):
super().__init__(samples)
if log_weights is not None:
self.log_weights = log_weights
self.ess = ess(self.log_weights)
6 changes: 3 additions & 3 deletions examples/additive_structure_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def func(x):


# Comparison between the three methods based on sampling the true structure
mcmc = [x.discrete_set for x in asSamplesMCMC]
lkern = [x.discrete_set for x in asSamplesLk]
smc = [x.discrete_set for x in asSamplesSMC]
mcmc = [x.discrete_set for x in asSamplesMCMC.samples]
lkern = [x.discrete_set for x in asSamplesLk.samples]
smc = [x.discrete_set for x in asSamplesSMC.samples]

mcmc.count(true_str)
lkern.count(true_str)
Expand Down
4 changes: 2 additions & 2 deletions examples/decision_tree_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
try:
treeSamples = dtMCMC.sample(500)

mcmcLabels = dt.stats(treeSamples, X_test).predict(X_test, use_majority=True)
mcmcLabels = dt.stats(treeSamples.samples, X_test).predict(X_test, use_majority=True)
mcmcAccuracy = [dt.accuracy(y_test, mcmcLabels)]
print("MCMC mean accuracy: ", (mcmcAccuracy))
except ZeroDivisionError:
Expand All @@ -40,7 +40,7 @@
try:
treeSMCSamples = dtSMC.sample(10, 1000)

smcLabels = dt.stats(treeSMCSamples, X_test).predict(X_test, use_majority=True)
smcLabels = dt.stats(treeSMCSamples.samples, X_test).predict(X_test, use_majority=True)
smcAccuracy = [dt.accuracy(y_test, smcLabels)]
print("SMC mean accuracy: ", np.mean(smcAccuracy))

Expand Down
2 changes: 1 addition & 1 deletion examples/mcmc_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

try:
treeSamples = dtMCMC.sample(N=1000)
mcmcLabels = dt.stats(treeSamples[500:999], X_test).predict(X_test)
mcmcLabels = dt.stats(treeSamples.samples[500:999], X_test).predict(X_test)
mcmc_acc = dt.accuracy(y_test, mcmcLabels)
print(numpy.mean(mcmc_acc))
except ZeroDivisionError:
Expand Down
6 changes: 1 addition & 5 deletions examples/mpi/decision_tree_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from discretesampling.base.algorithms import DiscreteVariableSMC
from discretesampling.domain import decision_tree as dt
from discretesampling.base.executor.executor_MPI import Executor_MPI
from discretesampling.base.util import gather_all

data = datasets.load_wine()

Expand Down Expand Up @@ -35,10 +34,7 @@
MPI.COMM_WORLD.Barrier()
end = MPI.Wtime()

if MPI.COMM_WORLD.Get_size() > 1:
treeSMCSamples = gather_all(treeSMCSamples, exec)

smcLabels = dt.stats(treeSMCSamples, X_test).predict(X_test)
smcLabels = dt.stats(treeSMCSamples.samples, X_test).predict(X_test)
smcAccuracy = dt.accuracy(y_test, smcLabels)

if MPI.COMM_WORLD.Get_rank() == 0:
Expand Down
5 changes: 1 addition & 4 deletions examples/mpi/regression_tree_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from discretesampling.base.algorithms import DiscreteVariableSMC
from discretesampling.domain import decision_tree as dt
from discretesampling.base.executor.executor_MPI import Executor_MPI
from discretesampling.base.util import gather_all

data = datasets.load_diabetes()

Expand All @@ -32,10 +31,8 @@
treeSMCSamples = dtSMC.sample(T, N, seed)
MPI.COMM_WORLD.Barrier()
end = MPI.Wtime()
if MPI.COMM_WORLD.Get_size() > 1:
treeSMCSamples = gather_all(treeSMCSamples, exec)

smcLabels = dt.RegressionStats(treeSMCSamples, X_test).predict(X_test)
smcLabels = dt.RegressionStats(treeSMCSamples.samples, X_test).predict(X_test)
smcAccuracy = dt.accuracy_mse(y_test, smcLabels)

if MPI.COMM_WORLD.Get_rank() == 0:
Expand Down
4 changes: 2 additions & 2 deletions examples/regression_decision_tree_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
try:
treeSamples = dtMCMC.sample(100)

mcmcLabels = dt.RegressionStats(treeSamples, X_test).predict(X_test, use_majority=True)
mcmcLabels = dt.RegressionStats(treeSamples.samples, X_test).predict(X_test, use_majority=True)
mcmcAccuracy = [dt.accuracy_mse(y_test, mcmcLabels)]

print("MCMC mean MSE: ", (mcmcAccuracy))
Expand All @@ -31,7 +31,7 @@
try:
treeSMCSamples = dtSMC.sample(10, 10)

smcLabels = dt.RegressionStats(treeSMCSamples, X_test).predict(X_test, use_majority=True)
smcLabels = dt.RegressionStats(treeSMCSamples.samples, X_test).predict(X_test, use_majority=True)
smcAccuracy = [dt.accuracy_mse(y_test, smcLabels)]

print("SMC mean MSE: ", (smcAccuracy))
Expand Down
2 changes: 1 addition & 1 deletion examples/smc_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

try:
treeSamples = dtSMC.sample(10, 10)
smcLabels = dt.stats(treeSamples, X_test).predict(X_test, use_majority=True)
smcLabels = dt.stats(treeSamples.samples, X_test).predict(X_test, use_majority=True)
smc_acc = dt.accuracy(y_test, smcLabels)
print(numpy.mean(smc_acc))
except ZeroDivisionError:
Expand Down
10 changes: 8 additions & 2 deletions tests/test_mcmc.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import pytest
from discretesampling.base.algorithms import DiscreteVariableMCMC
from discretesampling.domain import spectrum
from discretesampling.base.output import MCMCOutput


@pytest.mark.parametrize(
"seed,T,expected",
[(0, 10, [spectrum.SpectrumDimension(i) for i in [31, 30, 30, 30, 30, 29, 28, 27, 28, 27]]),
(1, 10, [spectrum.SpectrumDimension(i) for i in [27, 28, 27, 26, 25, 26, 27, 26, 25, 24]])]
[
(0, 10, MCMCOutput(
[spectrum.SpectrumDimension(i) for i in [31, 30, 30, 30, 30, 29, 28, 27, 28, 27]]
)),
(1, 10, MCMCOutput(
[spectrum.SpectrumDimension(i) for i in [27, 28, 27, 26, 25, 26, 27, 26, 25, 24]]))
]

)
def test_mcmc(seed, T, expected):
Expand Down
42 changes: 35 additions & 7 deletions tests/test_smc.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,31 @@
import pytest
import numpy as np
from discretesampling.base.algorithms import DiscreteVariableSMC
from discretesampling.base.util import gather_all
from discretesampling.base.executor.executor_MPI import Executor_MPI
from discretesampling.domain import spectrum
from discretesampling.base.output import SMCOutput

# Test reproducibility


@pytest.mark.parametrize(
"seed,T,N,expected",
[(0, 3, 16, [spectrum.SpectrumDimension(i) for i in [15, 13, 15, 6, 6, 14, 8, 8, 8, 6, 6, 6, 14, 14, 12, 10]]),
(1, 3, 16, [spectrum.SpectrumDimension(i) for i in [13, 15, 15, 6, 2, 6, 18, 8, 6, 6, 6, 8, 14, 12, 10, 12]])]
[
(0, 3, 16, SMCOutput(
[spectrum.SpectrumDimension(i) for i in [15, 13, 15, 6, 6, 14, 8, 8, 8, 6, 6, 6, 14, 14, 12, 10]],
np.array([-3.3423949417970995, -2.6927167512983843, -3.3423949417970995, -2.5485352600349263, -2.5485352600349263,
-1.7791312350561306, -2.807685770214025, -2.807685770214025, -2.807685770214025, -3.2906867661536747,
-3.2906867661536747, -3.2906867661536747, -3.286288420776657, -3.286288420776657, -2.753770144750778,
-2.4895429738738315])
)),
(1, 3, 16, SMCOutput(
[spectrum.SpectrumDimension(i) for i in [13, 15, 15, 6, 2, 6, 18, 8, 6, 6, 6, 8, 14, 12, 10, 12]],
np.array([-2.577088421206954, -3.2267666117056693, -3.2267666117056693, -2.432906929943496, -5.389379226012296,
-2.432906929943496, -3.37139647864896, -2.692057440122595, -3.1750584360622445, -3.1750584360622445,
-3.1750584360622445, -2.692057440122595, -3.1706600906852267, -2.6381418146593476, -2.3739146437824012,
-2.6381418146593476])
)
)]
)
def test_smc(seed, T, N, expected):
target = spectrum.SpectrumDimensionTarget(10, 3.4) # NB with mean 10 and variance 3.4^2
Expand All @@ -25,15 +40,28 @@ def test_smc(seed, T, N, expected):
@pytest.mark.mpi
@pytest.mark.parametrize(
"seed,T,N,expected",
[(0, 3, 16, [spectrum.SpectrumDimension(i) for i in [15, 13, 15, 6, 6, 14, 8, 8, 8, 6, 6, 6, 14, 14, 12, 10]]),
(1, 3, 16, [spectrum.SpectrumDimension(i) for i in [13, 15, 15, 6, 2, 6, 18, 8, 6, 6, 6, 8, 14, 12, 10, 12]])]
[
(0, 3, 16, SMCOutput(
[spectrum.SpectrumDimension(i) for i in [15, 13, 15, 6, 6, 14, 8, 8, 8, 6, 6, 6, 14, 14, 12, 10]],
np.array([-3.3423949417970995, -2.6927167512983843, -3.3423949417970995, -2.5485352600349263, -2.5485352600349263,
-1.7791312350561306, -2.807685770214025, -2.807685770214025, -2.807685770214025, -3.2906867661536747,
-3.2906867661536747, -3.2906867661536747, -3.286288420776657, -3.286288420776657, -2.753770144750778,
-2.4895429738738315])
)),
(1, 3, 16, SMCOutput(
[spectrum.SpectrumDimension(i) for i in [13, 15, 15, 6, 2, 6, 18, 8, 6, 6, 6, 8, 14, 12, 10, 12]],
np.array([-2.577088421206954, -3.2267666117056693, -3.2267666117056693, -2.432906929943496, -5.389379226012296,
-2.432906929943496, -3.37139647864896, -2.692057440122595, -3.1750584360622445, -3.1750584360622445,
-3.1750584360622445, -2.692057440122595, -3.1706600906852267, -2.6381418146593476, -2.3739146437824012,
-2.6381418146593476])
))
]
)
def test_smc_MPI(seed, T, N, expected):
target = spectrum.SpectrumDimensionTarget(10, 3.4) # NB with mean 10 and variance 3.4^2
initialProposal = spectrum.SpectrumDimensionInitialProposal(50) # Uniform sampling from 0-50

exec = Executor_MPI()
specSMC = DiscreteVariableSMC(spectrum.SpectrumDimension, target, initialProposal, exec=exec)
local_samples = specSMC.sample(T, N, seed=seed)
samples = gather_all(local_samples, exec)
samples = specSMC.sample(T, N, seed=seed)
assert samples == expected