4. Examples

The following examples are available in a checkout of the repository in the examples/ directory.

4.1. MCMC

'''This example illustrates how to run a Markov Chain using pypmc'''

import numpy as np
import pypmc

# define a proposal
prop_dof   = 1.
prop_sigma = np.array([[0.1 , 0.  ]
                      ,[0.  , 0.02]])
prop = pypmc.density.student_t.LocalStudentT(prop_sigma, prop_dof)

# define the target; i.e., the function you want to sample from.
# In this case, it is a Gaussian with mean "target_mean" and
# covariance "target_sigma".
#
# Note that the target function "log_target" returns the log of the
# unnormalized gaussian density.
target_sigma = np.array([[0.01 , 0.003 ]
                        ,[0.003, 0.0025]])
inv_target_sigma = np.linalg.inv(target_sigma)
target_mean  = np.array([4.3, 1.1])

def unnormalized_log_pdf_gauss(x, mu, inv_sigma):
    diff = x - mu
    return -0.5 * diff.dot(inv_sigma).dot(diff)

log_target = lambda x: unnormalized_log_pdf_gauss(x, target_mean, inv_target_sigma)

# choose a bad initialization
start = np.array([-2., 10.])

# define the markov chain object
mc = pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, prop, start)

# run burn-in
mc.run(10**4)

# delete burn-in from samples
mc.clear()

# run 100,000 steps, adapting the proposal every 500 steps,
# and accumulate the accept count returned by mc.run
accept_count = 0
for i in range(200):
    accept_count += mc.run(500)
    mc.adapt()

# extract a reference to the history of all visited points
values = mc.samples[:]
accept_rate = float(accept_count) / len(values)
print("The chain accepted %4.2f%% of the proposed points" % (accept_rate * 100) )

# plot the result
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

plt.hexbin(values[:,0], values[:,1], gridsize = 40, cmap='gray_r')
plt.show()

[Figure: markov_chain.png]
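
As a quick sanity check (not part of the example script), the empirical moments of the chain can be compared with the known target moments; a minimal sketch, reusing ``values``, ``target_mean`` and ``target_sigma`` from above:

# compare the empirical moments of the chain with the target moments
print("sample mean:       ", values.mean(axis=0))
print("target mean:       ", target_mean)
print("sample covariance:\n", np.cov(values, rowvar=False))
print("target covariance:\n", target_sigma)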

4.2. PMC

4.2.1. Serial

'''This example shows how to use importance sampling and how to
adapt the proposal density using the pmc algorithm.

'''

import numpy as np
import pypmc


# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate


# define the initial proposal density
# In this case, it is a mixture of three Gaussians:
# the initial covariances are set to the unit matrix,
# the initial component weights are set equal
initial_prop_means = []
initial_prop_means.append( np.array([ 4.0, 0.0]) )
initial_prop_means.append( np.array([-5.0, 0.0]) )
initial_prop_means.append( np.array([ 0.0, 0.0]) )
initial_prop_covariance = np.eye(2)

initial_prop_components = []
for i in range(3):
    initial_prop_components.append(pypmc.density.gauss.Gauss(initial_prop_means[i], initial_prop_covariance))

initial_proposal = pypmc.density.mixture.MixtureDensity(initial_prop_components)


# define an ImportanceSampler object
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, initial_proposal)


# draw 10,000 samples, adapting the proposal every 1,000 samples,
# and save the generating proposal component for each sample, which is
# returned by sampler.run
# Note: With too few samples, components may die out and one mode might be lost.
generating_components = []
for i in range(10):
    print("\rstep", i, "...\n\t", end='')

    # draw 1,000 samples and save the generating component
    generating_components.append(sampler.run(10**3, trace_sort=True))

    # get a reference to the weights and samples that have just been generated
    samples = sampler.samples[-1]
    weights = sampler.weights[-1][:,0]

    # update the proposal in place (copy=False) using the Rao-Blackwellized pmc algorithm
    pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, weights, generating_components[-1],
                                     mincount=20, rb=True, copy=False)

print("\rsampling finished")
print(  '-----------------')
print('\n')

# print information about the adapted proposal
print('initial component weights:', initial_proposal.weights)
print('final   component weights:', sampler.proposal.weights)
print('target  component weights:', component_weights)
print()
for k, m in enumerate([mean0, mean1, None]):
    print('initial mean of component %i:' %k, initial_proposal.components[k].mu)
    print('final   mean of component %i:' %k, sampler.proposal.components[k].mu)
    print('target  mean of component %i:' %k, m)
    print()
print()
for k, c in enumerate([covariance0, covariance1, None]):
    print('initial covariance of component %i:\n' %k, initial_proposal.components[k].sigma, sep='')
    print()
    print('final   covariance of component %i:\n' %k, sampler.proposal.components[k].sigma, sep='')
    print()
    print('target  covariance of component %i:\n' %k, c, sep='')
    print('\n')


# plot results
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.xlim(-6.0, +6.000)
    plt.ylim(-0.2, +1.401)

plt.subplot(221)
plt.title('target mixture')
pypmc.tools.plot_mixture(target_mixture, cmap='jet')
set_axlimits()

plt.subplot(222)
plt.title('pmc fit')
pypmc.tools.plot_mixture(sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
set_axlimits()

plt.subplot(223)
plt.title('target mixture and pmc fit')
pypmc.tools.plot_mixture(target_mixture, cmap='jet')
pypmc.tools.plot_mixture(sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
set_axlimits()

plt.subplot(224)
plt.title('weighted samples')
plt.hist2d(sampler.samples[-1][:,0], sampler.samples[-1][:,1], weights=sampler.weights[-1][:,0], cmap='gray_r', bins=200)
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: pmc.png]
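
To judge how well the adapted proposal matches the target, the normalized perplexity and effective sample size of the importance weights can be monitored; a minimal sketch reusing the ``sampler`` from above (the same check could equally be placed inside the adaptation loop):

# weights of the last importance-sampling run; values close to 1 indicate a good proposal
last_weights = sampler.weights[-1][:,0]
print('perplexity:           ', pypmc.tools.convergence.perp(last_weights))
print('effective sample size:', pypmc.tools.convergence.ess(last_weights))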

4.2.2. Parallel

'''This example shows how to use importance sampling and how to
adapt the proposal density using the pmc algorithm in an MPI
parallel environment.
In order to have a multi-process environment, invoke this script with
"mpirun -n 10 python pmc_mpi.py".

'''


from mpi4py.MPI import COMM_WORLD as comm

import numpy as np
import pypmc
import pypmc.tools.parallel_sampler # this submodule is NOT imported by ``import pypmc``

# This script is a parallelized version of the PMC example ``pmc.py``.
# The following lines just define a target density and an initial proposal.
# These steps are exactly the same as in ``pmc.py``:

# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate


# define the initial proposal density
# In this case, it is a mixture of three Gaussians:
# the initial covariances are set to the unit matrix,
# the initial component weights are set equal
initial_prop_means = []
initial_prop_means.append( np.array([ 4.0, 0.0]) )
initial_prop_means.append( np.array([-5.0, 0.0]) )
initial_prop_means.append( np.array([ 0.0, 0.0]) )
initial_prop_covariance = np.eye(2)

initial_prop_components = []
for i in range(3):
    initial_prop_components.append(pypmc.density.gauss.Gauss(initial_prop_means[i], initial_prop_covariance))

initial_proposal = pypmc.density.mixture.MixtureDensity(initial_prop_components)

# -----------------------------------------------------------------------------------------------------------------------

# In ``pmc.py`` the following line defines the sequential single process sampler:
# sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, initial_proposal)
#
# We now use the parallel MPISampler instead:
SequentialIS = pypmc.sampler.importance_sampling.ImportanceSampler
parallel_sampler = pypmc.tools.parallel_sampler.MPISampler(SequentialIS, target=log_target, proposal=initial_proposal)

# Draw 10,000 samples adapting the proposal every 1,000 samples:

# make sure that every process has a different random number generator seed
if comm.Get_rank() == 0:
    seed = np.random.randint(1e5)
else:
    seed = None
seed = comm.bcast(seed)
np.random.seed(seed + comm.Get_rank())

generating_components = []
for i in range(10):
    # With the invocation "mpirun -n 10 python pmc_mpi.py", there are
    # 10 processes, so ``parallel_sampler.run(1000//comm.Get_size())``
    # makes each process draw 100 samples, i.e. 1,000 samples in total.
    # ``parallel_sampler.run`` also returns the generating proposal
    # component for each sample.
    # In the master process, ``parallel_sampler.run`` returns a list containing
    # the return values of the sequential ``run`` method of every process.
    # In all other processes, it returns the generating components of
    # that process's own samples only.
    last_generating_components = parallel_sampler.run(1000//comm.Get_size(), trace_sort=True)

    # In addition to the generating components, ``parallel_sampler.run``
    # automatically sends all samples to the master process, i.e. the
    # process for which comm.Get_rank() == 0.
    if comm.Get_rank() == 0:
        print("\rstep", i, "...\n\t", end='')

        # Now let PMC run only in the master process:

        # ``parallel_sampler.samples_list`` and ``parallel_sampler.weights_list`` store the
        # weighted samples sorted by the responsible process:
        # the History object held by process i can be accessed via
        # ``parallel_sampler.<samples/weights>_list[i]``. The master process (i=0) also produces samples.

        # Combine the weights and samples from all processes into two arrays of 1,000 entries each
        samples = np.vstack([history_item[-1] for history_item in parallel_sampler.samples_list])
        weights = np.vstack([history_item[-1] for history_item in parallel_sampler.weights_list])[:,0]

        # The latent variables are stored in ``last_generating_components``.
    # ``last_generating_components[i]`` is an array with the generating
        # components of the samples produced by process number "i".
        # ``np.hstack(last_generating_components)`` combines the generating components
        # from all processes to one array holding all 1,000 entries.
        generating_components.append(  np.hstack(last_generating_components)  )

        # adapt the proposal using the samples from all processes
        new_proposal =  pypmc.mix_adapt.pmc.gaussian_pmc(samples, parallel_sampler.sampler.proposal,
                                                         weights, generating_components[-1],
                                                         mincount=20, rb=True)
    else:
        # In order to broadcast the ``new_proposal``, define a dummy variable in the other processes
        # see "MPI4Py tutorial", section "Collective Communication": http://mpi4py.scipy.org/docs/usrman/tutorial.html
        new_proposal = None

    # broadcast the ``new_proposal``
    new_proposal = comm.bcast(new_proposal)

    # replace the old proposal
    parallel_sampler.sampler.proposal = new_proposal

# only the master process shall print out any final information
if comm.Get_rank() == 0:
    all_samples  = np.vstack([history_item[ :] for history_item in parallel_sampler.samples_list])
    all_weights  = np.vstack([history_item[ :] for history_item in parallel_sampler.weights_list])
    last_samples = np.vstack([history_item[-1] for history_item in parallel_sampler.samples_list])
    last_weights = np.vstack([history_item[-1] for history_item in parallel_sampler.weights_list])
    print("\rsampling finished", end=', ')
    print("collected " + str(len(all_samples)) + " samples")
    print(  '------------------------------------------')
    print('\n')

    # print information about the adapted proposal
    print('initial component weights:', initial_proposal.weights)
    print('final   component weights:', parallel_sampler.sampler.proposal.weights)
    print('target  component weights:', component_weights)
    print()
    for k, m in enumerate([mean0, mean1, None]):
        print('initial mean of component %i:' %k, initial_proposal.components[k].mu)
        print('final   mean of component %i:' %k, parallel_sampler.sampler.proposal.components[k].mu)
        print('target  mean of component %i:' %k, m)
        print()
    print()
    for k, c in enumerate([covariance0, covariance1, None]):
        print('initial covariance of component %i:\n' %k, initial_proposal.components[k].sigma, sep='')
        print()
        print('final   covariance of component %i:\n' %k, parallel_sampler.sampler.proposal.components[k].sigma, sep='')
        print()
        print('target  covariance of component %i:\n' %k, c, sep='')
        print('\n')

    if comm.Get_size() == 1:
        print('******************************************************')
        print('********** NOTE: There is only one process. **********')
        print('******** try "mpirun -n 10 python pmc_mpi.py" ********')
        print('******************************************************')

    # plot results
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print('For plotting "matplotlib" needs to be installed')
        exit(1)

    def set_axlimits():
        plt.xlim(-6.0, +6.000)
        plt.ylim(-0.2, +1.401)

    plt.subplot(221)
    plt.title('target mixture')
    pypmc.tools.plot_mixture(target_mixture, cmap='jet')
    set_axlimits()

    plt.subplot(222)
    plt.title('pmc fit')
    pypmc.tools.plot_mixture(parallel_sampler.sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
    set_axlimits()

    plt.subplot(223)
    plt.title('target mixture and pmc fit')
    pypmc.tools.plot_mixture(target_mixture, cmap='jet')
    pypmc.tools.plot_mixture(parallel_sampler.sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
    set_axlimits()

    plt.subplot(224)
    plt.title('weighted samples')
    plt.hist2d(last_samples[:,0], last_samples[:,1], weights=last_weights[:,0], cmap='gray_r', bins=200)
    set_axlimits()

    plt.tight_layout()
    plt.show()

[Figure: pmc_mpi.png]
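
As an additional diagnostic (not part of the script above), the master process could report the normalized effective sample size of the final run; a minimal sketch, assuming it is executed in the ``comm.Get_rank() == 0`` branch after ``last_weights`` has been built:

# values close to 1 indicate that the adapted proposal matches the target well
last_ess = pypmc.tools.convergence.ess(last_weights[:,0])
print('normalized effective sample size of the last run:', last_ess)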

4.3. Grouping by Gelman-Rubin R value

'''This example illustrates how to group Markov Chains according to the
Gelman-Rubin R value (see [GR92]_).

'''

import numpy as np
import pypmc

# A Markov Chain can only explore a local mode of the target function.
# The Gelman-Rubin R value can be used to determine whether N chains
# explored the same mode. pypmc offers a function which groups chains
# whose common R value is less than some ``critical_r``.
#
# In this example, we run five Markov Chains initialized in different
# modes and then group together those chains that explored the same mode.


# define a proposal
# this defines the same initial proposal for all chains
prop_dof   = 50.
prop_sigma = np.array([[0.1 , 0.  ]
                      ,[0.  , 0.02]])
prop = pypmc.density.student_t.LocalStudentT(prop_sigma, prop_dof)


# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian with well separated modes.
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate

# choose initializations for the chains
# Here we place two chains into the mode at [5, 0.01] and three into the mode at [-4,1].
# In such a setup, the chains will only explore the mode where they are initialized.
# Different random numbers are used in each chain.
starts = [np.array([4.999, 0.])] * 2   +   [np.array([-4.0001, 0.999])] * 3

# define the markov chain objects
mcs = [pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, prop, start) for start in starts]

# run and discard burn-in
for mc in mcs:
    mc.run(10**2)
    mc.clear()

# run 10,000 steps adapting the proposal every 500 steps
for mc in mcs:
    for i in range(20):
        mc.run(500)
        mc.adapt()

# extract a reference to the samples from all chains
stacked_values = [mc.samples[:] for mc in mcs]

# find the chain groups
# chains 0 and 1 are initialized in the same mode (at [5, 0.01])
# chains 2, 3 and 4 are initialized in the same mode (at [-4, 1])
# expect chain groups:
expected_groups = [[0,1], [2,3,4]]

# The R value calculation only needs the means, the variances (diagonal
# elements of the covariance matrix) and the number of samples;
# axis=0 ensures that we get the variances separately for each parameter.
# critical_r can be set manually; here the default value is used.
found_groups = pypmc.mix_adapt.r_value.r_group([np.mean(chain, axis=0) for chain in stacked_values],
                                               [np.var (chain, axis=0) for chain in stacked_values],
                                               len(stacked_values[0]))

# print the result
print("Expect %s" % expected_groups)
print("Have   %s"    % found_groups)

# Hint: ``stacked_values`` is an example of what `pypmc.mix_adapt.r_value.make_r_gaussmix()` expects as ``data``
result = pypmc.mix_adapt.r_value.make_r_gaussmix(stacked_values)

try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

pypmc.tools.plot_mixture(result, cmap='jet')
plt.show()

[Figure: r_group.png]
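
The grouping depends on the ``critical_r`` threshold mentioned in the comments above; a minimal sketch of passing it explicitly, reusing ``stacked_values`` and with the value 1.1 chosen purely for illustration:

# a stricter criterion: group chains only if their common R value is below 1.1
strict_groups = pypmc.mix_adapt.r_value.r_group([np.mean(chain, axis=0) for chain in stacked_values],
                                                [np.var (chain, axis=0) for chain in stacked_values],
                                                len(stacked_values[0]), critical_r=1.1)
print("Strict grouping: %s" % strict_groups)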

4.4. Variational Bayes

'''This example shows how to generate a "best fit" Gaussian mixture density
from data using variational Bayes.

'''

## In this example, we will:
## 1. Define a Gaussian mixture
## 2. Generate demo data from that Gaussian mixture
## 3. Fit a Gaussian mixture to the data using variational Bayes
## 4. Plot the original and the fitted mixture


import numpy as np
import pypmc



# -------------------- 1. Define a Gaussian mixture --------------------

component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mix = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)



# -------------------- 2. Generate demo data ---------------------------

data = target_mix.propose(500)



# -------------------- 3. Adapt a Gaussian mixture ---------------------

# maximum number of components
K = 20

# Create a "GaussianInference" object.
# The following command passes just the two essential arguments to "GaussianInference":
# The ``data`` and a maximum number of ``components``.
# For reasonable results in more complicated settings, a careful choice for ``W0``
# is crucial. As a rule of thumb, choose ``inv(W0)`` much smaller than the expected
# covariance. In this case, however, the default (``W0`` = unit matrix) is good enough.
vb = pypmc.mix_adapt.variational.GaussianInference(data, K)

# adapt the variational parameters
converged = vb.run(100, verbose=True)
print('-----------------------------')

# generate a Gaussian mixture with the most probable parameters
fit_mixture = vb.make_mixture()


# -------------------- 4. Plot/print results ---------------------------

if converged is None:
    print('\nThe adaptation did not converge.\n')
else:
    print('\nConverged after %i iterations.\n' %converged)

print("final  component weights: " + str(fit_mixture.weights))
print("target component weights: " + str( target_mix.weights))

try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.xlim(-6.0, +6.000)
    plt.ylim(-0.2, +1.401)

plt.subplot(221)
plt.title('target mixture')
pypmc.tools.plot_mixture(target_mix, cmap='winter')
set_axlimits()

plt.subplot(222)
plt.title('"best fit"')
pypmc.tools.plot_mixture(fit_mixture, cmap='nipy_spectral')
set_axlimits()

plt.subplot(223)
plt.title('target mixture and "best fit"')
pypmc.tools.plot_mixture(target_mix, cmap='winter')
pypmc.tools.plot_mixture(fit_mixture, cmap='nipy_spectral')
set_axlimits()

plt.subplot(224)
plt.title('data')
plt.hexbin(data[:,0], data[:,1], cmap='gray_r')
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: variational.png]
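
For data with larger covariances, passing ``W0`` explicitly can matter, as the comment above points out; a minimal sketch of such a call, with the scale 1e10 chosen only for illustration (the same keyword is used in the MCMC + variational Bayes example below):

# choose W0 such that inv(W0) is much smaller than the expected covariance of the data
vb_alt = pypmc.mix_adapt.variational.GaussianInference(data, K, W0=np.eye(2)*1e10)
vb_alt.run(100, verbose=True)
print("component weights with explicit W0:", vb_alt.make_mixture().weights)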

4.5. Mixture reduction

'''Demonstrate the usage of hierarchical clustering and variational
Bayes (VBMerge) to reduce a given Gaussian mixture to a Gaussian
mixture with a reduced number of components.

'''

import numpy as np
from scipy.stats import chi2
import pypmc

# dimension
D = 2

# number of components
K = 400

# Wishart parameters: scale matrix W, degrees of freedom nu
W = np.eye(D)
nu = 5

# "draw" covariance matrices from Wishart distribution
def wishart(nu, W):
    dim = W.shape[0]
    chol = np.linalg.cholesky(W)
    tmp = np.zeros((dim,dim))
    for i in range(dim):
        for j in range(i+1):
            if i == j:
                tmp[i,j] = np.sqrt(chi2.rvs(nu-(i+1)+1))
            else:
                tmp[i,j] = np.random.normal(0,1)
    return np.dot(chol, np.dot(tmp, np.dot(tmp.T, chol.T)))

covariances = [wishart(nu, W) for k in range(K)]

# put components at positions drawn from a Gaussian around mu
mu = np.zeros(D)
means = [np.random.multivariate_normal(mu, sigma) for sigma in covariances]

# equal weights for every component
weights = np.ones(K)

# weights are automatically normalized
input_mixture = pypmc.density.mixture.create_gaussian_mixture(means, covariances, weights)

# create initial guess from first K_out components
K_out = 10
initial_guess = pypmc.density.mixture.create_gaussian_mixture(means[:K_out], covariances[:K_out], weights[:K_out])

###
# hierarchical clustering
#
# - the output closely resembles the initial guess
# - the components are laid out spherically symmetrically
# - every component is preserved
###
h = pypmc.mix_adapt.hierarchical.Hierarchical(input_mixture, initial_guess)
h.run(verbose=True)

###
# VBMerge
#
# - N is the number of samples that gave rise to the input mixture. It
#   is arbitrary, so play around with it. You might have to adjust the
#   ``prune`` parameter in the ``run()`` method
# - only one component survives; again it is spherically symmetric
###
vb = pypmc.mix_adapt.variational.VBMerge(input_mixture, N=1000,
                                         initial_guess=initial_guess)
print()
print("Start variational Bayes:")
vb.run(verbose=True)

# plot results
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.gca().set_aspect('equal')
    plt.xlim(-12.0, +12.0)
    plt.ylim(-12.0, +12.0)

plt.subplot(221)
plt.title('input mixture')
pypmc.tools.plot_mixture(input_mixture)
set_axlimits()

plt.subplot(222)
plt.title('initial guess')
pypmc.tools.plot_mixture(initial_guess)
set_axlimits()

plt.subplot(223)
plt.title('variational Bayes')
pypmc.tools.plot_mixture(vb.make_mixture(), cmap='autumn')
set_axlimits()

plt.subplot(224)
plt.title('hierarchical output')
pypmc.tools.plot_mixture(h.g)
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: mixture_reduction.png]
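
To see the effect of ``N`` and of the ``prune`` argument of ``run()`` mentioned above, the merge can be repeated with different values; a minimal sketch reusing ``input_mixture``, ``initial_guess`` and ``K_out``, with the numbers chosen purely for illustration:

# a larger effective sample size N and an explicit prune threshold
# tend to keep more components alive
vb_alt = pypmc.mix_adapt.variational.VBMerge(input_mixture, N=10**5,
                                             initial_guess=initial_guess)
vb_alt.run(100, prune=0.5 * 10**5 / K_out, verbose=True)
print('surviving components:', len(vb_alt.make_mixture().components))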

4.6. MCMC + variational Bayes

'''This example illustrates how pypmc can be used to integrate a
non-negative function. The presented algorithm needs very little
analytical knowledge about the function.

'''

import numpy as np
import pypmc

# The idea is to find a good proposal function for importance sampling
# with as little information about the target function as possible.
#
# In this example, we will first map out the regions of interest using Markov
# chains, and then use variational Bayes to approximate the target
# with a Gaussian mixture.

# *************************** Important: ***************************
# * The target function must be defined such that it returns the   *
# * log of the function of interest. The methods we use imply that *
# * the function is interpreted as an unnormalized probability     *
# * density.                                                       *
# ******************************************************************

# Define the target; i.e., the function you want to sample from.  In
# this case, it is a Student's t mixture of three components with
# different degrees of freedom. They are located close to each other.
# If you want a multimodal target, adjust the means.

dim = 2

mean0       = np.array ([-6.0,  7.3  ])
covariance0 = np.array([[ 0.8, -0.3 ],
                        [-0.3,  1.25]])

mean1       = np.array ([-7.0,  8.0   ])
covariance1 = np.array([[ 0.5,  0.    ],
                        [ 0. ,  0.2  ]])

mean2       = np.array ([-8.5,  7.5   ])
covariance2 = np.array([[ 0.5,  0.2   ],
                        [ 0.2,  0.2  ]])

component_weights = np.array([0.3, 0.4, 0.3])
component_means = [mean0, mean1, mean2]
component_covariances = [covariance0, covariance1, covariance2]
dofs = [13, 17, 5]

target_mixture = pypmc.density.mixture.create_t_mixture(component_means, component_covariances, dofs, component_weights)
log_target = target_mixture.evaluate

# Now we suppose that we only have the following knowledge about the
# target function: its regions of interest are at a distance of no more
# than order ten from zero.

# Now we try to find these with Markov chains.  We have to deal with
# the fact that there may be modes separated by regions of very low
# probability. It is thus unlikely that a single chain explores more
# than one mode in such a case. To deal with this multimodality, we
# start several chains and hope that they find all modes.  We will
# start ten Markov chains at random positions in the square
# [(-10,-10), (+10,+10)].
starts = [np.random.uniform(-10,10, size=dim) for i in range(10)]

# For a local-random-walk Markov chain, we also need an initial
# proposal.  Here, we take a Gaussian with initial covariance
# 2.38^2/d times the unit matrix.  The initial covariance should be
# chosen such that it is of the same order as the real covariance of
# the mode to be mapped out. For a Gaussian target, the overall scale
# should decrease as 2.38^2/d as the dimension d increases to achieve
# an acceptance rate around 20%.
mc_prop = pypmc.density.gauss.LocalGauss(np.eye(dim) * 2.38**2 / dim)
mcs = [pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, mc_prop, start) for start in starts]

print('running Markov chains ...')

# In general we need to let the chains move to regions of high
# probability first; those early samples are not representative, so we
# discard them as burn-in. Then we let the Markov chains map out the
# regions of interest. The samples are used to adapt the proposal
# covariance to yield a satisfactory acceptance rate.
for mc in mcs:
    for i in range(20):
        mc.run(500)
        mc.adapt()
        if i == 0:
            mc.clear()

mc_samples_sorted_by_chain = [mc.samples[:] for mc in mcs]
mc_samples = np.vstack(mc_samples_sorted_by_chain)

means = np.zeros((len(mcs), dim))
variances = np.zeros_like(means)

for i, chain_samples in enumerate(mc_samples_sorted_by_chain):
    means[i] = chain_samples.mean(axis=0)
    variances[i] = chain_samples.var(axis=0)

# Now we use the Markov chain samples to generate a mixture proposal
# function for importance sampling. For this purpose, we choose the
# variational Bayes algorithm that takes samples and an initial guess
# of the mixture as input. To create the initial guess, we group all
# chains that mixed, and create 10 components per group. For a
# unimodal target, all chains should mix. For more information about
# the following call, check the example "Grouping by Gelman-Rubin R
# value"(r-group.py) or the reference documentation.
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mc_samples_sorted_by_chain, K_g=10)

# Comments on arguments:
#   o mc_samples[::100]   - Samples in the Markov chains are strongly correlated
#                           => thin the samples to get approx. independent samples
#   o W0=np.eye(dim)*1e10 - The resulting covariance matrices can be very
#                           sensitive to W0. Its inverse should be chosen much
#                           smaller than the actual covariance. If it is too small,
#                           W0 will dominate the resulting covariances and
#                           usually lead to very bad results.
vb = pypmc.mix_adapt.variational.GaussianInference(mc_samples[::100], initial_guess=long_patches, W0=np.eye(dim)*1e10)

# When we run variational Bayes, we want unnecessary components to be
# automatically pruned. The prune parameter sets how many samples a
# component must effectively have to be considered important. The rule
# of thumb employed here proved good in our experiments.
vb_prune = 0.5 * len(vb.data) / vb.K

# Run the variational Bayes for at most 1,000 iterations. If the
# lower bound of the model evidence changes by less than `rel_tol`,
# convergence is declared earlier. If we increase `rel_tol` to 1e-4, it
# takes fewer iterations, but potentially more (useless) components
# survive the pruning. The trade-off depends on the complexity of the
# problem.
print('running variational Bayes ...')
vb.run(1000, rel_tol=1e-8, abs_tol=1e-5, prune=vb_prune, verbose=True)

# extract the most probable Gaussian mixture given the samples
vbmix = vb.make_mixture()

# Now we can instantiate an importance sampler. We draw 1,000
# importance samples and use these for a proposal update with
# variational Bayes again. In case there are multiple modes and
# the chains did not mix, we need this step to infer the right
# component weights, because so far a component's weight only reflects
# how many chains it attracted, which can depend strongly on the
# starting points and need not reflect the correct probability mass.
print('running importance sampling ...')
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
sampler.run(1000)

# The variational Bayes allows us, unlike PMC, to include the
# information gained by the Markov chains in subsequent proposal
# updates. We know that we cannot trust the component weights obtained
# by the chains. Nevertheless, we can rely on the means and covariances. The
# following lines show how to code that into the variational Bayes by
# removing the hyperparameter `alpha0` that encodes the component
# weights.
prior_for_proposal_update = vb.posterior2prior()
prior_for_proposal_update.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(sampler.samples[:],
                                                    initial_guess=vbmix,
                                                    weights=sampler.weights[:][:,0],
                                                    **prior_for_proposal_update)

# Note: This time we leave "prune" at the default value "1" because we
#       want to keep all components that are expected to contribute
#       with at least one effective sample per importance sampling run.
print('running variational Bayes ...')
vb2.run(1000, rel_tol=1e-8, abs_tol=1e-5, verbose=True)
vb2mix = vb2.make_mixture()

# Now we draw another 10,000 samples with the updated proposal
sampler.proposal = vb2mix
print('running importance sampling ...')
sampler.run(10**4)

# We can combine the samples and weights from the two runs, see reference [Cor+12].
weights = pypmc.sampler.importance_sampling.combine_weights([samples[:]      for samples in sampler.samples],
                                                            [weights[:][:,0] for weights in sampler.weights],
                                                            [vbmix, vb2mix]                                 ) \
                                                            [:][:,0]
samples = sampler.samples[:]

# The integral can then be estimated from the weights. The error is also
# estimated from the weights. By the central limit theorem, the integral
# estimator has a gaussian distribution.
integral_estimator = weights.sum() / len(weights)
integral_uncertainty_estimator = np.sqrt((weights**2).sum() / len(weights) - integral_estimator**2) / np.sqrt(len(weights)-1)

print('analytical integral = 1')
print('estimated  integral =', integral_estimator, '+-', integral_uncertainty_estimator)

# Let's see how well the proposal matches the target density: the closer
# the values of perplexity and effective sample size (ESS) are to 1,
# the better.  Outliers, or samples out in the tails of the target
# with a very large weight, show up in the 2D marginal and reduce the
# ESS significantly. For the above integral estimate to be right on
# average, they are 'needed'. Without outliers (most of the time), the
# integral is a tad too small.
print('perplexity', pypmc.tools.convergence.perp(weights))
print('effective sample size', pypmc.tools.convergence.ess(weights))

# As mentioned in the very beginning, the methods we applied reinterpret
# the target function as an unnormalized probability density.
# In addition to the integral, we also get weighted samples distributed according
# to that probability density.
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

plt.figure()
plt.hist2d(samples[:,0], samples[:,1], weights=weights, bins=100, cmap='gray_r')
pypmc.tools.plot_mixture(sampler.proposal, visualize_weights=True, cmap='jet')
plt.colorbar()
plt.clim(0.0, 1.0)
plt.title('colors visualize component weights')
plt.show()

[Figure: uniting_markov_chains_and_variational_bayes.png]
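
Beyond the integral, the weighted samples can be used to estimate expectation values under the target; a minimal sketch reusing ``samples``, ``weights``, ``component_weights`` and ``component_means`` from above, comparing the estimated mean with the exact mixture mean:

# weighted sample mean vs. exact mean of the target mixture
estimated_mean = (weights[:, np.newaxis] * samples).sum(axis=0) / weights.sum()
exact_mean = sum(w * m for w, m in zip(component_weights, component_means))
print('estimated mean:', estimated_mean)
print('exact mean:    ', exact_mean)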