4. Examples

The following examples are available in a checkout of the repository in the examples/ directory.

4.1. MCMC

'''This example illustrates how to run a Markov Chain using pypmc'''

import numpy as np
import pypmc

# define a proposal
prop_dof   = 1.
prop_sigma = np.array([[0.1 , 0.  ]
                      ,[0.  , 0.02]])
prop = pypmc.density.student_t.LocalStudentT(prop_sigma, prop_dof)

# define the target; i.e., the function you want to sample from.
# In this case, it is a Gaussian with mean "target_mean" and
# covariance "target_sigma".
#
# Note that the target function "log_target" returns the log of the
# unnormalized gaussian density.
target_sigma = np.array([[0.01 , 0.003 ]
                        ,[0.003, 0.0025]])
inv_target_sigma = np.linalg.inv(target_sigma)
target_mean  = np.array([4.3, 1.1])

def unnormalized_log_pdf_gauss(x, mu, inv_sigma):
    diff = x - mu
    return -0.5 * diff.dot(inv_sigma).dot(diff)

log_target = lambda x: unnormalized_log_pdf_gauss(x, target_mean, inv_target_sigma)

# choose a bad initialization
start = np.array([-2., 10.])

# define the markov chain object
mc = pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, prop, start)

# run burn-in
mc.run(10**4)

# delete burn-in from samples
mc.clear()

# run 100,000 steps, adapting the proposal every 500 steps,
# and accumulate the accept count returned by mc.run
accept_count = 0
for i in range(200):
    accept_count += mc.run(500)
    mc.adapt()

# extract a reference to the history of all visited points
values = mc.samples[:]
accept_rate = float(accept_count) / len(values)
print("The chain accepted %4.2f%% of the proposed points" % (accept_rate * 100) )

# plot the result
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

plt.hexbin(values[:,0], values[:,1], gridsize = 40, cmap='gray_r')
plt.show()

[Figure: markov_chain.png]
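
As a quick sanity check (not part of the example script), the empirical moments of the chain can be compared with the known target moments; a minimal sketch, reusing ``values``, ``target_mean`` and ``target_sigma`` from above:

# compare the empirical moments of the chain with the target moments
print("sample mean:       ", values.mean(axis=0))
print("target mean:       ", target_mean)
print("sample covariance:\n", np.cov(values, rowvar=False))
print("target covariance:\n", target_sigma)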

4.2. PMC

4.2.1. Serial

'''This example shows how to use importance sampling and how to
adapt the proposal density using the pmc algorithm.

'''

import numpy as np
import pypmc


# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate


# define the initial proposal density
# In this case, it is a mixture of three Gaussians:
# the initial covariances are set to the unit matrix,
# the initial component weights are set equal
initial_prop_means = []
initial_prop_means.append( np.array([ 4.0, 0.0]) )
initial_prop_means.append( np.array([-5.0, 0.0]) )
initial_prop_means.append( np.array([ 0.0, 0.0]) )
initial_prop_covariance = np.eye(2)

initial_prop_components = []
for i in range(3):
    initial_prop_components.append(pypmc.density.gauss.Gauss(initial_prop_means[i], initial_prop_covariance))

initial_proposal = pypmc.density.mixture.MixtureDensity(initial_prop_components)


# define an ImportanceSampler object
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, initial_proposal)


# draw 10,000 samples, adapting the proposal every 1,000 samples,
# and save the generating proposal component for each sample, which is
# returned by sampler.run
# Note: With too few samples, components may die out and one mode might be lost.
generating_components = []
for i in range(10):
    print("\rstep", i, "...\n\t", end='')

    # draw 1,000 samples and save the generating component
    generating_components.append(sampler.run(10**3, trace_sort=True))

    # get a reference to the weights and samples that have just been generated
    samples = sampler.samples[-1]
    weights = sampler.weights[-1][:,0]

    # update the proposal in place (copy=False) using the Rao-Blackwellized pmc algorithm
    pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, weights, generating_components[-1],
                                     mincount=20, rb=True, copy=False)

print("\rsampling finished")
print(  '-----------------')
print('\n')

# print information about the adapted proposal
print('initial component weights:', initial_proposal.weights)
print('final   component weights:', sampler.proposal.weights)
print('target  component weights:', component_weights)
print()
for k, m in enumerate([mean0, mean1, None]):
    print('initial mean of component %i:' %k, initial_proposal.components[k].mu)
    print('final   mean of component %i:' %k, sampler.proposal.components[k].mu)
    print('target  mean of component %i:' %k, m)
    print()
print()
for k, c in enumerate([covariance0, covariance1, None]):
    print('initial covariance of component %i:\n' %k, initial_proposal.components[k].sigma, sep='')
    print()
    print('final   covariance of component %i:\n' %k, sampler.proposal.components[k].sigma, sep='')
    print()
    print('target  covariance of component %i:\n' %k, c, sep='')
    print('\n')


# plot results
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.xlim(-6.0, +6.000)
    plt.ylim(-0.2, +1.401)

plt.subplot(221)
plt.title('target mixture')
pypmc.tools.plot_mixture(target_mixture, cmap='jet')
set_axlimits()

plt.subplot(222)
plt.title('pmc fit')
pypmc.tools.plot_mixture(sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
set_axlimits()

plt.subplot(223)
plt.title('target mixture and pmc fit')
pypmc.tools.plot_mixture(target_mixture, cmap='jet')
pypmc.tools.plot_mixture(sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
set_axlimits()

plt.subplot(224)
plt.title('weighted samples')
plt.hist2d(sampler.samples[-1][:,0], sampler.samples[-1][:,1], weights=sampler.weights[-1][:,0], cmap='gray_r', bins=200)
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: pmc.png]
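
To judge how well the adapted proposal matches the target, the normalized perplexity and effective sample size of the importance weights can be monitored; a minimal sketch reusing the ``sampler`` from above (the same check could equally be placed inside the adaptation loop):

# weights of the last importance-sampling run; values close to 1 indicate a good proposal
last_weights = sampler.weights[-1][:,0]
print('perplexity:           ', pypmc.tools.convergence.perp(last_weights))
print('effective sample size:', pypmc.tools.convergence.ess(last_weights))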

4.2.2. Parallel

'''This example shows how to use importance sampling and how to
adapt the proposal density using the pmc algorithm in an MPI
parallel environment.
In order to have a multi-process environment, invoke this script with
"mpirun -n 10 python pmc_mpi.py".

'''


from mpi4py.MPI import COMM_WORLD as comm

import numpy as np
import pypmc
import pypmc.tools.parallel_sampler # this submodule is NOT imported by ``import pypmc``

# This script is a parallelized version of the PMC example ``pmc.py``.
# The following lines just define a target density and an initial proposal.
# These steps are exactly the same as in ``pmc.py``:

# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate


# define the initial proposal density
# In this case, it is a mixture of three Gaussians:
# the initial covariances are set to the unit matrix,
# the initial component weights are set equal
initial_prop_means = []
initial_prop_means.append( np.array([ 4.0, 0.0]) )
initial_prop_means.append( np.array([-5.0, 0.0]) )
initial_prop_means.append( np.array([ 0.0, 0.0]) )
initial_prop_covariance = np.eye(2)

initial_prop_components = []
for i in range(3):
    initial_prop_components.append(pypmc.density.gauss.Gauss(initial_prop_means[i], initial_prop_covariance))

initial_proposal = pypmc.density.mixture.MixtureDensity(initial_prop_components)

# -----------------------------------------------------------------------------------------------------------------------

# In ``pmc.py`` the following line defines the sequential single process sampler:
# sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, initial_proposal)
#
# We now use the parallel MPISampler instead:
SequentialIS = pypmc.sampler.importance_sampling.ImportanceSampler
parallel_sampler = pypmc.tools.parallel_sampler.MPISampler(SequentialIS, target=log_target, proposal=initial_proposal)

# Draw 10,000 samples adapting the proposal every 1,000 samples:

# make sure that every process has a different random number generator seed
if comm.Get_rank() == 0:
    seed = np.random.randint(1e5)
else:
    seed = None
seed = comm.bcast(seed)
np.random.seed(seed + comm.Get_rank())

generating_components = []
for i in range(10):
    # With the invocation "mpirun -n 10 python pmc_mpi.py", there are
    # 10 processes, so ``parallel_sampler.run(1000//comm.Get_size())``
    # makes each process draw 100 samples, i.e. 1,000 samples in total.
    # ``parallel_sampler.run`` also returns the generating proposal
    # component for each sample.
    # In the master process, ``parallel_sampler.run`` returns a list containing
    # the return values of the sequential ``run`` method of every process.
    # In all other processes, it returns the generating components of
    # that process's own samples only.
    last_generating_components = parallel_sampler.run(1000//comm.Get_size(), trace_sort=True)

    # In addition to the generating components, ``parallel_sampler.run``
    # automatically sends all samples to the master process, i.e. the
    # process for which comm.Get_rank() == 0.
    if comm.Get_rank() == 0:
        print("\rstep", i, "...\n\t", end='')

        # Now let PMC run only in the master process:

        # ``parallel_sampler.samples_list`` and ``parallel_sampler.weights_list`` store the
        # weighted samples sorted by the responsible process:
        # the History object held by process i can be accessed via
        # ``parallel_sampler.<samples/weights>_list[i]``. The master process (i=0) also produces samples.

        # Combine the weights and samples from all processes into two arrays of 1,000 entries each
        samples = np.vstack([history_item[-1] for history_item in parallel_sampler.samples_list])
        weights = np.vstack([history_item[-1] for history_item in parallel_sampler.weights_list])[:,0]

        # The latent variables are stored in ``last_generating_components``.
    # ``last_generating_components[i]`` is an array with the generating
        # components of the samples produced by process number "i".
        # ``np.hstack(last_generating_components)`` combines the generating components
        # from all processes to one array holding all 1,000 entries.
        generating_components.append(  np.hstack(last_generating_components)  )

        # adapt the proposal using the samples from all processes
        new_proposal =  pypmc.mix_adapt.pmc.gaussian_pmc(samples, parallel_sampler.sampler.proposal,
                                                         weights, generating_components[-1],
                                                         mincount=20, rb=True)
    else:
        # In order to broadcast the ``new_proposal``, define a dummy variable in the other processes
        # see "MPI4Py tutorial", section "Collective Communication": http://mpi4py.scipy.org/docs/usrman/tutorial.html
        new_proposal = None

    # broadcast the ``new_proposal``
    new_proposal = comm.bcast(new_proposal)

    # replace the old proposal
    parallel_sampler.sampler.proposal = new_proposal

# only the master process shall print out any final information
if comm.Get_rank() == 0:
    all_samples  = np.vstack([history_item[ :] for history_item in parallel_sampler.samples_list])
    all_weights  = np.vstack([history_item[ :] for history_item in parallel_sampler.weights_list])
    last_samples = np.vstack([history_item[-1] for history_item in parallel_sampler.samples_list])
    last_weights = np.vstack([history_item[-1] for history_item in parallel_sampler.weights_list])
    print("\rsampling finished", end=', ')
    print("collected " + str(len(all_samples)) + " samples")
    print(  '------------------------------------------')
    print('\n')

    # print information about the adapted proposal
    print('initial component weights:', initial_proposal.weights)
    print('final   component weights:', parallel_sampler.sampler.proposal.weights)
    print('target  component weights:', component_weights)
    print()
    for k, m in enumerate([mean0, mean1, None]):
        print('initial mean of component %i:' %k, initial_proposal.components[k].mu)
        print('final   mean of component %i:' %k, parallel_sampler.sampler.proposal.components[k].mu)
        print('target  mean of component %i:' %k, m)
        print()
    print()
    for k, c in enumerate([covariance0, covariance1, None]):
        print('initial covariance of component %i:\n' %k, initial_proposal.components[k].sigma, sep='')
        print()
        print('final   covariance of component %i:\n' %k, parallel_sampler.sampler.proposal.components[k].sigma, sep='')
        print()
        print('target  covariance of component %i:\n' %k, c, sep='')
        print('\n')

    if comm.Get_size() == 1:
        print('******************************************************')
        print('********** NOTE: There is only one process. **********')
        print('******** try "mpirun -n 10 python pmc_mpi.py" ********')
        print('******************************************************')

    # plot results
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print('For plotting "matplotlib" needs to be installed')
        exit(1)

    def set_axlimits():
        plt.xlim(-6.0, +6.000)
        plt.ylim(-0.2, +1.401)

    plt.subplot(221)
    plt.title('target mixture')
    pypmc.tools.plot_mixture(target_mixture, cmap='jet')
    set_axlimits()

    plt.subplot(222)
    plt.title('pmc fit')
    pypmc.tools.plot_mixture(parallel_sampler.sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
    set_axlimits()

    plt.subplot(223)
    plt.title('target mixture and pmc fit')
    pypmc.tools.plot_mixture(target_mixture, cmap='jet')
    pypmc.tools.plot_mixture(parallel_sampler.sampler.proposal, cmap='nipy_spectral', cutoff=0.01)
    set_axlimits()

    plt.subplot(224)
    plt.title('weighted samples')
    plt.hist2d(last_samples[:,0], last_samples[:,1], weights=last_weights[:,0], cmap='gray_r', bins=200)
    set_axlimits()

    plt.tight_layout()
    plt.show()

[Figure: pmc_mpi.png]
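
As an additional diagnostic (not part of the script above), the master process could report the normalized effective sample size of the final run; a minimal sketch, assuming it is executed in the ``comm.Get_rank() == 0`` branch after ``last_weights`` has been built:

# values close to 1 indicate that the adapted proposal matches the target well
last_ess = pypmc.tools.convergence.ess(last_weights[:,0])
print('normalized effective sample size of the last run:', last_ess)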

4.3. Grouping by Gelman-Rubin R value

'''This example illustrates how to group Markov Chains according to the
Gelman-Rubin R value (see [GR92]_).

'''

import numpy as np
import pypmc

# A Markov Chain can only explore a local mode of the target function.
# The Gelman-Rubin R value can be used to determine whether N chains
# explored the same mode. pypmc offers a function which groups chains
# whose common R value is less than some ``critical_r``.
#
# In this example, we run five Markov Chains initialized in different
# modes and then group together those chains that explored the same mode.


# define a proposal
# this defines the same initial proposal for all chains
prop_dof   = 50.
prop_sigma = np.array([[0.1 , 0.  ]
                      ,[0.  , 0.02]])
prop = pypmc.density.student_t.LocalStudentT(prop_sigma, prop_dof)


# define the target; i.e., the function you want to sample from.
# In this case, it is a bimodal Gaussian with well separated modes.
#
# Note that the target function "log_target" returns the log of the
# target function.
component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])
inv_covariance0 = np.linalg.inv(covariance0)

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])
inv_covariance1 = np.linalg.inv(covariance1)

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mixture = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)

log_target = target_mixture.evaluate

# choose initializations for the chains
# Here we place two chains into the mode at [5, 0.01] and three into the mode at [-4,1].
# In such a setup, the chains will only explore the mode where they are initialized.
# Different random numbers are used in each chain.
starts = [np.array([4.999, 0.])] * 2   +   [np.array([-4.0001, 0.999])] * 3

# define the markov chain objects
mcs = [pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, prop, start) for start in starts]

# run and discard burn-in
for mc in mcs:
    mc.run(10**2)
    mc.clear()

# run 10,000 steps adapting the proposal every 500 steps
for mc in mcs:
    for i in range(20):
        mc.run(500)
        mc.adapt()

# extract a reference to the samples from all chains
stacked_values = [mc.samples[:] for mc in mcs]

# find the chain groups
# chains 0 and 1 are initialized in the same mode (at [5, 0.01])
# chains 2, 3 and 4 are initialized in the same mode (at [-4, 1])
# expect chain groups:
expected_groups = [[0,1], [2,3,4]]

# The R value calculation only needs the means, the variances (diagonal
# elements of the covariance matrix) and the number of samples;
# axis=0 ensures that we get the variances separately for each parameter.
# critical_r can be set manually; here the default value is used.
found_groups = pypmc.mix_adapt.r_value.r_group([np.mean(chain, axis=0) for chain in stacked_values],
                                               [np.var (chain, axis=0) for chain in stacked_values],
                                               len(stacked_values[0]))

# print the result
print("Expect %s" % expected_groups)
print("Have   %s"    % found_groups)

# Hint: ``stacked_values`` is an example of what `pypmc.mix_adapt.r_value.make_r_gaussmix()` expects as ``data``
result = pypmc.mix_adapt.r_value.make_r_gaussmix(stacked_values)

try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

pypmc.tools.plot_mixture(result, cmap='jet')
plt.show()

[Figure: r_group.png]
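
The grouping depends on the ``critical_r`` threshold mentioned in the comments above; a minimal sketch of passing it explicitly, reusing ``stacked_values`` and with the value 1.1 chosen purely for illustration:

# a stricter criterion: group chains only if their common R value is below 1.1
strict_groups = pypmc.mix_adapt.r_value.r_group([np.mean(chain, axis=0) for chain in stacked_values],
                                                [np.var (chain, axis=0) for chain in stacked_values],
                                                len(stacked_values[0]), critical_r=1.1)
print("Strict grouping: %s" % strict_groups)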

4.4. Variational Bayes

'''This example shows how to generate a "best fit" Gaussian mixture density
from data using variational Bayes.

'''

## In this example, we will:
## 1. Define a Gaussian mixture
## 2. Generate demo data from that Gaussian mixture
## 3. Fit a Gaussian mixture to the data using variational Bayes
## 4. Plot the original and the fitted mixture


import numpy as np
import pypmc



# -------------------- 1. Define a Gaussian mixture --------------------

component_weights = np.array([0.3, 0.7])

mean0       = np.array ([ 5.0  , 0.01  ])
covariance0 = np.array([[ 0.01 , 0.003 ],
                        [ 0.003, 0.0025]])

mean1       = np.array ([-4.0  , 1.0   ])
covariance1 = np.array([[ 0.1  , 0.    ],
                        [ 0.   , 0.02  ]])

component_means = [mean0, mean1]
component_covariances = [covariance0, covariance1]

target_mix = pypmc.density.mixture.create_gaussian_mixture(component_means, component_covariances, component_weights)



# -------------------- 2. Generate demo data ---------------------------

data = target_mix.propose(500)



# -------------------- 3. Adapt a Gaussian mixture ---------------------

# maximum number of components
K = 20

# Create a "GaussianInference" object.
# The following command passes just the two essential arguments to "GaussianInference":
# The ``data`` and a maximum number of ``components``.
# For reasonable results in more complicated settings, a careful choice for ``W0``
# is crucial. As a rule of thumb, choose ``inv(W0)`` much smaller than the expected
# covariance. In this case, however, the default (``W0`` = unit matrix) is good enough.
vb = pypmc.mix_adapt.variational.GaussianInference(data, K)

# adapt the variational parameters
converged = vb.run(100, verbose=True)
print('-----------------------------')

# generate a Gaussian mixture with the most probable parameters
fit_mixture = vb.make_mixture()


# -------------------- 4. Plot/print results ---------------------------

if converged is None:
    print('\nThe adaptation did not converge.\n')
else:
    print('\nConverged after %i iterations.\n' %converged)

print("final  component weights: " + str(fit_mixture.weights))
print("target component weights: " + str( target_mix.weights))

try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.xlim(-6.0, +6.000)
    plt.ylim(-0.2, +1.401)

plt.subplot(221)
plt.title('target mixture')
pypmc.tools.plot_mixture(target_mix, cmap='winter')
set_axlimits()

plt.subplot(222)
plt.title('"best fit"')
pypmc.tools.plot_mixture(fit_mixture, cmap='nipy_spectral')
set_axlimits()

plt.subplot(223)
plt.title('target mixture and "best fit"')
pypmc.tools.plot_mixture(target_mix, cmap='winter')
pypmc.tools.plot_mixture(fit_mixture, cmap='nipy_spectral')
set_axlimits()

plt.subplot(224)
plt.title('data')
plt.hexbin(data[:,0], data[:,1], cmap='gray_r')
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: variational.png]
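
For data with larger covariances, passing ``W0`` explicitly can matter, as the comment above points out; a minimal sketch of such a call, with the scale 1e10 chosen only for illustration (the same keyword is used in the MCMC + variational Bayes example below):

# choose W0 such that inv(W0) is much smaller than the expected covariance of the data
vb_alt = pypmc.mix_adapt.variational.GaussianInference(data, K, W0=np.eye(2)*1e10)
vb_alt.run(100, verbose=True)
print("component weights with explicit W0:", vb_alt.make_mixture().weights)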

4.5. Mixture reduction

'''Demonstrate the usage of hierarchical clustering and variational
Bayes (VBMerge) to reduce a given Gaussian mixture to a Gaussian
mixture with a reduced number of components.

'''

import numpy as np
from scipy.stats import chi2
import pypmc

# dimension
D = 2

# number of components
K = 400

# Wishart parameters: scale matrix W, degrees of freedom nu
W = np.eye(D)
nu = 5

# "draw" covariance matrices from Wishart distribution
def wishart(nu, W):
    dim = W.shape[0]
    chol = np.linalg.cholesky(W)
    tmp = np.zeros((dim,dim))
    for i in range(dim):
        for j in range(i+1):
            if i == j:
                tmp[i,j] = np.sqrt(chi2.rvs(nu-(i+1)+1))
            else:
                tmp[i,j] = np.random.normal(0,1)
    return np.dot(chol, np.dot(tmp, np.dot(tmp.T, chol.T)))

covariances = [wishart(nu, W) for k in range(K)]

# put components at positions drawn from a Gaussian around mu
mu = np.zeros(D)
means = [np.random.multivariate_normal(mu, sigma) for sigma in covariances]

# equal weights for every component
weights = np.ones(K)

# weights are automatically normalized
input_mixture = pypmc.density.mixture.create_gaussian_mixture(means, covariances, weights)

# create initial guess from first K_out components
K_out = 10
initial_guess = pypmc.density.mixture.create_gaussian_mixture(means[:K_out], covariances[:K_out], weights[:K_out])

###
# hierarchical clustering
#
# - the output closely resembles the initial guess
# - the components are laid out spherically symmetrically
# - every component is preserved
###
h = pypmc.mix_adapt.hierarchical.Hierarchical(input_mixture, initial_guess)
h.run(verbose=True)

###
# VBMerge
#
# - N is the number of samples that gave rise to the input mixture. It
#   is arbitrary, so play around with it. You might have to adjust the
#   ``prune`` parameter in the ``run()`` method
# - only one component survives; again it is spherically symmetric
###
vb = pypmc.mix_adapt.variational.VBMerge(input_mixture, N=1000,
                                         initial_guess=initial_guess)
print()
print("Start variational Bayes:")
vb.run(verbose=True)

# plot results
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

def set_axlimits():
    plt.gca().set_aspect('equal')
    plt.xlim(-12.0, +12.0)
    plt.ylim(-12.0, +12.0)

plt.subplot(221)
plt.title('input mixture')
pypmc.tools.plot_mixture(input_mixture)
set_axlimits()

plt.subplot(222)
plt.title('initial guess')
pypmc.tools.plot_mixture(initial_guess)
set_axlimits()

plt.subplot(223)
plt.title('variational Bayes')
pypmc.tools.plot_mixture(vb.make_mixture(), cmap='autumn')
set_axlimits()

plt.subplot(224)
plt.title('hierarchical output')
pypmc.tools.plot_mixture(h.g)
set_axlimits()

plt.tight_layout()
plt.show()

[Figure: mixture_reduction.png]
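
To see the effect of ``N`` and of the ``prune`` argument of ``run()`` mentioned above, the merge can be repeated with different values; a minimal sketch reusing ``input_mixture``, ``initial_guess`` and ``K_out``, with the numbers chosen purely for illustration:

# a larger effective sample size N and an explicit prune threshold
# tend to keep more components alive
vb_alt = pypmc.mix_adapt.variational.VBMerge(input_mixture, N=10**5,
                                             initial_guess=initial_guess)
vb_alt.run(100, prune=0.5 * 10**5 / K_out, verbose=True)
print('surviving components:', len(vb_alt.make_mixture().components))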

4.6. MCMC + variational Bayes

'''This example illustrates how pypmc can be used to integrate a
non-negative function. The presented algorithm needs very little
analytical knowledge about the function.

'''

import numpy as np
import pypmc

# The idea is to find a good proposal function for importance sampling
# with as little information about the target function as possible.
#
# In this example, we will first map out the regions of interest using Markov
# chains, and then use variational Bayes to approximate the target
# with a Gaussian mixture.

# *************************** Important: ***************************
# * The target function must be defined such that it returns the   *
# * log of the function of interest. The methods we use imply that *
# * the function is interpreted as an unnormalized probability     *
# * density.                                                       *
# ******************************************************************

# Define the target; i.e., the function you want to sample from.  In
# this case, it is a Student's t mixture of three components with
# different degrees of freedom. They are located close to each other.
# If you want a multimodal target, adjust the means.

dim = 2

mean0       = np.array ([-6.0,  7.3  ])
covariance0 = np.array([[ 0.8, -0.3 ],
                        [-0.3,  1.25]])

mean1       = np.array ([-7.0,  8.0   ])
covariance1 = np.array([[ 0.5,  0.    ],
                        [ 0. ,  0.2  ]])

mean2       = np.array ([-8.5,  7.5   ])
covariance2 = np.array([[ 0.5,  0.2   ],
                        [ 0.2,  0.2  ]])

component_weights = np.array([0.3, 0.4, 0.3])
component_means = [mean0, mean1, mean2]
component_covariances = [covariance0, covariance1, covariance2]
dofs = [13, 17, 5]

target_mixture = pypmc.density.mixture.create_t_mixture(component_means, component_covariances, dofs, component_weights)
log_target = target_mixture.evaluate

# Now we suppose that we only have the following knowledge about the
# target function: its regions of interest are at a distance of no more
# than order ten from zero.

# Now we try to find these with Markov chains.  We have to deal with
# the fact that there may be modes separated by regions of very low
# probability. It is thus unlikely that a single chain explores more
# than one mode in such a case. To deal with this multimodality, we
# start several chains and hope that they find all modes.  We will
# start ten Markov chains at random positions in the square
# [(-10,-10), (+10,+10)].
starts = [np.random.uniform(-10,10, size=dim) for i in range(10)]

# For a local-random-walk Markov chain, we also need an initial
# proposal.  Here, we take a Gaussian with initial covariance
# 2.38^2/d times the unit matrix.  The initial covariance should be
# chosen such that it is of the same order as the real covariance of
# the mode to be mapped out. For a Gaussian target, the overall scale
# should decrease as 2.38^2/d as the dimension d increases to achieve
# an acceptance rate around 20%.
mc_prop = pypmc.density.gauss.LocalGauss(np.eye(dim) * 2.38**2 / dim)
mcs = [pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, mc_prop, start) for start in starts]

print('running Markov chains ...')

# In general we need to let the chains move to regions of high
# probability first; those early samples are not representative, so we
# discard them as burn-in. Then we let the Markov chains map out the
# regions of interest. The samples are used to adapt the proposal
# covariance to yield a satisfactory acceptance rate.
for mc in mcs:
    for i in range(20):
        mc.run(500)
        mc.adapt()
        if i == 0:
            mc.clear()

mc_samples_sorted_by_chain = [mc.samples[:] for mc in mcs]
mc_samples = np.vstack(mc_samples_sorted_by_chain)

means = np.zeros((len(mcs), dim))
variances = np.zeros_like(means)

for i, chain_samples in enumerate(mc_samples_sorted_by_chain):
    means[i] = chain_samples.mean(axis=0)
    variances[i] = chain_samples.var(axis=0)

# Now we use the Markov chain samples to generate a mixture proposal
# function for importance sampling. For this purpose, we choose the
# variational Bayes algorithm that takes samples and an initial guess
# of the mixture as input. To create the initial guess, we group all
# chains that mixed, and create 10 components per group. For a
# unimodal target, all chains should mix. For more information about
# the following call, check the example "Grouping by Gelman-Rubin R
# value"(r-group.py) or the reference documentation.
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mc_samples_sorted_by_chain, K_g=10)

# Comments on arguments:
#   o mc_samples[::100]   - Samples in the Markov chains are strongly correlated
#                           => thin the samples to get approx. independent samples
#   o W0=np.eye(dim)*1e10 - The resulting covariance matrices can be very
#                           sensitive to W0. Its inverse should be chosen much
#                           smaller than the actual covariance. If it is too small,
#                           W0 will dominate the resulting covariances and
#                           usually lead to very bad results.
vb = pypmc.mix_adapt.variational.GaussianInference(mc_samples[::100], initial_guess=long_patches, W0=np.eye(dim)*1e10)

# When we run variational Bayes, we want unnecessary components to be
# automatically pruned. The prune parameter sets how many samples a
# component must effectively have to be considered important. The rule
# of thumb employed here proved good in our experiments.
vb_prune = 0.5 * len(vb.data) / vb.K

# Run the variational Bayes for at most 1,000 iterations. If the
# lower bound of the model evidence changes by less than `rel_tol`,
# convergence is declared earlier. If we increase `rel_tol` to 1e-4, it
# takes fewer iterations, but potentially more (useless) components
# survive the pruning. The trade-off depends on the complexity of the
# problem.
print('running variational Bayes ...')
vb.run(1000, rel_tol=1e-8, abs_tol=1e-5, prune=vb_prune, verbose=True)

# extract the most probable Gaussian mixture given the samples
vbmix = vb.make_mixture()

# Now we can instantiate an importance sampler. We draw 1,000
# importance samples and use these for a proposal update with
# variational Bayes again. In case there are multiple modes and
# the chains did not mix, we need this step to infer the right
# component weights, because so far a component's weight only reflects
# how many chains it attracted, which can depend strongly on the
# starting points and need not reflect the correct probability mass.
print('running importance sampling ...')
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
sampler.run(1000)

# The variational Bayes allows us, unlike PMC, to include the
# information gained by the Markov chains in subsequent proposal
# updates. We know that we cannot trust the component weights obtained
# by the chains. Nevertheless, we can rely on the means and covariances. The
# following lines show how to code that into the variational Bayes by
# removing the hyperparameter `alpha0` that encodes the component
# weights.
prior_for_proposal_update = vb.posterior2prior()
prior_for_proposal_update.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(sampler.samples[:],
                                                    initial_guess=vbmix,
                                                    weights=sampler.weights[:][:,0],
                                                    **prior_for_proposal_update)

# Note: This time we leave "prune" at the default value "1" because we
#       want to keep all components that are expected to contribute
#       with at least one effective sample per importance sampling run.
print('running variational Bayes ...')
vb2.run(1000, rel_tol=1e-8, abs_tol=1e-5, verbose=True)
vb2mix = vb2.make_mixture()

# Now we draw another 10,000 samples with the updated proposal
sampler.proposal = vb2mix
print('running importance sampling ...')
sampler.run(10**4)

# We can combine the samples and weights from the two runs, see reference [Cor+12].
weights = pypmc.sampler.importance_sampling.combine_weights([samples[:]      for samples in sampler.samples],
                                                            [weights[:][:,0] for weights in sampler.weights],
                                                            [vbmix, vb2mix]                                 ) \
                                                            [:][:,0]
samples = sampler.samples[:]

# The integral can then be estimated from the weights. The error is also
# estimated from the weights. By the central limit theorem, the integral
# estimator has a gaussian distribution.
integral_estimator = weights.sum() / len(weights)
integral_uncertainty_estimator = np.sqrt((weights**2).sum() / len(weights) - integral_estimator**2) / np.sqrt(len(weights)-1)

print('analytical integral = 1')
print('estimated  integral =', integral_estimator, '+-', integral_uncertainty_estimator)

# Let's see how well the proposal matches the target density: the closer
# the values of perplexity and effective sample size (ESS) are to 1,
# the better.  Outliers, or samples out in the tails of the target
# with a very large weight, show up in the 2D marginal and reduce the
# ESS significantly. For the above integral estimate to be right on
# average, they are 'needed'. Without outliers (most of the time), the
# integral is a tad too small.
print('perplexity', pypmc.tools.convergence.perp(weights))
print('effective sample size', pypmc.tools.convergence.ess(weights))

# As mentioned in the very beginning, the methods we applied reinterpret
# the target function as an unnormalized probability density.
# In addition to the integral, we also get weighted samples distributed according
# to that probability density.
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('For plotting "matplotlib" needs to be installed')
    exit(1)

plt.figure()
plt.hist2d(samples[:,0], samples[:,1], weights=weights, bins=100, cmap='gray_r')
pypmc.tools.plot_mixture(sampler.proposal, visualize_weights=True, cmap='jet')
plt.colorbar()
plt.clim(0.0, 1.0)
plt.title('colors visualize component weights')
plt.show()

[Figure: uniting_markov_chains_and_variational_bayes.png]
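
Beyond the integral, the weighted samples can be used to estimate expectation values under the target; a minimal sketch reusing ``samples``, ``weights``, ``component_weights`` and ``component_means`` from above, comparing the estimated mean with the exact mixture mean:

# weighted sample mean vs. exact mean of the target mixture
estimated_mean = (weights[:, np.newaxis] * samples).sum(axis=0) / weights.sum()
exact_mean = sum(w * m for w, m in zip(component_weights, component_means))
print('estimated mean:', estimated_mean)
print('exact mean:    ', exact_mean)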