LMC simulation and failure without burn-in
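As a reference point for the code below, the textbook unadjusted Langevin update for a target density $\pi \propto e^{-U}$ is

$$x_{k+1} = x_k - \eta\,\nabla U(x_k) + \sqrt{2\eta}\,z_k, \qquad z_k \sim N(0,1),$$

where $\eta$ is the step size and $z_k$ is a fresh standard normal draw at each iteration; for a $N(0,1)$ target, $U(x) = x^2/2$ and $\nabla U(x) = x$.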
In [ ]:
import numpy as np
import pandas as pd
import math
from scipy.stats import norm
import scipy.stats as stats
import matplotlib.pyplot as plt
In [ ]:
def LMC(n: int, eta: float, x=None) -> float:
    '''Inputs: n is the total number of iterations;
               eta is the step size;
               x is the starting point (drawn from N(0,1) if not supplied);
       Output: the nth iterate generated by LMC, intended to simulate N(0,1)
    '''
    if x is None:
        x = norm.rvs(loc=0, scale=1, size=1)[0]
    i = 0
    while i < n:
        z = norm.rvs(loc=0, scale=1, size=1)[0]  # z_k
        # drift term used as gradU(x_k); as written this is phi'(x) = -x*phi(x),
        # the derivative of the N(0,1) pdf (the potential gradient of N(0,1) is x)
        c = - x / (np.sqrt(2*np.pi)) * np.exp(- x**2 / 2)
        x = x - eta * c + np.sqrt(2*eta) * z  # x_{k+1}
        i += 1
    return x
Just a test of what it looks like
In [ ]:
i = 0
normrv = []
n = 1000
eta = 0.0001
while i < 100:
    normrv.append(LMC(n=n, eta=eta))
    i += 1
fig, ax = plt.subplots(1, 1)
ax.hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
# Plot the N(0,1) pdf for comparison
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
ax.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
ax.legend(loc='best', frameon=False)
plt.title("Normal Distribution, n = " + str(n) + " eta = " + str(eta))
plt.show()
We check the mean and variance of the generated samples to see how close they are to the true N(0,1) values of 0 and 1.
In [ ]:
print("Mean of LMC generated distribution: ",np.mean(normrv))
print("Variance of LMC generated distribution: ", np.var(normrv))
Mean of LMC generated distribution: -0.0031864833532611807
Variance of LMC generated distribution: 1.1783566599091377
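A more formal goodness-of-fit check (a sketch only, not run above, using the `stats` module already imported) could be a Kolmogorov–Smirnov test of the samples against the standard normal CDF:
In [ ]:
# Hypothetical check: KS test of the LMC samples against N(0,1).
# A large p-value means the test does not reject normality.
ks_stat, p_value = stats.kstest(normrv, 'norm')
print("KS statistic:", ks_stat, " p-value:", p_value)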
Testing hyperparameters to see the convergence:
In [ ]:
def graph(n, eta):
    '''Inputs: n is a list of integers, each a total number of iterations;
               eta is a list of floats, each a step size;
       Output: histograms of the LMC samples for every (n, eta) pair,
               plus two flat lists with the means and variances of the generated distributions.
    '''
    fig, ax = plt.subplots(len(n), len(eta), figsize=(20, 40))
    mean = []
    var = []
    for j in range(len(n)):
        n_j = n[j]
        for k in range(len(eta)):
            eta_k = eta[k]
            i = 0
            normrv = []
            while i < 100:
                normrv.append(LMC(n=n_j, eta=eta_k))
                i += 1
            ax[j][k].hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
            mean.append(np.mean(normrv))
            var.append(np.var(normrv))
            # Plot the N(0,1) pdf for comparison
            x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
            ax[j][k].plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
            ax[j][k].legend(loc='right', frameon=False)
            ax[j][k].set_title("n = " + str(n_j) + ", eta = " + str(eta_k))
    plt.show()
    return mean, var
In [ ]:
# Revised version of graph: store the means and variances in a len(n) x len(eta) grid instead of flat lists
def graph(n, eta):
    '''Inputs: n is a list of integers, each a total number of iterations;
               eta is a list of floats, each a step size;
       Output: histograms of the LMC samples for every (n, eta) pair,
               plus two nested lists (indexed as [j][k] for n[j] and eta[k]) with the means and variances.
    '''
    fig, ax = plt.subplots(len(n), len(eta), figsize=(20, 40))
    mean = [[0 for _ in range(len(eta))] for _ in range(len(n))]
    var = [[0 for _ in range(len(eta))] for _ in range(len(n))]
    for j in range(len(n)):
        n_j = n[j]
        for k in range(len(eta)):
            eta_k = eta[k]
            i = 0
            normrv = []
            while i < 100:
                normrv.append(LMC(n=n_j, eta=eta_k))
                i += 1
            ax[j][k].hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
            mean[j][k] = np.mean(normrv)
            var[j][k] = np.var(normrv)
            # Plot the N(0,1) pdf for comparison
            x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
            ax[j][k].plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
            ax[j][k].legend(loc='right', frameon=False)
            ax[j][k].set_title("n = " + str(n_j) + ", eta = " + str(eta_k))
    plt.style.use("fast")
    plt.show()
    return mean, var
In [ ]:
ns = [100,500,1000,2000,3000]
etas = [0.005,0.001,0.0001,0.00001]
mean3, var3 = graph(ns,etas)
print(mean3)
print(var3)
[[0.1818463965865965, -0.05454038574405464, 0.010083789802315174, -0.06072416879524485], [-0.1674246098541142, -0.14796209419862486, 0.011510887346283185, 0.020800291879686274], [-0.4124260513514807, -0.16413857549842042, -0.04182724606005664, -0.1378668131576316], [-0.7127856172683308, 0.1610837712451786, -0.13804921155255886, 0.06928075785573949], [0.5043703798329314, 0.08970123133566751, 0.058033459062362625, 0.015082277749448404]]
[[2.1231521223163607, 1.3040952650515236, 0.9837168888122667, 1.0450596854721517], [6.228538815935967, 2.251560239946582, 1.5369236602470377, 1.046610287889536], [13.367128442430307, 3.2455364118045282, 1.2673493177351423, 0.88526883492904], [28.042139422331974, 5.9925624717533905, 1.6323223838283305, 1.0867278614513918], [33.027829674914564, 8.068360355256521, 1.6061783569207817, 1.0937748461117844]]
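The nested lists are hard to read; a small sketch (not run above) labels them with the ns and etas values using the pandas import at the top:
In [ ]:
# Hypothetical formatting step: show the results as tables indexed by n (rows) and eta (columns).
mean_df = pd.DataFrame(mean3, index=ns, columns=etas)
var_df = pd.DataFrame(var3, index=ns, columns=etas)
print("Means:\n", mean_df)
print("Variances:\n", var_df)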
Now plot how the mean and variance converge to 0 and 1, respectively, as eta shrinks, for different values of n.
In [ ]:
print(plt.style.available)
['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']
In [ ]:
fig, ax = plt.subplots(1, 2, figsize=(10, 5)) # Adjusted figsize for better visibility
for i in range(len(ns)):
    means = [mean3[i][k] for k in range(len(etas))]
    variances = [var3[i][k] for k in range(len(etas))]
    ax[0].plot(np.log(etas), means, label="n=" + str(ns[i]))
    ax[1].plot(np.log(etas), variances, label="n=" + str(ns[i]))
ax[0].axhline(y=0, color='gray', linestyle='--', linewidth=1)
ax[1].axhline(y=1, color='gray', linestyle='--', linewidth=1)
ax[0].set_title("Mean")
ax[0].set_xlabel("log(eta)")
ax[0].set_ylabel("Mean Value")
ax[0].legend(loc='best')
ax[1].set_title("Variance")
ax[1].set_xlabel("log(eta)")
ax[1].set_ylabel("Variance Value")
ax[1].legend(loc='best')
plt.style.use("fast")
plt.show()
On the left graph, the mean converges toward 0 as eta gets smaller, and larger n appears to land somewhat closer to 0. On the right graph, the variance converges toward 1 as eta gets smaller.
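One rough sanity check on the scale of the variance inflation at larger eta (an approximation, assuming the drift contribution per step stays small, as it does here since $|x\varphi(x)| \le \varphi(1) \approx 0.24$): each iteration adds independent Gaussian noise of variance $2\eta$, and the chain starts from an exact $N(0,1)$ draw, so after $n$ steps

$$\operatorname{Var}(x_n) \approx 1 + 2\eta n.$$

For $n = 3000$, $\eta = 0.005$ this gives about 31, close to the observed 33, and it shrinks back toward 1 as $\eta \to 0$ for fixed $n$.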
Below is an attempt to use the separation method Professor Deb mentioned, where we sample values with several iterations in between so that the generated random variables are approximately independent. However, I can't seem to make it work: the result I get is very far from a normal distribution.
In [ ]:
def list_LMC(total: int, gap: int, eta: float) -> list:
    '''Inputs: total is the number of random variables in the returned list;
               gap is the number of LMC iterations placed between consecutive draws;
               eta is the step size
       Output: a list of random variables simulated through LMC
    '''
    i = 1
    x = LMC(n=gap, eta=eta)
    rv = [x]
    while i < total:
        x = LMC(n=gap, eta=eta, x=x)  # continue the chain from the previous x for another `gap` iterations
        rv.append(x)
        i += 1
    return rv
In [ ]:
normrv = list_LMC(total=100, gap=10000, eta=0.0001)
fig, ax = plt.subplots(1, 1)
ax.hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
ax.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
ax.legend(loc='best', frameon=False)
plt.show()
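Since the whole point of the gap is to make consecutive draws roughly independent, a quick diagnostic (sketched here, not run above) is the lag-1 sample autocorrelation of the thinned sequence; values near 0 are consistent with independence:
In [ ]:
# Hypothetical diagnostic: lag-1 autocorrelation of the thinned LMC draws.
samples = np.asarray(normrv)
lag1_corr = np.corrcoef(samples[:-1], samples[1:])[0, 1]
print("Lag-1 autocorrelation:", lag1_corr)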