LMC simulation and failure without burn-in
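As a reference point for the code below, the textbook unadjusted Langevin update for a target density $\pi \propto e^{-U}$ is

$$x_{k+1} = x_k - \eta\,\nabla U(x_k) + \sqrt{2\eta}\,z_k, \qquad z_k \sim N(0,1),$$

where $\eta$ is the step size and $z_k$ is a fresh standard normal draw at each iteration; for a $N(0,1)$ target, $U(x) = x^2/2$ and $\nabla U(x) = x$.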
In [ ]:
import numpy as np
import pandas as pd
import math
from scipy.stats import norm
import scipy.stats as stats
import matplotlib.pyplot as plt
In [ ]:
def LMC(n: int, eta: float, x=None) -> float:
    '''Inputs: n is the total number of iterations;
               eta is the step size;
               x is the starting point (drawn from N(0,1) if not supplied);
       Output: the nth iterate generated by LMC, intended to simulate N(0,1)
    '''
    if x is None:
        x = norm.rvs(loc=0, scale=1, size=1)[0]
    i = 0
    while i < n:
        z = norm.rvs(loc=0, scale=1, size=1)[0]  # z_k
        # drift term used as gradU(x_k); as written this is phi'(x) = -x*phi(x),
        # the derivative of the N(0,1) pdf (the potential gradient of N(0,1) is x)
        c = - x / (np.sqrt(2*np.pi)) * np.exp(- x**2 / 2)
        x = x - eta * c + np.sqrt(2*eta) * z  # x_{k+1}
        i += 1
    return x
Just a test of what it looks like
In [ ]:
i = 0
normrv = []
n = 1000
eta = 0.0001
while i < 100:
    normrv.append(LMC(n=n, eta=eta))
    i += 1
fig, ax = plt.subplots(1, 1)
ax.hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
# Plot the N(0,1) pdf for comparison
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
ax.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
ax.legend(loc='best', frameon=False)
plt.title("Normal Distribution, n = " + str(n) + " eta = " + str(eta))
plt.show()
We check the mean and variance of the generated samples to see how close they are to the true N(0,1) values of 0 and 1.
In [ ]:
print("Mean of LMC generated distribution: ",np.mean(normrv))
print("Variance of LMC generated distribution: ", np.var(normrv))
Mean of LMC generated distribution: -0.0031864833532611807
Variance of LMC generated distribution: 1.1783566599091377
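A more formal goodness-of-fit check (a sketch only, not run above, using the `stats` module already imported) could be a Kolmogorov–Smirnov test of the samples against the standard normal CDF:
In [ ]:
# Hypothetical check: KS test of the LMC samples against N(0,1).
# A large p-value means the test does not reject normality.
ks_stat, p_value = stats.kstest(normrv, 'norm')
print("KS statistic:", ks_stat, " p-value:", p_value)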
Testing hyperparameters to see the convergence:
In [ ]:
def graph(n, eta):
    '''Inputs: n is a list of integers, each a total number of iterations;
               eta is a list of floats, each a step size;
       Output: histograms of the LMC samples for every (n, eta) pair,
               plus two flat lists with the means and variances of the generated distributions.
    '''
    fig, ax = plt.subplots(len(n), len(eta), figsize=(20, 40))
    mean = []
    var = []
    for j in range(len(n)):
        n_j = n[j]
        for k in range(len(eta)):
            eta_k = eta[k]
            i = 0
            normrv = []
            while i < 100:
                normrv.append(LMC(n=n_j, eta=eta_k))
                i += 1
            ax[j][k].hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
            mean.append(np.mean(normrv))
            var.append(np.var(normrv))
            # Plot the N(0,1) pdf for comparison
            x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
            ax[j][k].plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
            ax[j][k].legend(loc='right', frameon=False)
            ax[j][k].set_title("n = " + str(n_j) + ", eta = " + str(eta_k))
    plt.show()
    return mean, var
In [ ]:
# Revised version of graph: store the means and variances in a len(n) x len(eta) grid instead of flat lists
def graph(n, eta):
    '''Inputs: n is a list of integers, each a total number of iterations;
               eta is a list of floats, each a step size;
       Output: histograms of the LMC samples for every (n, eta) pair,
               plus two nested lists (indexed as [j][k] for n[j] and eta[k]) with the means and variances.
    '''
    fig, ax = plt.subplots(len(n), len(eta), figsize=(20, 40))
    mean = [[0 for _ in range(len(eta))] for _ in range(len(n))]
    var = [[0 for _ in range(len(eta))] for _ in range(len(n))]
    for j in range(len(n)):
        n_j = n[j]
        for k in range(len(eta)):
            eta_k = eta[k]
            i = 0
            normrv = []
            while i < 100:
                normrv.append(LMC(n=n_j, eta=eta_k))
                i += 1
            ax[j][k].hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
            mean[j][k] = np.mean(normrv)
            var[j][k] = np.var(normrv)
            # Plot the N(0,1) pdf for comparison
            x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
            ax[j][k].plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
            ax[j][k].legend(loc='right', frameon=False)
            ax[j][k].set_title("n = " + str(n_j) + ", eta = " + str(eta_k))
    plt.style.use("fast")
    plt.show()
    return mean, var
In [ ]:
ns = [100,500,1000,2000,3000]
etas = [0.005,0.001,0.0001,0.00001]
mean3, var3 = graph(ns,etas)
print(mean3)
print(var3)
[[0.1818463965865965, -0.05454038574405464, 0.010083789802315174, -0.06072416879524485], [-0.1674246098541142, -0.14796209419862486, 0.011510887346283185, 0.020800291879686274], [-0.4124260513514807, -0.16413857549842042, -0.04182724606005664, -0.1378668131576316], [-0.7127856172683308, 0.1610837712451786, -0.13804921155255886, 0.06928075785573949], [0.5043703798329314, 0.08970123133566751, 0.058033459062362625, 0.015082277749448404]]
[[2.1231521223163607, 1.3040952650515236, 0.9837168888122667, 1.0450596854721517], [6.228538815935967, 2.251560239946582, 1.5369236602470377, 1.046610287889536], [13.367128442430307, 3.2455364118045282, 1.2673493177351423, 0.88526883492904], [28.042139422331974, 5.9925624717533905, 1.6323223838283305, 1.0867278614513918], [33.027829674914564, 8.068360355256521, 1.6061783569207817, 1.0937748461117844]]
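The nested lists are hard to read; a small sketch (not run above) labels them with the ns and etas values using the pandas import at the top:
In [ ]:
# Hypothetical formatting step: show the results as tables indexed by n (rows) and eta (columns).
mean_df = pd.DataFrame(mean3, index=ns, columns=etas)
var_df = pd.DataFrame(var3, index=ns, columns=etas)
print("Means:\n", mean_df)
print("Variances:\n", var_df)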
Now plot how the mean and variance converge to 0 and 1, respectively, as eta shrinks, for different values of n.
In [ ]:
print(plt.style.available)
['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']
In [ ]:
fig, ax = plt.subplots(1, 2, figsize=(10, 5)) # Adjusted figsize for better visibility
for i in range(len(ns)):
    means = [mean3[i][k] for k in range(len(etas))]
    variances = [var3[i][k] for k in range(len(etas))]
    ax[0].plot(np.log(etas), means, label="n=" + str(ns[i]))
    ax[1].plot(np.log(etas), variances, label="n=" + str(ns[i]))
ax[0].axhline(y=0, color='gray', linestyle='--', linewidth=1)
ax[1].axhline(y=1, color='gray', linestyle='--', linewidth=1)
ax[0].set_title("Mean")
ax[0].set_xlabel("log(eta)")
ax[0].set_ylabel("Mean Value")
ax[0].legend(loc='best')
ax[1].set_title("Variance")
ax[1].set_xlabel("log(eta)")
ax[1].set_ylabel("Variance Value")
ax[1].legend(loc='best')
plt.style.use("fast")
plt.show()
On the left graph, the mean converges toward 0 as eta gets smaller, and larger n appears to land somewhat closer to 0. On the right graph, the variance converges toward 1 as eta gets smaller.
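One rough sanity check on the scale of the variance inflation at larger eta (an approximation, assuming the drift contribution per step stays small, as it does here since $|x\varphi(x)| \le \varphi(1) \approx 0.24$): each iteration adds independent Gaussian noise of variance $2\eta$, and the chain starts from an exact $N(0,1)$ draw, so after $n$ steps

$$\operatorname{Var}(x_n) \approx 1 + 2\eta n.$$

For $n = 3000$, $\eta = 0.005$ this gives about 31, close to the observed 33, and it shrinks back toward 1 as $\eta \to 0$ for fixed $n$.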
Below is an attempt to use the separation method Professor Deb mentioned, where we sample values with several iterations in between so that the generated random variables are approximately independent. However, I can't seem to make it work: the result I get is very far from a normal distribution.
In [ ]:
def list_LMC(total: int, gap: int, eta: float) -> list:
    '''Inputs: total is the number of random variables in the returned list;
               gap is the number of LMC iterations placed between consecutive draws;
               eta is the step size
       Output: a list of random variables simulated through LMC
    '''
    i = 1
    x = LMC(n=gap, eta=eta)
    rv = [x]
    while i < total:
        x = LMC(n=gap, eta=eta, x=x)  # continue the chain from the previous x for another `gap` iterations
        rv.append(x)
        i += 1
    return rv
In [ ]:
normrv = list_LMC(total=100, gap=10000, eta=0.0001)
fig, ax = plt.subplots(1, 1)
ax.hist(normrv, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
ax.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
ax.legend(loc='best', frameon=False)
plt.show()
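Since the whole point of the gap is to make consecutive draws roughly independent, a quick diagnostic (sketched here, not run above) is the lag-1 sample autocorrelation of the thinned sequence; values near 0 are consistent with independence:
In [ ]:
# Hypothetical diagnostic: lag-1 autocorrelation of the thinned LMC draws.
samples = np.asarray(normrv)
lag1_corr = np.corrcoef(samples[:-1], samples[1:])[0, 1]
print("Lag-1 autocorrelation:", lag1_corr)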