########################################

# Section 3.2.1
# Moments estimators

########################################

# Simulating from an exponential n=1,..,100
# Do it over 500 replications

run1 = matrix(rep(0,100*500),ncol = 100)

# simulate from an exponential with rate 1.5

# We estimate the rate by using the inverse of the sample mean.

sim1 = matrix(rexp(100*500,rate = 1.5),ncol = 100)

for(j in c(1:500)){
    
for(i in c(1:100)){

 test = sim1[j,c(1:i)]
test1 = mean(test)
run1[j,i] = 1/test1

}

}

##################################3


m1 = max(run1[c(1,2,3,4,5),])
m2 = min(run1[c(1,2,3,4,5),])

# Each row of run1 contains averages evaluated from n = 1 to n = 100.

# Make a plot of the first 5

plot(c(1:100),run1[1,c(1:100)], type='l', lwd=3, main="",xlab="sample size",ylab="sample rate",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8, ylim=c(m2,m1),col="blue")
grid(lwd=3)
lines(c(1:100),run1[2,c(1:100)],lwd=3,col="red")
lines(c(1:100),run1[3,c(1:100)],lwd=3,col="green")
lines(c(1:100),run1[4,c(1:100)],lwd=3,col="orange")
lines(c(1:100),run1[5,c(1:100)],lwd=3,col="purple")
lines(c(1:100),rep(1.5,100),lty=3,lwd=3,col="pink")

################################

# comparing the variance with the asymptotic sampling variance:

var2 = rep(0,100)

for(j in c(2:100)){
    
var2[j] = j*var(run1[,j])

}

plot(c(3:100),var2[c(3:100)], type='l', lwd=3, main="sampling var of estimator",xlab="sample size n",ylab="n times variance",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8,col="blue")

lines(c(3:100),rep(2.25,98),lty=3,lwd=3,col="red")

###############################
#
# Next we compare look at the finite sampling distribution of the estimators
#
###############################

# Consider the sample sizes n = 5, 20 and 100

# n = 5

hist(run1[,5],xlab="",ylab="",main="sampling distribution, when n  = 5")

# simulate from a normal with mean 1.5 and variance 2.25/n

sim5  = rnorm(n=1000,mean = 1.5, sd = sqrt(2.25/5))

hist(sim5,xlab="",ylab="",main="corresponding normal, when n  = 5")

# n = 20

hist(run1[,20],xlab="",ylab="",main="sampling distribution, when n  = 20")

# simulate from a normal with mean 1.5 and variance 2.25/n

sim20  = rnorm(n=1000,mean = 1.5, sd = sqrt(2.25/20))

hist(sim20,xlab="",ylab="",main="corresponding normal, when n  = 20")

# n = 100 

hist(run1[,100],xlab="",ylab="",main="sampling distribution, when n  = 100")

# simulate from a normal with mean 1.5 and variance 2.25/n

sim100  = rnorm(n=1000,mean = 1.5, sd = sqrt(2.25/100))

hist(sim100,xlab="",ylab="",main="corresponding normal, when n  = 100")

#########################################

# Section 3.3.1 Monte Carlo methods.

# The true distribution of the estimator with population lambda = 1.5

explambda = 1.5
run2 = rep(0,500)

# simulate from an exponential with rate 1.5

sim2 = matrix(rexp(20*500,rate = explambda),ncol = 20)

# For each row estimate the lambda and do this 500 times, 
# If we do it enough times we obtain nearly the sampling 
# distribution of lambda.

for(j in c(1:500)){
    
 test = sim2[j,]
test2 = mean(test)
run2[j] = 1/test2

}

hist(run2,xlab="",ylab="",main="histogram based on lambda = 1.5")

# Bootstrap demonstration with the exponential distribution

demo = rexp(n=20,rate = 1.5)
hatlambda = 1/mean(demo)

run2 = rep(0,500)

# simulate from an exponential with the estimated rate hatrate = 1/mean(demo)
# We estimate the lambda› by using the inverse of the sample mean.

sim2 = matrix(rexp(20*500,rate = hatlambda),ncol = 20)

for(j in c(1:500)){
    
 test = sim2[j,]
test2 = mean(test)
run2[j] = 1/test2

}

hist(run2,xlab="",ylab="",main="histogram based on lambdahat = 1.243")

#######################

# Making a QQplot

truequantiles = sort(run1[,20])
estquantiles = sort(run2)

plot(truequantiles,estquantiles, type='p', lwd=1, main="QQplot",xlab="True Quantiles",ylab="Estimated distribution",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8,col="blue")


lines(truequantiles,truequantiles)

####################

# Estimated variance = var(run2)
# True variance = var(run1[,20])

#########################

# Section 3.2.2
# Bootstrap sample
# resampling from demo

sim3 = matrix(rep(0,20*500),ncol = 20)

for(i in c(1:500)){

sim3[i,] = sample(demo,replace = T)    

    }

run3 = rep(0,500)

# simulate from an exponential with rate 1.5

# We estimate the rate by using the inverse of the sample mean.

for(j in c(1:500)){
    
 test = sim3[j,]
test2 = mean(test)
run3[j] = 1/test2

}

hist(run3,xlab="",ylab="",main="histogram based on bootstrap")


###########################################################

# Bootstrap z-transforms (pivotal)
# The above gives the bootstrap distribution but not the z-transform, 
# We need the bootstrap distribution of the z-transform to construct confidence
# intervals. 

# First get data (this is called demo)

demo = rexp(n=20,rate = 1.5)

# In class we used:

demo = c(0.5475, 0.0089, 1.1269, 0.7519, 0.5628, 0.8547, 1.9941, 0.7383,
  0.0529, 0.0243,  0.5029, 0.4628, 0.3951, 0.7799, 2.3609, 1.3204, 0.1754, 1.5920, 1.0729, 0.7659)

#lambdaest = 1/mean(demo) = 1.242

# est1 gives the point estimate of lambda based on demo

est1 = 1/mean(demo)

sim4 = matrix(rep(0,20*5000),ncol = 20)

#Next we make the bootstrap simulation (5000 draws each of length 20):

for(i in c(1:5000)){

sim4[i,] = sample(demo,replace = T)    

    }

# For each bootstrap simulation we make the 
# ztransform (note sample size n = 20)

run4 = rep(0,5000)

for(j in c(1:5000)){
    
 test = sim4[j,]
 test2 = mean(test)
 lambda1 = 1/test2
 stderrorlambda1 = lambda1/sqrt(20)
 bootstrapz = (lambda1 - est1)/stderrorlambda1 
 run4[j] = bootstrapz

}

# The histogram should give a corrected z-score.

hist(run4, main="bootstrap distribution of standardized estimator")

quantile(run4,0.025)
quantile(run4,0.975)

# To get the true z-transform distribution we use

truerun4 = rep(0,500)

for(j in c(1:500)){
    
 test = rexp(n=20,rate = 1.5)
test2 = mean(test)
lambda1 = 1/test2
stderrorlambda1 = lambda1/sqrt(20)
true = (lambda1 - 1.5)/stderrorlambda1 
    
truerun4[j] = true

}

hist(truerun4,main="True distribution of standardized estimator")

quantile(truerun4,0.025)
quantile(truerun4,0.975)


################################3

# QQplot
# Comparing the bootstrap distribution with the true sampling distribution


truequantiles = sort(run1[,20])
estquantiles = sort(run3)

plot(truequantiles,estquantiles, type='p', lwd=1, main="QQplot",xlab="True Quantiles",ylab="Estimated distribution",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8,col="blue")


lines(truequantiles,truequantiles)

#######################################
########################################

# Section 3.4.1 motivating MLE

#########################################

test = rexp(n=20,rate = 1/750)
tempmean = mean(test)
tempsum = 20*tempmean

#Making the likelihood

rateinv = c(500:1500)

# like1 = the MLE for parameter between 1/1500 to 1/500

like1 = (1/rateinv**20)*exp(-(1/rateinv)*tempsum)

plot(rateinv,like1,type = "l",lwd = 3,col="blue",ylab="likelihood",main="Plot of Likelihood")

lines(c(750,750),c(0,max(like1)),lwd = 3,col="red")
lines(c(1000,1000),c(0,max(like1)),lwd = 3,col="red")
lines(c(1250,1250),c(0,max(like1)),lwd = 3,col="red")

plot(rateinv,log(like1),type = "l",lwd = 3,col="blue",ylab="log likelihood",main="Plot of log-Likelihood")
lines(c(750,750),c(min(log(like1)),max(log(like1))),lwd = 3,col="red")
lines(c(1000,1000),c(min(log(like1)),max(log(like1))),lwd = 3,col="red")
lines(c(1250,1250),c(min(log(like1)),max(log(like1))),lwd = 3,col="red")


#################################################

# Section 3.4.2 The inflated Poisson distribution

k = c(0:20)

P = rep(0,21)

p = 0.3

#lambda = 1
#lambda = 4
lambda = 10

P[1] = p+(1-p)*exp(-lambda)


for(i in c(2:21)){

    P[i] = (1-p)*exp(-lambda)*(lambda**(k[i]))/gamma((k[i]+1))

    }

#P1 = P
#P4 = P

P10 = P

plot(k,P1,type="o",lwd=2,col="blue",xlab="k",ylab="P(X=k)",main="Inflated Possion")
points(k,P4,type="o",lwd=2,col="purple")
points(k,P10,type="o",lwd=2,col="red")


#################################################

# Section 3.4.3

# Simulate from a gamma function alpha = 9, beta = 2

xdata = rgamma(n=100,shape =9,rate =2)

# Below is the negative-likelihood based on 
# gamma distribution using the generated data.

LikeGamma = function(par){
  n = length(xdata) # xdata is the data vector we input into likelihood
  alpha = par[1]; beta = par[2]
  loglik = (alpha-1)*sum(log(xdata))- beta*sum(xdata) + n*alpha*log(beta) - n*log(gamma(alpha))
  nloglik= -loglik
  return(nloglik)
}
    
## "minimization" using optim function
## par: initial value; fn = function to minimize

# As initial value we give the vector alpha = 1 and
# beta = 1.

# A better initial value is to give the Moments estimator
# this would speed up the algorithm and if the
# likelihood was not concave

fit.optim = optim(par= c(2,2), fn=LikeGamma, hessian = T)

fit.optim$par
fit.optim$convergence
solve(fit.optim$hessian) #inverse hessian

#################################################

# Section 3.5.2
# Finite sampling distributions

# Focus on Gamma, but to makes things easier we set one parameter as 
# known. # shape = alpha, rate = beta


zmatrixS = rgamma(40000,shape = 9,rate =2)

simS = matrix(zmatrixS,nrow=400)
runS = matrix(rep(0,100*400),nrow = 400)
asyVar = matrix(zmatrixS,nrow=400)

for(j in c(1:400)){
    for(i in c(1:100)){

xdata = simS[j,c(1:i)]

# Below is the negative-likelihood based on 
# gamma distribution using the generated data.

LikeGamma = function(par){
  n = length(xdata) # xdata is the data vector we input into likelihood
  alpha = par[1]; beta = 2
  loglik = (alpha-1)*sum(log(xdata))- beta*sum(xdata) + n*alpha*log(beta) - n*log(gamma(alpha))
  nloglik= -loglik
  return(nloglik)
}
    
## "minimization" using optim function
## par: initial value; fn = function to minimize
# As initial value we give the vector alpha = 3

fit.optim = optim(par= 3, fn=LikeGamma, hessian = T)
runS[j,i] = fit.optim$par
asyVar[j,i] = solve(fit.optim$hessian) #inverse hessian

    }
    }


# Make the usual trajectories. 

m1 = max(runS[c(1,2,3,4,5),])
m2 = min(runS[c(1,2,3,4,5),])

# Each row of run1 contains averages evaluated from n = 1 to n = 500.

# Make a plot of the first 5

plot(c(1:100),runS[1,c(1:100)], type='l', lwd=3, main="",xlab="sample size",ylab="sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8, ylim=c(m2,m1),col="blue")
grid(lwd=3)
lines(c(1:100),runS[2,c(1:100)],lwd=3,col="red")
lines(c(1:100),runS[3,c(1:100)],lwd=3,col="green")
lines(c(1:100),runS[4,c(1:100)],lwd=3,col="orange")
lines(c(1:100),runS[5,c(1:100)],lwd=3,col="purple")
lines(c(1:100),rep(9,100),lty=3,col="pink")

# Make a plot of the bias

meanM = rep(0,100)

for(i in c(1:100)){

    meanM[i] = (runS[,i]-9)

}

varM = rep(0,100)

for(i in c(1:100)){

    varM[i] = var(runS[,i])*i

}

plot(c(1:100),meanM, type='l', lwd=3, main="average bias",xlab="sample size",col="blue")

plot(c(1:100),varM, type='l', lwd=3, main="variance times sample size",xlab="sample size",col="blue")
lines(c(1:100),rep(8.5,100),lty=4,col="red")

##############################################

# make histograms for different sample sizes

M1=9
V1=8.5

k=20

transform = sort((runS[,k]-M1)/sqrt(V1/k))

# Make histogram of transformed sample means

par(mfrow=c(2,1))
hist(transform,xlab="",ylab="",main="n=20")

# qqplot

plot(nq,transform, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of sample mean")

lines(nq,nq)


#################################################

# Section 3.6 (Fisher information matrix)

# Simulate from two different exponentials;
# one were \lambda is small = small I(\lambda)
# the other where lambda is large

# generate data lambda = 1

expsmall = rexp(n=10,rate = 1)

ratesmall = seq(0.1,5,length=1000)

expsmall = c(2.0546, 0.9861, 0.3977, 1.9480, 0.8082,
  0.8082,  0.0491,  2.5444, 0.4528, 0.9950)


# generate data lambda = 100

explarge = rexp(n=10,rate = 100)

explarge = c(0.0008424, 0.0321545, 0.0009847,  0.0070063,
  0.0044617, 0.0470954, 0.0076897,  0.0055562, 0.0000079, 0.0050735)


zoomratelarge = seq(87,95,length=1000)
ratelarge = seq(50,140,length=1000)

# the MLE for parameter for the two data sets

likesmall = (ratesmall**10)*exp(-ratesmall*sum(expsmall))

likelarge = (ratelarge**10)*exp(-ratelarge*sum(explarge))
zoomlikelarge = (zoomratelarge**10)*exp(-zoomratelarge*sum(explarge))

# The log-likelihood


loglikesmall = log((ratesmall**10)*exp(-ratesmall*sum(expsmall)))

loglikelarge = log((ratelarge**10)*exp(-ratelarge*sum(explarge)))
loglikelargezoom = log((zoomratelarge**10)*exp(-zoomratelarge*sum(explarge)))


# A plot of the likelihood with a large information matrix 10/lambda^{2} lambda =1, n=10

plot(ratesmall,likesmall,type = "l",lwd = 3,col="blue",ylab="likelihood",main="Plot of Likelihood",xlab="lambda=1")

plot(ratesmall,loglikesmall,type = "l",lwd = 3,col="blue",ylab="log likelihood",main="Plot of Log-Likelihood",xlab="lambda=1")

# A plot of the likelihood with a small information matrix 10/lambda^{2} lambda = 100, n=10

plot(zoomratelarge,zoomlikelarge,type = "l",lwd = 3,col="blue",ylab="likelihood",main="Plot of Likelihood",xlab="lambda=100")

plot(ratelarge,likelarge,type = "l",lwd = 3,col="blue",ylab="likelihood",main="Plot of Likelihood",xlab="lambda=100")

####

plot(zoomratelarge,loglikelargezoom,type = "l",lwd = 3,col="blue",ylab="log likelihood",main="Plot of log-Likelihood",xlab="lambda=100")

plot(ratelarge,loglikelarge,type = "l",lwd = 3,col="blue",ylab="likelihood",main="Plot of log-Likelihood",xlab="lambda=100")


#######

# Plot the exponential density for different values of lambda 

x = seq(0,5,length = 100)
y1 = exp(-x)
y5 = 5*exp(-5*x)
y10 = 10*exp(-10*x)

plot(x,y1,ylim = c(0,11),ylab = "density",xlab = "x-axis",main = "exponential (lambda = 1,5,10)", type = "l",col = "blue",lwd=2)
lines(x,y5,col = "red",lwd=2)
lines(x,y10,col = "green",lwd=2)

# Plot the exponential density for different values of mu

plot(x,y1,ylim = c(0,11),ylab = "density",xlab = "x-axis",main = "exponential (mu = 1,1/5,1/10)", type = "l",col = "blue",lwd=2)
lines(x,y5,col = "red",lwd=2)
lines(x,y10,col = "green",lwd=2)

#########################################################################

# THE OBSERVED FISHER INFORMATION

# The observed Fisher information matrix: obtained in a numerical routine

# Simulate from a gamma distribution sample size 30


xdata = rgamma(30,shape = 9,rate =2)


##########################################################

# D = 1

# The observed Fisher information matrix: obtained in a numerical routine

#In this Routine we keep rate = 2 fixed and estimate shappe = 9

# The Fisher Information will have dimension 1

# shape = alpha and beta = rate.

# Below is the negative-likelihood based on 
# gamma distribution using the generated data.

LikeGamma = function(par){
  n = length(xdata) # xdata is the data vector we input into likelihood
  alpha = par[1]
  beta = 2
  loglik = (alpha-1)*sum(log(xdata))- beta*sum(xdata) + n*alpha*log(beta) - n*log(gamma(alpha))
  nloglik= -loglik
  return(nloglik)
}
    
## "minimization" using optim function
## par: initial value; fn = function to minimize
# As initial value we give the vector alpha = 3

fit.optim = optim(par=c(3), fn=LikeGamma, hessian = T)
par = fit.optim$par
ObservedFisher = fit.optim$hessian # Hessian
# Variance is 
V = solve(ObservedFisher)

###################################

# In this routine we estimate both shape and rate

# D = 2

# The observed Fisher information matrix: obtained in a numerical routine

#In this Routine we estimate both rate = 2 and shape  = 9

# The Fisher Information will be a 2 by 2 matrix

# Below is the negative-likelihood based on 
# gamma distribution using the generated data.

# Note that the same data set is used.
# The estimate for shape will be slightly different 
# if we include rate or do not include rate in the estimation.

LikeGamma = function(par){
  n = length(xdata) # xdata is the data vector we input into likelihood
  alpha = par[1]
  beta = par[2]
  loglik = (alpha-1)*sum(log(xdata))- beta*sum(xdata) + n*alpha*log(beta) - n*log(gamma(alpha))
  nloglik= -loglik
  return(nloglik)
}
    
## "minimization" using optim function
## par: initial value; fn = function to minimize
# As initial value we give the vector alpha = 3

fit.optim = optim(par=c(3,3), fn=LikeGamma, hessian = T)
par = fit.optim$par
ObservedFisher = fit.optim$hessian # Hessian
# Variance is 
V = solve(ObservedFisher)


#########################################################################

# Section 3.7 The uniform distribution

# Sample from a uniform distribution 

sample2 = runif(n=2,min=0,max=10)
sample4 = runif(n=4,min=0,max=10)
sample8 = runif(n=8,min=0,max=10)
sample16 = runif(n=16,min=0,max=10)
sample32 = runif(n=32,min=0,max=10)
sample64 = runif(n=64,min=0,max=10)

# Make a plot

plot(sample2,rep(1,2),type="o",ylim=c(0,7),lwd=2,xlim=c(0,11),ylab="log (base 2) of sample size",xlab="data")
points(sample4,rep(2,4),type="o",lwd=2)
points(sample8,rep(3,8),type="o",lwd=2)
points(sample16,rep(4,16),type="o",lwd=2)
points(sample32,rep(5,32),type="o",lwd=2)
points(sample64,rep(6,64),type="o",lwd=2)
# PUT MLE on plot
points(max(sample2),1,lwd=2,col="red")
points(max(sample4),2,lwd=2,col="red")
points(max(sample8),3,lwd=2,col="red")
points(max(sample16),4,lwd=2,col="red")
points(max(sample32),5,lwd=2,col="red")
points(max(sample64),6,lwd=2,col="red")
lines(c(10,10),c(0,7),col="blue",lwd=3)
grid(lwd=2)

###########################################################################

# Section 3.8 (relative efficiency MoM and MLE)


efficiency = function(alpha){

       eff = alpha*trigamma(alpha)

# Note that the trigamma disribution is the second derivative of log gamma
    
    return(eff)

      }

test = seq(0.1,10,length=100)

plot(test,efficiency(test),type="l",lwd = 3,col="blue",ylim = c(0,10),xlab = "alpha",ylab = "Asymptotic efficiency",
     main="Efficiency of Method of Moments wrt to MLE for Gamma")

lines(test,rep(1,length=100),lwd=3,lty=2,col="red")