# Rcode for the class
# Chapter 1

# Requires MASS

library("MASS")

############################

# Section 1.2

# Picture for projections onto Euclidean space
# PROJECTIONS ON EUCLIDEAN SPACE

y = c(0,2)
x = c(0,1)

y1 = c(1/4)
x1 = c(1/2)
y2 = c(1/sqrt(5))
x2 = c(2/sqrt(5))
y3 = c(0,3*sqrt(4/5)/2)
x3 = c(0,3*sqrt(4/5))
y4 = c(4/5)
x4 = c(8/5)

plot(x,y, type='o', lwd=4,   main="",xlab="",ylab="", ylim=c(0,2.25),xlim =c(0,2.25), col="blue")
points(x1,y1,lwd=3,col="blue")
lines(c(0,x1),c(0,y1),lwd=2,col="blue")
points(x2,y2,lwd=4,col="orange")
lines(x3,y3,lwd=3,col="orange",lty=2)
points(x4,y4,lwd=4,col="orange")
lines(c(1,x4),c(2,y4),col="red",lty=2,lwd = 3)

grid(lwd=3)

###########################

# Section 1.2 Dust clouds

## IID Data

Sigma = matrix(c(1,0,0,1),ncol=2)

xtemp = mvrnorm(500,c(0,0),Sigma)

# Plotting the bivariate data

plot(xtemp,  main="",xlab="X",ylab="Y", col="blue", lwd=4, ylim=c(-3,3),xlim =c(-3,3))
grid(lwd=3)

# New basis

E2 = matrix(c(1/sqrt(2),1/sqrt(2),1/sqrt(2),-1/sqrt(2)),ncol=2)

transform2 = xtemp%*%E2

plot(transform2,  main="",xlab="X",ylab="Y", col="blue", lwd=4, ylim=c(-3,3),xlim =c(-3,3))
grid(lwd=3)

# stretched basis

E2 = matrix(c(1/sqrt(2),1/sqrt(2),1/sqrt(2),-1/sqrt(2)),ncol=2)

xtemp3 = cbind(3*xtemp[,1],xtemp[,2])

transform3 = xtemp3%*%E2

plot(transform3,  main="",xlab="X",ylab="Y", col="blue", lwd=4)# ylim=c(-3,3),xlim =c(-3,3))
grid(lwd=3)


############################

# Section 1.3

# UNDERSTANDING COVARIANCE D=2

# Load the library MASS
# This allows us to draw samples from 
# a multivariate normal distribution. 

library(MASS)

# We will use the function mvrnorm
# this requires us to specify a vector mean
# and matrix variance.

#############################

library("MASS")

# Drawing 
# Correlated normal data

# Here we define the variance matrix. 

Sigma = matrix(c(5,4,4,6),ncol = 2)

# The vector mean in this example is (1,2)

# Below we draw 200 times a bivariate normal 
# distribution with mean (1,2) and variance Sigma

xtemp = mvrnorm(200,c(1,2),Sigma)

# Plotting the bivariate data

plot(xtemp,  main="",xlab="X",ylab="Y", ylim=c(-6,10),xlim =c(-6,10), col="blue")


#########################

# Drawing
# Independent normal data

Sigma = matrix(c(5,0,0,6),ncol = 2)

xtemp = mvrnorm(200,c(1,2),Sigma)

# Plotting the bivariate data

plot(xtemp,  main="",xlab="X",ylab="Y", ylim=c(-6,10),xlim =c(-6,10), col="blue")


############################

# Section 1.3 more dust clouds

A = matrix(c(3,-1.5,-2,2),nrow=2)

Sigma = matrix(c(1,0,0,1),ncol = 2)

xtemp = t(mvrnorm(200,c(0,0),Sigma))

ytemp = A%*%xtemp + c(7,8)


plot(xtemp[1,],xtemp[2,],pch=16,lwd=3,col="blue",xlab="X",ylab="Y",xlim=c(-5,20),ylim=c(-5,20),asp=1)

points(ytemp[1,],ytemp[2,],col="red",lwd=3,pch=16)

# Since the xtemp are iid the variance (X_{1},X_{2}) is

A%*%t(A)

> A%*%t(A)
     [,1]  [,2]
[1,] 13.0 -8.50
[2,] -8.5  6.25

# The eigenvectors are

> eigen(A%*%t(A))

eigen() decomposition
$values
[1] 18.7705249  0.4794751

$vectors
           [,1]       [,2]
[1,] -0.8273551 -0.5616792
[2,]  0.5616792 -0.8273551


############################

# Projections by demeaning Dimension 2

# Suppose that (X_{1},X_{2}) are iid with mean 5 and variance one.


library("MASS")


Sigma2 = matrix(c(1,0,0,1),ncol = 2)

xtemp2 = t(mvrnorm(200,c(5,5),Sigma2))

ytemp2 = xtemp2

n = length(xtemp2[1,])

for(i in c(1:n)){

    ytemp2[,i] = xtemp2[,i] - mean(xtemp2[,i])

    }

plot(xtemp2[1,],xtemp2[2,],pch=16,lwd=3,col="blue",xlab="X",ylab="Y",xlim=c(-3,10),ylim=c(-3,10),asp=1)

points(ytemp2[1,],ytemp2[2,],col="red",lwd=3,pch=16)

points(c(xtemp2[1,1]),c(xtemp2[2,1]),lwd=2,col="green",pch=16)

lines(c(xtemp2[1,1],ytemp2[1,1]),c(xtemp2[2,1],ytemp2[2,1]),lwd=2,col="green")

points(c(xtemp2[1,2]),c(xtemp2[2,2]),lwd=2,col="green",pch=16)

lines(c(xtemp2[1,2],ytemp2[1,2]),c(xtemp2[2,2],ytemp2[2,2]),lwd=2,col="green")


################################

# Dimension 3

# Suppose that (X_{1},X_{2},X_{3}) are iid with mean 5 and variance one.

library("MASS")
library("scatterplot3d")


Sigma3 = matrix(c(1,0,0,0,1,0,0,0,1),ncol = 3)

xtemp3 = t(mvrnorm(100,c(5,5,5),Sigma3))

ytemp3 = xtemp3

n = length(xtemp3[1,])

for(i in c(1:n)){

    ytemp3[,i] = xtemp3[,i] - mean(xtemp3[,i])

    }

xD = xtemp3[1,]

yD = xtemp3[2,]

zD = xtemp3[3,]

xM = ytemp3[1,]

yM = ytemp3[2,]

zM = ytemp3[3,]


#scatterplot3d(xM[c(1:2)],yM[c(1:2)],zM[c(1:2)],pch = 16, color="blue",lwd=3,xlab = "X1",ylab = "Y1", zlab="Z1",xlim=c(-3,8),ylim = c(-3,8),zlim=c(-3,8))

#scatterplot3d(2,1,1,pch = 16, color="blue",lwd=3,xlab = "X1",ylab = "Y1", zlab="Z1",xlim=c(-3,8),ylim = c(-3,8),zlim=c(-3,8))

sd3 = scatterplot3d(xM,yM,zM,pch = 16, color="red",lwd=3,xlab = "X1",ylab = "Y1", zlab="Z1",xlim=c(-3,8),ylim = c(-3,8),zlim=c(-3,8),angle = 200)

sd3$points3d(xD,yD,zD,col="blue",pch=16,lwd=3)


plot(xtemp2[1,],xtemp2[2,],pch=16,lwd=3,col="blue",xlab="X",ylab="Y",xlim=c(-3,10),ylim=c(-3,10))

points(ytemp2[1,],ytemp2[2,],col="red",lwd=3,pch=16)


############################

# Section 1.4
# Types of convergence

#############################

# DEMONSTRATING DIFFERENT FORMS OF CONVERGENCE, USING THE SAMPLE MEAN
# AT DIFFERENT SAMPLE SIZES (n=1...100 or 500). We start with n = 500, then focus
# on the case n = 100.
# 100 replications are done (it would have been better to do 1000 replications
# but I was a bit lazy to wait).

############################

# make a matrix of averages 

run1 = matrix(rep(0,1000*500),ncol = 500)

# simulate from a chi-square 1

#sim1 = matrix(rchisq(100*500,df=1),ncol = 500)

sim1 = matrix(rbinom(1000*500,size=5,p=0.1),ncol = 500)

for(j in c(1:1000)){
    
for(i in c(1:500)){

 test = sim1[j,c(1:i)]
test1 = mean(test)
run1[j,i] = test1

}

}

#############################

# Make plot of first 5

m1 = max(run1[c(1,2,3,4,5),])
m2 = min(run1[c(1,2,3,4,5),])

# Each row of run1 contains averages evaluated from n = 1 to n = 500.

# Make a plot of the first 5

plot(c(1:500),run1[1,c(1:500)], type='l', lwd=3, main="",xlab="sample size",ylab="sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8, ylim=c(m2,m1),col="blue")
grid(lwd=3)
lines(c(1:500),run1[2,c(1:500)],lwd=3,col="red")
lines(c(1:500),run1[3,c(1:500)],lwd=3,col="green")
lines(c(1:500),run1[4,c(1:500)],lwd=3,col="orange")
lines(c(1:500),run1[5,c(1:500)],lwd=3,col="purple")
lines(c(1:500),rep(1,500),lty=3,col="pink")

#############################


# Make plot of first 100
# To make the plots less cluttered we only use a max sample size of 100.

m1 = max(run1[c(1:100),])
m2 = min(run1[c(1:100),])

# Each row of run1 contains averages evaluated from n = 1 to n = 100.

# Make a plot of the first 5

plot(c(1:100),run1[1,c(1:100)], type='l', lwd=1, main="",xlab="sample size",ylab="sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8, ylim=c(m2,m1))
grid(lwd=3)

for(j in c(2:100)){

lines(c(1:100),run1[j,c(1:100)],lwd=1)

    }

# Also include the standard error.

lines(c(1:100),(c(1:100)/2)**(-1/2),col="red",lwd=2)

###################################

# calculate and plot the average squared error.

msqerr = rep(0,100)

for(j in c(1:100)){

    temp = sum((run1[,j]-1)**2)/length(run1[,j])

    msqerr[j] = temp

}

# Calculate the bias

meanbias = rep(0,100)

for(j in c(1:100)){

    tempbias = (mean(run1[,j])-1)

    meanbias[j] = tempbias

}

##################

#MSE plot

finer = seq(5,100,by=5)

SigmaTrue = (2/finer)

plot(finer,msqerr[finer], type='o', lwd=2,  main="",xlab="sample size",ylab="mean square error",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8)

lines(finer,msqerr[finer],lwd = 2,lty=2,col="red")
#points(seq(2,100,by=1),msqerr[seq(2,100,by=1)],col="red")
lines(finer,SigmaTrue,lwd=2,col="blue")


###################

#Bias plot

plot(c(1:100),meanbias, type='l', lwd=2, main="",xlab="sample size",ylab="mean square error",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8,col="blue")

lines(c(1:100),meanbias,lty=2,lwd = 2,col="red")


###################################

# Making QQplots, as described in Section 1.5.3, Definition 1.7
# Obtaining normal quantiles (for example, when size n )

# Let x be the input data set. Then 

n = length(x)

# Define the initial vector nq, this will give the quantiles of 
# the normal distribution (or whatever distribution you want)

nq = rep(0,n)

# use p=(i-0.5)/n rather than p=i/n to avoid the case p=1 (when i=n)

for(i in c(1:n)){

   nq[i] = qnorm((i-0.5)/n)
# For a t-distribution with df=2
# nq[i] = qt((i-0.5)/n,df=2)   
    
    }

# sort data from smallest to largest

xsort = sort(x)

# remove the last observation 

# Plot the quantiles of a normal against the empirical quantiles of 
# the data

plot(nq,xsort, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of data")

#####################################

# plot a histogram of the averages at several time points

# n = 1

hist(run1[,1],xlab="sample size",ylab="",main="n=1",xlim=c(0,6))

transform = sort((run1[,1]-1)/sqrt(2))

hist(transform,xlab="sample size",ylab="",main="n=1")

# qqplot

plot(nq,transform, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8)

lines(nq,nq)

####################################


# plot a histogram of the averages at several time points

# n = 5

hist(run1[,5],xlab="sample size",ylab="",main="n=5",xlim = c(0,6))

transform = sort((run1[,5]-1)/sqrt(2/5))

hist(transform,xlab="sample size",ylab="",main="n=5")

# qqplot

plot(nq,transform, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8)

lines(nq,nq)

####################################

# plot a histogram of the averages at several time points

# n = 10

hist(run1[,10],xlab="sample size",ylab="",main="n=10",xlim = c(0,6))

transform = sort((run1[,10]-1)/sqrt(2/10))

hist(transform,xlab="sample size",ylab="",main="n=10")

# qqplot

plot(nq,transform, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8)

lines(nq,nq)

#########################################


# plot a histogram of the averages at several time points

# n = 100

hist(run1[,100],xlab="sample size",ylab="",main="n=100",xlim=c(0,6))

transform = sort((run1[,100]-1)/sqrt(2/100))

hist(transform,xlab="sample size",ylab="",main="n=100")

# qqplot

plot(nq,transform, type='o', lwd=2, main="",xlab="normal quantiles",ylab="quantiles of sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8)

lines(nq,nq)

############################################################
#############################

# Make plot of first 5
# To make the plots less cluttered we only use a max sample size of 100.

all = run1[c(1:5),] 

m1 = max(run1[c(1:5),])
m2 = min(run1[c(1:5),])

all[1,c(15,67)] = rep(2,2) 

all[2,c(10,78)]=c(2.5,0) 

#all[3,c(12,37,76)]=rep(2.5,3) 

all[3,c(50,100)] = c(1.5,2.2)

all[4,c(58,92)]=c(1.8,2.4) 

all[5,c(23,90)]=rep(0.2,2)

#all[5,c(34,44,63,84)]=rep(1.8,4)

# Each row of run1 contains averages evaluated from n = 1 to n = 200.

# Make a plot of the first 5

plot(c(1:100),all[1,c(1:100)], type='l', lwd=2, main="",xlab="sample size",ylab="sample mean",
     cex.lab=1.3, cex.axis = 1.3, cex.main=1.8, ylim=c(m2,m1),col="blue")
points(c(1:100),all[1,c(1:100)])
grid(lwd=3)
lines(c(1:100),all[2,c(1:100)],lwd=2,col="red")
points(c(1:100),all[2,c(1:100)],lwd=1,col="red")
lines(c(1:100),all[3,c(1:100)],lwd=2,col="green")
points(c(1:100),all[3,c(1:100)],lwd=1,col="green")
lines(c(1:100),all[4,c(1:100)],lwd=2,col="orange")
points(c(1:100),all[4,c(1:100)],lwd=1,col="orange")
lines(c(1:100),all[5,c(1:100)],lwd=2,col="purple")
points(c(1:100),all[5,c(1:100)],lwd=1,col="purple")
#lines(c(1:100),rep(1,100),lty=3,col="pink")

#############################

# Section 1.5.4 (transformations of the sample means): consider two cases
# when the derivative condition is satisfied and not satisfied. 

##############################

# Sample size n = 200

##############################

test0 = matrix(rnorm(n=200*1000,mean = 0,sd=1),ncol=1000)
test05 = matrix(rnorm(n=200*1000,mean = 0.5,sd=1),ncol=1000)

mean0 = rep(0,1000)
mean05 = rep(0,1000)

for(i in c(1:1000)){

    mean0[i] = mean(test0[,i])
    mean05[i] = mean(test05[,i])

}

mean0sq = mean0**2
mean05sq = mean05**2


hist(mean0sq,xlab="sample mean squared",ylab="",main="mean = 0, n  = 200")

hist(mean05sq,xlab="sample mean squared",ylab="",main="mean = 0.5, n  = 200")

##############################

# Sample size n = 1000

###############################

test0 = matrix(rnorm(n=1000*1000,mean = 0,sd=1),ncol=1000)
test05 = matrix(rnorm(n=1000*1000,mean = 0.5,sd=1),ncol=1000)

mean0 = rep(0,1000)
mean05 = rep(0,1000)

for(i in c(1:1000)){

    mean0[i] = mean(test0[,i])
    mean05[i] = mean(test05[,i])

}

mean0sq = mean0**2
mean05sq = mean05**2


hist(mean0sq,xlab="sample mean squared",ylab="",main="mean = 0, n  = 1000")

hist(mean05sq,xlab="sample mean squared",ylab="",main="mean = 0.5, n  = 1000")