##########################################################################

# likelihood and bayesian analysis in R to help you with
# take-home test 1 problem 2(B)

# unbuffer the output

# set the right working directory
# on the desktop machines installed in the baskin auditorium this is

# setwd( 'C:/Users/user/Desktop/STAT-206-Winter-2020' )

y <- c( 495, 541, 1461, 1555, 1603, 2201, 2750, 3468, 3516, 4319,
  6622, 7728, 13159, 21194 )

# from the context of the problem, y is non-negative and can
# only take on integer values, but the observed range is from
# around 0 to more than 21,000, and the gaps between the
# possible values (only 1 in size) are so small in relation
# to the spread of the data that it's vastly preferable
# to model our uncertainty about y (before the data arrived)
# with a continuous probability density function (PDF)

# we still use the cheating approach to
# sampling distribution specification at this stage
# of the course, because we haven't looked yet at better options

# make a histogram to look at the distributional shape

hist( y, prob = T, main = 'Wire Stress Case Study' )

# we want to model y with a continuous density,
# because putting a discrete distribution on it
# would result in about 20,000 distinct possible values,
# which is silly; and besides, the gaps of size 1
# between the possible values of y are tiny in relation
# to typical y values (e.g., the median is about 3,000)

# the histogram looks roughly like a member of
# the family of Exponential distributions

print( n <- length( y ) )
# [1] 14

print( s <- sum( y ) )
# [1] 70612

# math interlude: work out the likelihood function
# in the Exponential sampling model

# you can use either of the following two R functions
# to evaluate the likelihood (note that the second definition
# overwrites the first, since they share the same name):

exponential.likelihood <- function( lambda, s, n ) {

  log.likelihood <- - n * log( lambda ) - s / lambda

  return( exp( log.likelihood ) )

}

exponential.likelihood <- function( lambda, s, n ) {

  likelihood <- lambda^( - n ) * exp( - s / lambda )

  return( likelihood )

}

# both work equally well here, because with our
# n and s no overflow or underflow conditions arise

# warning: in some versions of this code i showed you earlier,
# the calling sequence for the likelihood function was
#   function( lambda, n, s )
# this will obviously produce different results
# if we inadvertently interchange the roles of s and n

# math interlude: show that the MLE of lambda is the sample mean

print( lambda.hat.mle <- mean( y ) )
# [1] 5043.714

# math interlude: write down the log likelihood function
# in this model

exponential.log.likelihood <- function( lambda, s, n ) {

  log.likelihood <- - n * log( lambda ) - s / lambda

  return( log.likelihood )

}

# let's plot the log likelihood for lambda between 2000
# and 12000

lambda.grid <- seq( 2000, 12000, length = 500 )

plot( lambda.grid, exponential.log.likelihood( lambda.grid, s, n ),
  type = 'l', lwd = 2, col = 'cadetblue4', xlab = 'lambda',
  ylab = 'log likelihood', main = 'Exponential sampling model' )

# math interlude: show that the inverse cdf (quantile function)
# in the Exponential sampling model is
#   F^( -1 )( p ) = - lambda * log( 1 - p )

exponential.inverse.cdf <- function( lambda, p ) {

  quantile <- - lambda * log( 1 - p )

  return( quantile )

}

# let's make the Exponential probability plot
# and see if it looks like the line y = x

plot( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
  sort( y ), xlab = 'Exponential quantiles', ylab = 'y',
  xlim = c( 0, max( y ) ), type = 'l', lwd = 2, col = 'red' )

abline( 0, 1, lwd = 2, col = 'black', lty = 2 )
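# optional numerical sanity check (not part of the test): if the
# math interludes above are right, maximizing the log likelihood
# numerically should land on the sample mean, and our inverse cdf
# should agree with R's built-in qexp (which uses the
# rate = 1 / lambda parameterization); this is a minimal sketch --
# the search interval c( 1, 100000 ) is an arbitrary choice

print( optimize( exponential.log.likelihood, interval = c( 1, 100000 ),
  maximum = TRUE, s = s, n = n )$maximum )

# this should be close to lambda.hat.mle = 5043.714

print( all.equal( exponential.inverse.cdf( lambda.hat.mle, 0.9 ),
  qexp( 0.9, rate = 1 / lambda.hat.mle ) ) )

# this should print TRUE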
#########################################################################

# making the table in take-home test 1 problem 2(B)(v)(b)

inverse.gamma.mean.sd <- function( alpha, beta ) {

  mean <- beta / ( alpha - 1 )

  sd <- beta / ( ( alpha - 1 ) * sqrt( alpha - 2 ) )

  return( c( mean, sd ) )

}

alpha <- 33 / 4
beta <- 32625

print( prior.inverse.gamma.mean.sd <- inverse.gamma.mean.sd( alpha, beta ) )
# [1] 4500 1800

print( likelihood.inverse.gamma.mean.sd <- inverse.gamma.mean.sd( n - 1, s ) )
# [1] 5884.333 1774.193

print( posterior.inverse.gamma.mean.sd <-
  inverse.gamma.mean.sd( alpha + n, beta + s ) )
# [1] 4858.212 1079.603

#         prior   likelihood   posterior
# mean    4500    5884.3       4858.2      the posterior mean has to be between
#                                          the prior and likelihood means
# sd      1800    1774.2       1079.6      the posterior sd has to be smaller
#                                          than both the prior and likelihood sds

# get the inverse gamma density and quantile functions from CRAN

install.packages( 'invgamma' )

library( invgamma )

# plotting the prior, likelihood and posterior on the same graph

lambda.grid <- seq( 1000, 12000, length = 500 )

plot( lambda.grid, dinvgamma( lambda.grid, alpha + n, rate = ( beta + s ) ),
  type = 'l', lwd = 2, lty = 1, col = 'black', xlab = 'lambda',
  ylab = 'Density', main = 'THT 1 problem 2(B)(v)(c)' )

lines( lambda.grid, dinvgamma( lambda.grid, n - 1, rate = s ),
  lwd = 2, lty = 2, col = 'blue' )

lines( lambda.grid, dinvgamma( lambda.grid, alpha, rate = beta ),
  lwd = 2, lty = 3, col = 'darkcyan' )

text( 7000, 3.5e-04, 'posterior', col = 'black' )
text( 8000, 2.0e-04, 'likelihood', col = 'blue' )
text( 2000, 2.5e-04, 'prior', col = 'darkcyan' )

##########################################################################

# math interlude: show that the fisher information in this model
# is n / lambda.hat^2, so that the (repeated-sampling) standard error
# of the MLE is sqrt( 1 / ( fisher info ) ) = lambda.hat.mle / sqrt( n )

print( se.hat.lambda.hat.mle <- lambda.hat.mle / sqrt( n ) )
# [1] 1347.989

# math interlude: remember that a 100 ( 1 - alpha )% CI for lambda
# looks like
#   lambda.hat.mle +/- Phi^( -1 )( 1 - alpha / 2 ) * SE.hat( lambda.hat.mle )

print( z.95 <- qnorm( 1 - 0.05 / 2 ) )

print( ninety.five.percent.interval <-
  c( lambda.hat.mle - z.95 * se.hat.lambda.hat.mle,
     lambda.hat.mle + z.95 * se.hat.lambda.hat.mle ) )
# [1] 2401.704 7685.725

print( z.999 <- qnorm( 0.9995 ) )

print( ninety.nine.point.nine.percent.interval <-
  c( lambda.hat.mle - z.999 * se.hat.lambda.hat.mle,
     lambda.hat.mle + z.999 * se.hat.lambda.hat.mle ) )
# [1] 608.1193 9479.3093

# math interlude: recall from our earlier work that
# the posterior with the conjugate prior is
#   Gamma^( -1 )( alpha + n, beta + s )

# for the 95% posterior interval we want

c( qinvgamma( 0.025, alpha + n, rate = ( beta + s ) ),
   qinvgamma( 0.975, alpha + n, rate = ( beta + s ) ) )
# [1] 3186.027 7381.964

# for the 99.9% posterior interval we want

c( qinvgamma( 0.0005, alpha + n, rate = ( beta + s ) ),
   qinvgamma( 0.9995, alpha + n, rate = ( beta + s ) ) )
# [1] 2511.783 10423.188

# method                   95% interval         99.9% interval
# maximum likelihood       2401.7   7685.7       608.1   9479.3
# bayes with this prior    3186.0   7382.0      2511.8  10423.2

# these are quite different: why?
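# optional monte carlo check (not part of the test): we can verify
# the conjugate updating by simulation -- draws from the
# Gamma^( -1 )( alpha + n, beta + s ) posterior should have a mean,
# sd and 95% central interval close to the exact answers above;
# this is a minimal sketch, and the number of draws ( 1000000 )
# and the seed are arbitrary choices

set.seed( 1 )

lambda.star <- rinvgamma( 1000000, alpha + n, rate = ( beta + s ) )

print( c( mean( lambda.star ), sd( lambda.star ) ) )

# these should be close to 4858.212 and 1079.603

print( quantile( lambda.star, probs = c( 0.025, 0.975 ) ) )

# these should be close to 3186.027 and 7381.964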
#############################################################################

# the rest of this file covers optional material
# that's not part of take-home test 1

# the red curve and the black line (the target)
# in the Exponential probability plot above are not far from each other,
# which implies that the Exponential is a decent choice
# with this data set, but the two largest data points are somewhat
# bigger than the Exponential distribution expects; is this evidence
# sufficient to make us look for a better fit?

# we could answer this question with math, but it's easier
# to address with simulation

# here's the plan: (1) first i make a 'null' plot that will
# eventually contain all of the simulation results:

plot( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
  sort( y ), col = 'red', xlab = 'Quantiles', ylab = 'sorted y values',
  main = 'Exponential Sampling Distribution', lwd = 2,
  xlim = c( 0, max( y ) ), ylim = c( 0, max( y ) ), type = 'n' )

# (2) then i execute a loop that repeatedly (a) generates a data set
# randomly drawn from the Exponential distribution with parameter
# lambda equal to lambda.hat.mle and (b) superimposes the resulting
# Exponential probability plot on the graph

# the help( rexp ) page says that the built-in function rexp( n, rate )
# creates n random draws from the distribution with density
#   f( y ) = rate * exp( - rate * y )

# this is not the parameterization we're using in the take-home test
# problem; to get the right distribution i have to set
#   rate = 1 / lambda.hat.mle

# i have to decide how many simulation replications to use;
# i'll start with M = 100

M <- 100

# each simulation replication gets its own random number seed
# ( set.seed( m ) inside the loop ), so that the whole simulation
# is reproducible without each replication repeating the same draws

for ( m in 1:M ) {

  set.seed( m )

  y.star <- rexp( n, rate = 1 / lambda.hat.mle )

  lines( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
    sort( y.star ), lwd = 1, col = 'black', lty = 3 )

}

abline( 0, 1, lwd = 2, col = 'blue', lty = 2 )

# now i superimpose the *actual* Exponential probability plot
# with our data on top of this plot, to see how unusual
# our data set is

lines( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
  sort( y ), lwd = 3, col = 'red' )

# you can see that our data set lies comfortably in the range
# of what to expect in Exponential probability plots
# of samples of size n = 14 -- in other words, the Exponential
# looks like a good sampling model for this case study

# if you want, you can make M bigger and see what happens;
# here i'll use M = 1000, and i'll increase the upper limit
# on the y axis so that less of the plot is cut off at the top:

plot( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
  sort( y ), col = 'red', xlab = 'Quantiles', ylab = 'sorted y values',
  main = 'Exponential Sampling Distribution', lwd = 2,
  xlim = c( 0, 17000 ), ylim = c( 0, 55000 ), type = 'n' )

M <- 1000

# here's where all of those colors in R called 'grayxx' are helpful,
# for creating the uncertainty band without making it go too black

for ( m in 1:M ) {

  set.seed( m )

  y.star <- rexp( n, rate = 1 / lambda.hat.mle )

  lines( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
    sort( y.star ), lwd = 1, col = 'gray70', lty = 3 )

}

abline( 0, 1, lwd = 2, col = 'blue', lty = 2 )

lines( exponential.inverse.cdf( lambda.hat.mle, ( ( 1:n ) - 0.5 ) / n ),
  sort( y ), lwd = 3, col = 'red' )

#############################################################################
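# one more optional idea (also not part of take-home test 1):
# instead of overplotting all M simulated probability plots, we can
# store the sorted simulated data sets and extract pointwise 2.5%
# and 97.5% quantiles, giving a numerical 95% uncertainty band for
# each order statistic; this is a minimal sketch -- M = 1000 and the
# 95% level are arbitrary choices, and the band is pointwise, not
# simultaneous, so it's only a rough diagnostic

M <- 1000

y.star.sorted <- matrix( NA, M, n )

for ( m in 1:M ) {

  set.seed( m )

  y.star.sorted[ m, ] <- sort( rexp( n, rate = 1 / lambda.hat.mle ) )

}

band <- apply( y.star.sorted, 2, quantile, probs = c( 0.025, 0.975 ) )

# any sorted data value falling outside its band would be a flag
# for lack of fit

print( rbind( lower = band[ 1, ], y = sort( y ), upper = band[ 2, ] ) )

#############################################################################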