#############################################################################

# gibbs sampling example: fitting the gaussian model with both
# mu and sigma unknown (lecture notes part 9, pages 57-71)

# the full-conditional distributions are given on page 57

# the next (driver) function implements gibbs sampling for this model
# i use the conjugate prior (lecture notes part 8, page 135)

# as inputs to this function,

#   y is the data vector
#   nu.0 is the prior sample size for sigma^2
#   sigma2.0 is the prior estimate of sigma^2
#   mu.0 is the prior estimate of mu
#   kappa.0 is the prior sample size for mu
#   mu.initial is the starting value for mu
#   sigma2.initial is the starting value for sigma^2
#   M is the number of monitoring iterations
#   B is the number of burn-in iterations
#   seed is the random-number seed used to initialize the sampler

gibbs.sampling.example <- function( y, nu.0, sigma2.0, mu.0, kappa.0,
  mu.initial, sigma2.initial, M, B, seed ) {

  set.seed( seed )

  n <- length( y )
  y.bar <- mean( y )

  # columns of the mcmc data set: mu, sigma^2, sigma, and a new y
  # (the posterior predictive draw); row 1 holds the starting values

  mcmc.data.set <- matrix( NA, M + B + 1, 4 )

  mcmc.data.set[ 1, ] <- c( mu.initial, sigma2.initial,
    sqrt( sigma2.initial ), mu.initial )

  for ( i in 2:( M + B + 1 ) ) {

    # draw mu from its full conditional, given the previous sigma^2 draw

    mu.star <- rnorm( 1, ( kappa.0 * mu.0 + n * y.bar ) / ( kappa.0 + n ),
      sqrt( mcmc.data.set[ i - 1, 2 ] / ( kappa.0 + n ) ) )

    # draw sigma^2 from its full conditional, given the new mu draw

    s2.mu.star <- sum( ( y - mu.star )^2 ) / n

    sigma2.star <- rsichi2( 1, nu.0 + 1 + n,
      ( nu.0 * sigma2.0 + kappa.0 * ( mu.star - mu.0 )^2 + n * s2.mu.star ) /
      ( nu.0 + 1 + n ) )

    # draw a new y from the gaussian sampling distribution,
    # given the current ( mu, sigma^2 ) draw

    y.star <- rnorm( 1, mu.star, sqrt( sigma2.star ) )

    mcmc.data.set[ i, ] <- c( mu.star, sigma2.star, sqrt( sigma2.star ),
      y.star )

    if ( ( i %% 10000 ) == 0 ) print( i )

  }

  return( mcmc.data.set )

}

# here's a simple function that generates IID samples from the
# scaled inverse chi-squared distribution

rsichi2 <- function( n, nu, s2 ) {

  return( nu * s2 / rchisq( n, nu ) )

}

# let's create a small gaussian data set for illustration;
# here n = 10 and the data-generating mean and SD are 50 and 5,
# respectively

set.seed( 3531229 )

print( y <- sort( signif( rnorm( 10, 50, 5 ), 3 ) ) )

# [1] 43.5 48.5 50.5 51.4 52.2 53.2 53.6 53.9 55.3 61.6

mean( y )

# [1] 52.37

sd( y )

# [1] 4.671914

# here i'll choose a rather informative prior for illustration:
# the prior sample sizes for sigma^2 and mu are both 10,
# and the prior estimates differ quite a bit from the data-generating values

nu.0 <- 10
sigma2.0 <- 15
mu.0 <- 90
kappa.0 <- 10

# i'm deliberately starting the markov chain off
# at values far from the posterior means

mu.initial <- 200
sigma2.initial <- 0.1

M <- 100000
B <- 1000
seed <- 123456

system.time( mcmc.data.set.1 <- gibbs.sampling.example( y, nu.0, sigma2.0,
  mu.0, kappa.0, mu.initial, sigma2.initial, M, B, seed ) )

# [1] 10000
# [1] 20000
# .
# .
# .
# [1] 90000
# [1] 100000

#    user  system elapsed
#    1.18    0.09    1.28

str( mcmc.data.set.1 )

# num [1:101001, 1:4] 70 72 65.3 84.5 66.8 ...
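
# before comparing the mcmc output with the exact conjugate answer below,
# here's a minimal optional sketch of a first numerical look at the
# monitored draws: posterior means, SDs and 95% central intervals for each
# column (the object name mcmc.summary.1 is just illustrative and is not
# used elsewhere in these notes)

mcmc.summary.1 <- t( apply( mcmc.data.set.1[ ( 1 + B ):( 1 + B + M ), ], 2,
  function( column ) {
    c( mean = mean( column ), sd = sd( column ),
      quantile( column, c( 0.025, 0.975 ) ) )
  } ) )

rownames( mcmc.summary.1 ) <- c( 'mu', 'sigma2', 'sigma', 'y.next' )

print( round( mcmc.summary.1, 2 ) )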
# the output of the driver function is the mcmc data set,
# which has ( 1 + B + M ) rows and 4 columns: the posterior draws
# for mu, sigma^2, sigma, and a new y (the predictive distribution)

# since this is a conjugate situation, we know what the correct
# posterior distribution is, and we can compare the mcmc output
# with the right answer

print( n <- length( y ) )

# [1] 10

print( nu.n <- nu.0 + n )

# [1] 20

print( y.bar <- mean( y ) )

# [1] 52.37

print( s2 <- var( y ) )

# [1] 21.82678

print( sigma2.n <- ( nu.0 * sigma2.0 + ( n - 1 ) * s2 +
  kappa.0 * n * ( y.bar - mu.0 )^2 / ( kappa.0 + n ) ) / nu.n )

# [1] 371.3263

print( mu.n <- ( kappa.0 * mu.0 + n * y.bar ) / ( kappa.0 + n ) )

# [1] 71.185

print( kappa.n <- kappa.0 + n )

# [1] 20

# here i generate IID draws from the correct marginal posterior for sigma^2,
# to serve as a benchmark for the mcmc output

sigma2.benchmark <- rsichi2( M, nu.n, sigma2.n )

qqplot( mcmc.data.set.1[ ( 1 + B ):( 1 + B + M ), 2 ], sigma2.benchmark,
  xlab = 'MCMC output for sigma^2', ylab = 'Benchmark output for sigma^2' )

abline( 0, 1, lwd = 2, col = 'blue' )

# you can see that the mcmc output essentially coincides
# with random draws from the correct marginal posterior for sigma^2

# the next function generates IID samples from the scaled t distribution

rscaled.t <- function( n, mu, sigma2, nu ) {

  return( sqrt( sigma2 ) * rt( n, nu ) + mu )

}

mu.benchmark <- rscaled.t( M, mu.n, sigma2.n / kappa.n, nu.n )

qqplot( mcmc.data.set.1[ ( 1 + B ):( 1 + B + M ), 1 ], mu.benchmark,
  xlab = 'MCMC output for mu', ylab = 'Benchmark output for mu' )

abline( 0, 1, lwd = 2, col = 'blue' )

# again the mcmc output essentially coincides with random draws
# from the correct marginal posterior for mu

y.next.benchmark <- rscaled.t( M, mu.n,
  ( kappa.n + 1 ) * sigma2.n / kappa.n, nu.n )

qqplot( mcmc.data.set.1[ ( 1 + B ):( 1 + B + M ), 4 ], y.next.benchmark,
  xlab = 'MCMC output for y.next', ylab = 'Benchmark output for y.next' )

abline( 0, 1, lwd = 2, col = 'blue' )

# and once again the mcmc output essentially coincides with random draws
# from the correct posterior predictive distribution for a new y

# let's see what effect the burn-in period had

# recall from above that i started the chain off at ridiculous
# starting values relative to the actual data-generating mu and sigma^2:
# ( mu.initial, sigma2.initial ) = ( 200, 0.1 ) versus
# ( mu.data.generating, sigma2.data.generating ) = ( 50, 25 )

par( mfrow = c( 2, 1 ) )

plot( 1:50, mcmc.data.set.1[ 1:50, 1 ], type = 'l',
  xlab = 'iteration number', ylab = 'mu draws', lwd = 2, col = 'red',
  ylim = c( 0, 200 ) )

abline( h = mu.n, lwd = 2, lty = 2, col = 'darkcyan' )

abline( v = 2, lwd = 2, lty = 3 )

text( 7.5, 150, 'iteration 2' )

plot( 1:50, mcmc.data.set.1[ 1:50, 2 ], type = 'l',
  xlab = 'iteration number', ylab = 'sigma^2 draws', lwd = 2, col = 'red',
  ylim = c( 0, 1000 ) )

abline( h = sigma2.n, lwd = 2, lty = 2, col = 'darkcyan' )

abline( v = 2, lwd = 2, lty = 3 )

text( 7.5, 800, 'iteration 2' )

par( mfrow = c( 1, 1 ) )

# the chain is already in equilibrium at iteration 2 (!)

# this is why, in simple problems like the ones we've looked at so far,
# a default burn-in of 1,000 iterations (a) is plenty adequate and
# (b) does essentially no harm

##############################################################################
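
# a minimal optional sketch of one further check (the object name
# mu.quantile.comparison is just illustrative): the qqplots above compare
# entire distributions, and a quick numeric spot-check is to set a few
# central quantiles of the monitored mu draws next to the corresponding
# quantiles of the scaled-t benchmark; with M = 100000 monitored draws
# the two rows of the resulting table should agree to within monte carlo noise

mu.quantile.comparison <- rbind(
  mcmc = quantile( mcmc.data.set.1[ ( 1 + B ):( 1 + B + M ), 1 ],
    c( 0.025, 0.25, 0.5, 0.75, 0.975 ) ),
  benchmark = quantile( mu.benchmark, c( 0.025, 0.25, 0.5, 0.75, 0.975 ) )
)

print( round( mu.quantile.comparison, 2 ) )

##############################################################################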