#########################################################################

# likelihood and bayesian analysis of nb10 case study, part 2:
# considering the family of t distributions for the sampling model

# unbuffer the output

# set the correct working directory

#########################################################################

# here's a function that makes a probability (quantile-quantile) plot
# for the NB10 data using as target sampling distribution
# the t family

# nu (a positive number ) is the degrees of freedom of the t distribution

# y is the NB10 data set

# M (a positive integer) is the number of simulation replications

# seed (a positive integer) is the random number seed, for replicability
# of results

t.probability.plot <- function( nu, y, M, seed ) {

  set.seed( seed )

  n <- length( y )

  standardized.y <- ( y - mean( y ) ) / sd( y )

  plot( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y ), col = 'red', 
    xlab = 'Quantiles', ylab = 'sorted standardized y values', 
    main = paste( 'T Sampling Distribution (Degrees of Freedom): ', 
    nu, sep = '' ), lwd = 2, type = 'n', xlim = c( -6, 6 ), 
    ylim = c( -10, 10 ) )

  for ( m in 1:M ) {

    y.star <- rt( n, nu )

    lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y.star ),
      lwd = 1, col = 'gray70', lty = 3 )

  }

  abline( 0, 1, lwd = 2, col = 'black', lty = 2 )

  lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( standardized.y ), 
    lwd = 3, col = 'red' )

  text( -2, 0.5, 'NB10', col = 'red' )

  lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( rnorm( n ) ), 
    lwd = 4, col = 'darkcyan' )

  text( 5, 0.5, 'Normal', col = 'darkcyan' )

  return( 'Quack!' )

}

y <- c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403.,
        398., 403., 407., 402., 401., 399., 400., 401., 405., 402., 
        408., 399., 399., 402., 399., 397., 407., 401., 399., 401., 
        403., 400., 410., 401., 407., 423., 406., 406., 402., 405., 
        405., 409., 399., 402., 407., 406., 413., 409., 404., 402., 
        404., 406., 407., 405., 411., 410., 410., 410., 401., 402., 
        404., 405., 392., 407., 406., 404., 403., 408., 404., 407., 
        412., 406., 409., 400., 408., 404., 401., 404., 408., 406., 
        408., 406., 401., 412., 393., 437., 418., 415., 404., 401., 
        401., 407., 412., 375., 409., 406., 398., 406., 403., 404. 
)

M <- 1000

seed <- 3412519

nu <- 10

t.probability.plot( nu, y, M, seed )

# a sample of size 100 from the Normal( 0, 1 ) distribution
# is included for reference, as a kind of control:
# the smaller nu gets, the worse the fit should be
# for the simulated Normal sample

# now just run the function with different nu values,
# for example starting with nu = 1 (the Cauchy distribution)
# and increasing by 1 successively until the plot goes from 

#   nu = 1 (NB10 data's tails not that heavy)

# to

#   nu = ? (NB10 data's tails not that light)

# to see if any members of the t family are plausible
# with the NB10 data ...

#########################################################################