Visible

About

This Shiny app was developed by Don van den Bergh (donvdbergh<at>hotmail.com) for the “Tools for Teaching Quantative Thinking” (TquanT) Erasmus programme. The goal of this shiny app is to visualize the effect of shrinkage estimators and compare their performance to other estimators. This app is intended for educational purposes; the different tabs contain explanations of the used datasets and estimators.

library(shiny) library(shinythemes) library(R2jags) # setwd("C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/") source("functions.R") dataOptions = lapply(1:4, `[`) names(dataOptions) = c("Efron & Morris (1977)", "Olympics (2012 & 2016)", "Eredivisie (2015 - 2016)", "Simulate your own data!") estOptions = lapply(1:2, `[`) names(estOptions) = c("James-Stein", "Hierarchical Bayes") ui = fluidPage(theme = shinytheme("simplex"), includeScript("../../../Matomo-tquant.js"), headerPanel("James-Stein Estimator"), sidebarPanel( selectInput(inputId = "data", label = h3("Select Dataset"), choices = dataOptions, selected = 1), selectInput(inputId = "estimator", label = h3("Select Estimator"), choices = estOptions, selected = 1), # Number of samples conditionalPanel( condition = "input.estimator == 2", sliderInput(inputId = "samples", label = h3("Posterior Samples (1000 burn-in)"), value = 4e3, min = 1e1, step = 1e1, max = 1e4) ), # sample months for eredivisie conditionalPanel( condition = "input.data == 3", sliderInput(inputId = "sampleDate", label = h3("Select months sampled"), min = as.Date("20-08-2015", format = "%d-%m-%Y"), max = as.Date("10-05-2016", format = "%d-%m-%Y"), value = as.Date("01-09-2015", format = "%d-%m-%Y"), timeFormat= "%h") ), # UI for simulating data conditionalPanel( condition = "input.data == 4", sliderInput(inputId = "nBetween", label = "Choose the number of persons.", min = 5, max = 5e2, value = 20), sliderInput(inputId = "nWithin", label = "Choose the number of samples within persons.", min = 5, max = 100, value = 10), sliderInput(inputId = "groupDistVar", label = "Choose your groupDistVar", min = .5, max = 10, value = 5, step = .5), sliderInput(inputId = "grandVar", label = "Choose your population variance", min = .5, max = 10, value = 1, step = .5)) ), # Plots - Data description - Estimator description mainPanel( conditionalPanel( condition="output.estimator == 2", h4('Visible') ), tabsetPanel( tabPanel("Plots", plotOutput("plot"), tableOutput("table")), tabPanel("Information about the dataset", uiOutput("dataDescription"), tableOutput("tableDescription") ), tabPanel("Information about the estimator", uiOutput("estDescription") ), tabPanel("About", includeMarkdown("Descriptions/About.Rmd")) ) ) ) server = function(input, output) { # get reactive arguments uIn = reactive({ samples = input$samples set.seed(1) # same results every time # select function shrinkFun = switch (input$estimator, "1" = JS.mean, "2" = hierarchicalBayes ) # replace spaces in estimator name by line breaks estName0 = names(estOptions)[as.integer(input$estimator)] # space separated estName = gsub(" ", "\n", estName0) # line break separated # data dependent plotting adjustments plotTitle = ifelse(input$data == 4L, "Simulated data", names(dataOptions)[as.integer(input$data)]) axLabels = c("Mean", estName, "True\nMean") # select data if (input$data == 1L) { dat = read.table("Datasets/EffronMorris.txt", header = TRUE, sep = ",") obsMean = dat[, 2] trueMean = dat[, 3] } else if (input$data == 2L) { dat = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ") obsMean = dat$Population.per.Medal2 #dat$Medals dat2 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ") trueMean = dat2$Population.per.Medal2 #dat$Medals axLabels[c(1, 3)] = c("London\n2012", "Rio\nde Janeiro\n2016") } else if (input$data == 3L) { dat = read.table("Datasets/Eredivisie2015.txt", header = TRUE, sep = " ") dat$Date = as.Date(dat$Date) obsMean = calcPointsDate(dat, input$sampleDate)[3, ] trueMean = calcPointsDate(dat, Sys.Date())[3, ] } else { # input$data == 5L x = genData(nBetween = input$nBetween, nWithin = input$nWithin, args1 = list(mean = 0, sd = input$grandVar), args2 = list(sd = input$groupDistVar)) obsMean = sapply(x$data, mean) trueMean = x$mu } JS = shrinkFun(obsMean, samples) # RMSE table tb = matrix(c(RMSE(obsMean, trueMean), RMSE(JS, trueMean)), 2, 1, dimnames = list(c("Observed Mean", estName0), "Root Mean Squared Error")) # return reactive output return(list(obsMean = obsMean, JS = JS, trueMean = trueMean, tb = tb, plotTitle = plotTitle, input = as.integer(input$data), estName = estName, axLabels = axLabels)) }) # make plots output$plot = renderPlot({ if (!is.null(uIn()$JS)) { par(las = 1, bty = "n", mar = c(5, 6, 4, 2) + .1) shrinkagePlot(JS = uIn()$JS, Mu = uIn()$obsMean, trueMean = uIn()$trueMean, estName = uIn()$estName, pch = 19, lty = 2, yaxt = "n", type = "b", ylab = "", xlab = "Estimate", main = uIn()$plotTitle, ylim = c(0, 2), col = rainbow(length(uIn()$obsMean))) axis(2, at = 2:0, labels = uIn()$axLabels, las = 1) } }) # RMSE table output$table = renderTable({if (!is.null(uIn()$tb)) uIn()$tb}, include.rownames=TRUE) # small description of the data output$dataDescription = renderUI({ switch(input$data, "1" = includeMarkdown("Descriptions/Efron and Morris.Rmd"), "2" = includeMarkdown("Descriptions/Olympics.Rmd"), "3" = includeMarkdown("Descriptions/Eredivisie.Rmd") ) }) # tables of data description output$tableDescription = renderTable({ switch(input$data, "1" = NULL, "2" = NULL, "3" = eredivisieTable() ) }) # small description of the estimator output$estDescription = renderUI({ switch(input$estimator, "1" = withMathJax(includeMarkdown("Descriptions/JamesStein.Rmd")), "2" = withMathJax(includeMarkdown("Descriptions/HierarchicalBayes.Rmd")) ) }) } # run shiny app shinyApp(ui = ui, server = server)

genData = function(nBetween, nWithin, fun1 = 'rnorm', fun2 = 'rnorm', args1 = list(mean = 1e2, sd = 1), args2 = list(sd = 5)) { args11 = args1[names(args1) %in% names(formals(fun1))] # only retain function args actually used in fun1 args22 = args2[names(args2) %in% names(formals(fun2))] # only retain function args actually used in fun2 mu = do.call(fun1, c(list(n = nBetween), args11)) # do function fun1 with arguments # make a list where every element in data.list with fun2 executed with arguments data.list = lapply(1L:nBetween, function(i) { do.call(fun2, c(list(n = nWithin, mu[i]), args22)) }) # return output, data contains 100 datasets, mu contains the true means return(list(data = data.list, mu = mu)) } # x = genData(...)$data JS.est = function(x) { y = unlist(lapply(x, mean)) # calc mean by group yh = mean(unlist(x)) # calc mean of all data together sig = var(y) # calc variance of the group means if (is.na(sig)) { sig = 0 } k = length(x) # number of means to estimate (== number, of groups) c = 1 - (k-3)*sig / (sum((y - yh)^2)) # formula for shrinkage factor from paper if (is.nan(c)) { c = 1 } z = yh + c * (y - yh) # formula for stein estimates from paper return(list(z = z, c = c)) # return output } JS.mean = function(y, dataset = NULL, samples = NULL) { sig = var(y) if (is.na(sig)) sig = 0 k = length(y) yh = mean(y) c = 1 - (k-3)*sig / (sum((y - yh)^2)) # formula for shrinkage factor from paper if (is.nan(c)) { c = 1 } z = yh + c * (y - yh) return(z) #return(list(z = z, c = c)) # return output } EB = function(z) { (1 - (length(z) - 2) / sum(z^2))*z } RMSE = function(x, y) { return(sqrt(mean((x - y)^2))) } # sample a % of observations and calculate the mean of it - deprecated? partialMean = function(x, sample) { mean(sample(x, ceiling(length(x)*sample), replace = FALSE)) } partialLength = function(x, sample) { length(sample(x, ceiling(length(x)*sample), replace = FALSE)) } shrinkagePlot = function(JS, Mu, trueMean, estName, usePlotly = FALSE, useGgplot = FALSE, ...) { x = t(cbind(Mu, JS, trueMean)) # bind estimates in a matrix x = as.matrix(x[, order(x[1, ])]) # order matrix by first column estName = gsub(" ", "\n", estName) if (!usePlotly && !useGgplot) { y = t(matrix(2:0, ncol(x), 3, TRUE)) y = y + apply(x, 2, duplicated)*.1 matplot(x, y, ...) # plot matrix. } else { dat = data.frame(x = c(x), y = c(t(matrix(factor(c('Sample\nMean', estName, 'True\nMean')), ncol(x), 3, TRUE))), group = factor(rep(1:ncol(x), 1, NA, 3))) if (useGgplot) { library(ggplot2) g = ggplot(data = dat, aes(x = x, y = y, group = group, color = group)) + geom_line() + geom_point() + scale_y_discrete('') if (usePlotly) { library(plotly) g = ggplotly(g) } } else { library(plotly) levels(dat[, 2])[levels(dat[, 2])=='James\nStein'] <- 'James<br>Stein' g = plot_ly(data = dat, x = x, y = y, group = group, type = 'line', color = group) %>% layout(yaxis = list(title = '', autorange = "reversed")) } return(suppressWarnings(g)) } } # Eredivisie functions calcPointsDate = function(dat, date) { idx = dat$Date < date return(calcPoints(dat[idx, ])) } calcPoints = function(dat) { clubs = unique(dat$Home) out = matrix(0, 3, ncol = length(clubs)) colnames(out) = clubs for (i in seq_along(clubs)) { ih = dat$Home == clubs[i] ia = dat$Away == clubs[i] out[1, i] = sum(dat$homeScore[ih], dat$awayScore[ia]) out[2, i] = sum(ih, ia) } out[3, ] = out[1, ] / out[2, ] return(out) } eredivisieTable = function() { df = read.csv(file = "Descriptions/EredivisieTable.txt", header = FALSE, colClasses = c("character", rep("integer", 8))) colnames(df) = c("Club", "Played", "Won", "Tied", "Lost", "Points", "Goals made", "Goals against", "Goal Difference") df$`Goal / Match Ratio` = round(df$`Goals made` / 34, 3) return(df) } # Olympics functions olympicsTable = function() { d12 = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ") d16 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ") colnames(d16)[5] = "Population per Medal" return(d16) } # hierarchicalBayes functions invLogit = function(x) exp(x) / (1 + exp(x)) modelBinom = function() { mu ~ dnorm(0, 1) prec ~ dgamma(.0001, .0001) sigma <- 1/sqrt(prec) for (i in 1:nt) { theta[i] ~ dnorm(mu, prec) } for (i in 1:nt) { obsCount[i] ~ dbinom(ilogit(theta[i]), obsN[i]) } } modelPoisson = function() { alpha ~ dgamma(1, 1) beta ~ dgamma(1, 1) for (i in 1:nt) { theta[i] ~ dgamma(alpha, beta) } for (i in 1:nt) { obsCount[i] ~ dpois(theta[i]) } } modelNormal = function() { mu ~ dnorm(0, .001) prec ~ dgamma(.001, .001) sigma <- 1/sqrt(prec) for (i in 1:nt) { theta[i] ~ dnorm(mu, prec) } for (i in 1:nt) { obsCount[i] ~ dnorm(theta[i], prec) } } hierarchicalBayes = function(obs, samples = 5e3, debug = FALSE) { if (all(obs <= 1 & obs >= 0)) { # Proportions: Effron / Eredivisie obsCount = round(obs * 45) model = modelBinom obsN = rep(45, length(obsCount)) data.names = c('nt', 'obsCount', 'obsN') handler = invLogit } else { handler = identity obsCount = obs data.names = c('nt', 'obsCount') if (all(as.integer(obsCount) == obsCount)) { # Counts: Olympics data model = modelPoisson } else { # simulated normal normal model model = modelNormal } } nt = length(obsCount) init = list(list(theta = rep(mean(obs), nt))) par = "theta" capture.output( res <- jags(data.names, init, par, model, n.iter = 1e3 + samples, n.chains = 1, n.burnin = 1000, n.thin = 5)) theta.est = res$BUGSoutput$sims.list$theta theta.est = handler(theta.est) if (debug) { return(res) } else { return(colMeans(theta.est)) } }

rm(list = ls()) setwd('C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/') source('functions.R') # tests test = genData(nBetween = 10, nWithin = 10, fun2 = "rpois") obsMean = sapply(test$data, mean) trueMean = test$mu boxplot(test$data, las = 1, bty = 'n') points(seq_along(test$data) - .25, obsMean, pch = 1, col = 'dodgerblue') points(seq_along(test$data) - .25, trueMean, pch = 1, col = 'darkmagenta') # two versions of the estimator: check they are completely equal JS = JS.mean(obsMean) JS2 = JS.est(test$data) all(mapply(function(x, y) all.equal(x, y), JS, JS2$z)) # add JS to boxplot points(seq_along(test$data) + .25, test$mu, pch = 2, col = 'firebrick2') # shrinkageplots shrinkagePlot(JS, obsMean, trueMean, estName = "James\nStein", type = 'b', lty = 1, pch = 19, yaxt = 'n', bty = 'n', ylab = '') axis(2, at = 2:0, labels = c('Mean', 'James\nStein', 'True\nMean'), las = 1) # RMSE should be smaller for James Stein r0 = RMSE(obsMean, trueMean) r1 = RMSE(JS, trueMean) r1 < r0 # additional plot options shrinkagePlot(JS, test$mu, trueMean, usePlotly = FALSE, useGgplot = TRUE) suppressWarnings(shrinkagePlot(JS, test$mu, trueMean, usePlotly = TRUE, useGgplot = FALSE)) suppressWarnings(shrinkagePlot(JS, test$mu, trueMean, usePlotly = TRUE, useGgplot = TRUE)) # Hierarchical bayes testing: # Effron & Morris dat = read.table('Datasets/EffronMorris.txt', header = TRUE, sep = ',') test = hierarchicalBayes(dat$avg45, debug = TRUE) # Soccer data dat = read.table('Datasets/Eredivisie2015.txt', header = TRUE, sep = " ") dat$Date = as.Date(dat$Date) Date = as.Date("25-10-2015", format = "%d-%m-%Y") tmp = calcPointsDate(dat, Date) obsCount = tmp[1, ] obsN = tmp[2, ] test = hierarchicalBayes(obsCount, samples = 1000L, debug = TRUE) # Olympics dat = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ") test = hierarchicalBayes(dat$Medals) # Simulated data test = hierarchicalBayes(obsMean)

library(shiny) library(shinythemes) library(R2jags) # setwd("C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/") source("functions.R") dataOptions = lapply(1:4, `[`) names(dataOptions) = c("Efron & Morris (1977)", "Olympics (2012 & 2016)", "Eredivisie (2015 - 2016)", "Simulate your own data!") estOptions = lapply(1:2, `[`) names(estOptions) = c("James-Stein", "Hierarchical Bayes") ui = fluidPage(theme = shinytheme("simplex"), headerPanel("James-Stein Estimator"), sidebarPanel( selectInput(inputId = "data", label = h3("Select Dataset"), choices = dataOptions, selected = 1), selectInput(inputId = "estimator", label = h3("Select Estimator"), choices = estOptions, selected = 1), # Number of samples conditionalPanel( condition = "input.estimator == 2", sliderInput(inputId = "samples", label = h3("Posterior Samples (1000 burn-in)"), value = 4e3, min = 1e1, step = 1e1, max = 1e4) ), # sample months for eredivisie conditionalPanel( condition = "input.data == 3", sliderInput(inputId = "sampleDate", label = h3("Select months sampled"), min = as.Date("20-08-2015", format = "%d-%m-%Y"), max = as.Date("10-05-2016", format = "%d-%m-%Y"), value = as.Date("01-09-2015", format = "%d-%m-%Y"), timeFormat= "%h") ), # UI for simulating data conditionalPanel( condition = "input.data == 4", sliderInput(inputId = "nBetween", label = "Choose the number of persons.", min = 5, max = 5e2, value = 20), sliderInput(inputId = "nWithin", label = "Choose the number of samples within persons.", min = 5, max = 100, value = 10), sliderInput(inputId = "groupDistVar", label = "Choose your groupDistVar", min = .5, max = 10, value = 5, step = .5), sliderInput(inputId = "grandVar", label = "Choose your population variance", min = .5, max = 10, value = 1, step = .5)) ), # Plots - Data description - Estimator description mainPanel( conditionalPanel( condition="output.estimator == 2", h4('Visible') ), tabsetPanel( tabPanel("Plots", plotOutput("plot"), tableOutput("table")), tabPanel("Information about the dataset", uiOutput("dataDescription"), tableOutput("tableDescription") ), tabPanel("Information about the estimator", uiOutput("estDescription") ), tabPanel("About", includeMarkdown("Descriptions/About.rmd")) ) ) ) server = function(input, output) { # get reactive arguments uIn = reactive({ samples = input$samples set.seed(1) # same results every time # select function shrinkFun = switch (input$estimator, "1" = JS.mean, "2" = hierarchicalBayes ) # replace spaces in estimator name by line breaks estName0 = names(estOptions)[as.integer(input$estimator)] # space separated estName = gsub(" ", "\n", estName0) # line break separated # data dependent plotting adjustments plotTitle = ifelse(input$data == 4L, "Simulated data", names(dataOptions)[as.integer(input$data)]) axLabels = c("Mean", estName, "True\nMean") # select data if (input$data == 1L) { dat = read.table("Datasets/EffronMorris.txt", header = TRUE, sep = ",") obsMean = dat[, 2] trueMean = dat[, 3] } else if (input$data == 2L) { dat = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ") obsMean = dat$Population.per.Medal2 #dat$Medals dat2 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ") trueMean = dat2$Population.per.Medal2 #dat$Medals axLabels[c(1, 3)] = c("London\n2012", "Rio\nde Janeiro\n2016") } else if (input$data == 3L) { dat = read.table("Datasets/Eredivisie2015.txt", header = TRUE, sep = " ") dat$Date = as.Date(dat$Date) obsMean = calcPointsDate(dat, input$sampleDate)[3, ] trueMean = calcPointsDate(dat, Sys.Date())[3, ] } else { # input$data == 5L x = genData(nBetween = input$nBetween, nWithin = input$nWithin, args1 = list(mean = 0, sd = input$grandVar), args2 = list(sd = input$groupDistVar)) obsMean = sapply(x$data, mean) trueMean = x$mu } JS = shrinkFun(obsMean, samples) # RMSE table tb = matrix(c(RMSE(obsMean, trueMean), RMSE(JS, trueMean)), 2, 1, dimnames = list(c("Observed Mean", estName0), "Root Mean Squared Error")) # return reactive output return(list(obsMean = obsMean, JS = JS, trueMean = trueMean, tb = tb, plotTitle = plotTitle, input = as.integer(input$data), estName = estName, axLabels = axLabels)) }) # make plots output$plot = renderPlot({ if (!is.null(uIn()$JS)) { par(las = 1, bty = "n", mar = c(5, 6, 4, 2) + .1) shrinkagePlot(JS = uIn()$JS, Mu = uIn()$obsMean, trueMean = uIn()$trueMean, estName = uIn()$estName, pch = 19, lty = 2, yaxt = "n", type = "b", ylab = "", xlab = "Estimate", main = uIn()$plotTitle, ylim = c(0, 2), col = rainbow(length(uIn()$obsMean))) axis(2, at = 2:0, labels = uIn()$axLabels, las = 1) } }) # RMSE table output$table = renderTable({if (!is.null(uIn()$tb)) uIn()$tb}, include.rownames=TRUE) # small description of the data output$dataDescription = renderUI({ switch(input$data, "1" = includeMarkdown("Descriptions/Efron and Morris.Rmd"), "2" = includeMarkdown("Descriptions/Olympics.Rmd"), "3" = includeMarkdown("Descriptions/Eredivisie.Rmd") ) }) # tables of data description output$tableDescription = renderTable({ switch(input$data, "1" = NULL, "2" = NULL, "3" = eredivisieTable() ) }) # small description of the estimator output$estDescription = renderUI({ switch(input$estimator, "1" = withMathJax(includeMarkdown("Descriptions/JamesStein.Rmd")), "2" = withMathJax(includeMarkdown("Descriptions/HierarchicalBayes.Rmd")) ) }) } # run shiny app shinyApp(ui = ui, server = server)