library(shiny)
library(shinythemes)
library(R2jags)
# setwd("C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/")
source("functions.R")
dataOptions = lapply(1:4, `[`)
names(dataOptions) = c("Efron & Morris (1977)",
"Olympics (2012 & 2016)",
"Eredivisie (2015 - 2016)",
"Simulate your own data!")
estOptions = lapply(1:2, `[`)
names(estOptions) = c("James-Stein", "Hierarchical Bayes")
ui = fluidPage(theme = shinytheme("simplex"),
includeScript("../../../Matomo-tquant.js"),
headerPanel("James-Stein Estimator"),
sidebarPanel(
selectInput(inputId = "data", label = h3("Select Dataset"),
choices = dataOptions, selected = 1),
selectInput(inputId = "estimator", label = h3("Select Estimator"),
choices = estOptions, selected = 1),
# Number of samples
conditionalPanel(
condition = "input.estimator == 2",
sliderInput(inputId = "samples", label = h3("Posterior Samples (1000 burn-in)"),
value = 4e3, min = 1e1, step = 1e1, max = 1e4)
),
# sample months for eredivisie
conditionalPanel(
condition = "input.data == 3",
sliderInput(inputId = "sampleDate", label = h3("Select months sampled"),
min = as.Date("20-08-2015", format = "%d-%m-%Y"),
max = as.Date("10-05-2016", format = "%d-%m-%Y"),
value = as.Date("01-09-2015", format = "%d-%m-%Y"),
timeFormat= "%h")
),
# UI for simulating data
conditionalPanel(
condition = "input.data == 4",
sliderInput(inputId = "nBetween", label = "Choose the number of persons.",
min = 5, max = 5e2, value = 20),
sliderInput(inputId = "nWithin", label = "Choose the number of samples within persons.",
min = 5, max = 100, value = 10),
sliderInput(inputId = "groupDistVar", label = "Choose your groupDistVar",
min = .5, max = 10, value = 5, step = .5),
sliderInput(inputId = "grandVar", label = "Choose your population variance",
min = .5, max = 10, value = 1, step = .5))
),
# Plots - Data description - Estimator description
mainPanel(
conditionalPanel(
condition="output.estimator == 2",
h4('Visible')
),
tabsetPanel(
tabPanel("Plots",
plotOutput("plot"),
tableOutput("table")),
tabPanel("Information about the dataset",
uiOutput("dataDescription"),
tableOutput("tableDescription")
),
tabPanel("Information about the estimator",
uiOutput("estDescription")
),
tabPanel("About",
includeMarkdown("Descriptions/About.Rmd"))
)
)
)
server = function(input, output) {
# get reactive arguments
uIn = reactive({
samples = input$samples
set.seed(1) # same results every time
# select function
shrinkFun = switch (input$estimator,
"1" = JS.mean,
"2" = hierarchicalBayes
)
# replace spaces in estimator name by line breaks
estName0 = names(estOptions)[as.integer(input$estimator)] # space separated
estName = gsub(" ", "\n", estName0) # line break separated
# data dependent plotting adjustments
plotTitle = ifelse(input$data == 4L, "Simulated data",
names(dataOptions)[as.integer(input$data)])
axLabels = c("Mean", estName, "True\nMean")
# select data
if (input$data == 1L) {
dat = read.table("Datasets/EffronMorris.txt", header = TRUE, sep = ",")
obsMean = dat[, 2]
trueMean = dat[, 3]
} else if (input$data == 2L) {
dat = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ")
obsMean = dat$Population.per.Medal2 #dat$Medals
dat2 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ")
trueMean = dat2$Population.per.Medal2 #dat$Medals
axLabels[c(1, 3)] = c("London\n2012", "Rio\nde Janeiro\n2016")
} else if (input$data == 3L) {
dat = read.table("Datasets/Eredivisie2015.txt", header = TRUE, sep = " ")
dat$Date = as.Date(dat$Date)
obsMean = calcPointsDate(dat, input$sampleDate)[3, ]
trueMean = calcPointsDate(dat, Sys.Date())[3, ]
} else { # input$data == 5L
x = genData(nBetween = input$nBetween, nWithin = input$nWithin,
args1 = list(mean = 0, sd = input$grandVar),
args2 = list(sd = input$groupDistVar))
obsMean = sapply(x$data, mean)
trueMean = x$mu
}
JS = shrinkFun(obsMean, samples)
# RMSE table
tb = matrix(c(RMSE(obsMean, trueMean), RMSE(JS, trueMean)), 2, 1,
dimnames = list(c("Observed Mean", estName0),
"Root Mean Squared Error"))
# return reactive output
return(list(obsMean = obsMean, JS = JS, trueMean = trueMean, tb = tb, plotTitle = plotTitle,
input = as.integer(input$data), estName = estName, axLabels = axLabels))
})
# make plots
output$plot = renderPlot({
if (!is.null(uIn()$JS)) {
par(las = 1, bty = "n", mar = c(5, 6, 4, 2) + .1)
shrinkagePlot(JS = uIn()$JS, Mu = uIn()$obsMean, trueMean = uIn()$trueMean,
estName = uIn()$estName,
pch = 19, lty = 2, yaxt = "n", type = "b", ylab = "", xlab = "Estimate",
main = uIn()$plotTitle, ylim = c(0, 2),
col = rainbow(length(uIn()$obsMean)))
axis(2, at = 2:0, labels = uIn()$axLabels, las = 1)
}
})
# RMSE table
output$table = renderTable({if (!is.null(uIn()$tb)) uIn()$tb},
include.rownames=TRUE)
# small description of the data
output$dataDescription = renderUI({
switch(input$data,
"1" = includeMarkdown("Descriptions/Efron and Morris.Rmd"),
"2" = includeMarkdown("Descriptions/Olympics.Rmd"),
"3" = includeMarkdown("Descriptions/Eredivisie.Rmd")
)
})
# tables of data description
output$tableDescription = renderTable({
switch(input$data,
"1" = NULL,
"2" = NULL,
"3" = eredivisieTable()
)
})
# small description of the estimator
output$estDescription = renderUI({
switch(input$estimator,
"1" = withMathJax(includeMarkdown("Descriptions/JamesStein.Rmd")),
"2" = withMathJax(includeMarkdown("Descriptions/HierarchicalBayes.Rmd"))
)
})
}
# run shiny app
shinyApp(ui = ui, server = server)
genData = function(nBetween, nWithin, fun1 = 'rnorm', fun2 = 'rnorm',
args1 = list(mean = 1e2, sd = 1), args2 = list(sd = 5)) {
args11 = args1[names(args1) %in% names(formals(fun1))] # only retain function args actually used in fun1
args22 = args2[names(args2) %in% names(formals(fun2))] # only retain function args actually used in fun2
mu = do.call(fun1, c(list(n = nBetween), args11)) # do function fun1 with arguments
# make a list where every element in data.list with fun2 executed with arguments
data.list = lapply(1L:nBetween, function(i) {
do.call(fun2, c(list(n = nWithin, mu[i]), args22))
})
# return output, data contains 100 datasets, mu contains the true means
return(list(data = data.list, mu = mu))
}
# x = genData(...)$data
JS.est = function(x) {
y = unlist(lapply(x, mean)) # calc mean by group
yh = mean(unlist(x)) # calc mean of all data together
sig = var(y) # calc variance of the group means
if (is.na(sig)) {
sig = 0
}
k = length(x) # number of means to estimate (== number, of groups)
c = 1 - (k-3)*sig / (sum((y - yh)^2)) # formula for shrinkage factor from paper
if (is.nan(c)) {
c = 1
}
z = yh + c * (y - yh) # formula for stein estimates from paper
return(list(z = z, c = c)) # return output
}
JS.mean = function(y, dataset = NULL, samples = NULL) {
sig = var(y)
if (is.na(sig)) sig = 0
k = length(y)
yh = mean(y)
c = 1 - (k-3)*sig / (sum((y - yh)^2)) # formula for shrinkage factor from paper
if (is.nan(c)) {
c = 1
}
z = yh + c * (y - yh)
return(z)
#return(list(z = z, c = c)) # return output
}
EB = function(z) {
(1 - (length(z) - 2) / sum(z^2))*z
}
RMSE = function(x, y) {
return(sqrt(mean((x - y)^2)))
}
# sample a % of observations and calculate the mean of it - deprecated?
partialMean = function(x, sample) {
mean(sample(x, ceiling(length(x)*sample), replace = FALSE))
}
partialLength = function(x, sample) {
length(sample(x, ceiling(length(x)*sample), replace = FALSE))
}
shrinkagePlot = function(JS, Mu, trueMean, estName, usePlotly = FALSE, useGgplot = FALSE, ...) {
x = t(cbind(Mu, JS, trueMean)) # bind estimates in a matrix
x = as.matrix(x[, order(x[1, ])]) # order matrix by first column
estName = gsub(" ", "\n", estName)
if (!usePlotly && !useGgplot) {
y = t(matrix(2:0, ncol(x), 3, TRUE))
y = y + apply(x, 2, duplicated)*.1
matplot(x, y, ...) # plot matrix.
} else {
dat = data.frame(x = c(x),
y = c(t(matrix(factor(c('Sample\nMean', estName, 'True\nMean')), ncol(x), 3, TRUE))),
group = factor(rep(1:ncol(x), 1, NA, 3)))
if (useGgplot) {
library(ggplot2)
g = ggplot(data = dat, aes(x = x, y = y, group = group, color = group)) +
geom_line() +
geom_point() + scale_y_discrete('')
if (usePlotly) {
library(plotly)
g = ggplotly(g)
}
} else {
library(plotly)
levels(dat[, 2])[levels(dat[, 2])=='James\nStein'] <- 'James<br>Stein'
g = plot_ly(data = dat, x = x, y = y, group = group, type = 'line',
color = group) %>% layout(yaxis = list(title = '', autorange = "reversed"))
}
return(suppressWarnings(g))
}
}
# Eredivisie functions
calcPointsDate = function(dat, date) {
idx = dat$Date < date
return(calcPoints(dat[idx, ]))
}
calcPoints = function(dat) {
clubs = unique(dat$Home)
out = matrix(0, 3, ncol = length(clubs))
colnames(out) = clubs
for (i in seq_along(clubs)) {
ih = dat$Home == clubs[i]
ia = dat$Away == clubs[i]
out[1, i] = sum(dat$homeScore[ih], dat$awayScore[ia])
out[2, i] = sum(ih, ia)
}
out[3, ] = out[1, ] / out[2, ]
return(out)
}
eredivisieTable = function() {
df = read.csv(file = "Descriptions/EredivisieTable.txt", header = FALSE,
colClasses = c("character", rep("integer", 8)))
colnames(df) = c("Club", "Played", "Won", "Tied", "Lost", "Points", "Goals made",
"Goals against", "Goal Difference")
df$`Goal / Match Ratio` = round(df$`Goals made` / 34, 3)
return(df)
}
# Olympics functions
olympicsTable = function() {
d12 = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ")
d16 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ")
colnames(d16)[5] = "Population per Medal"
return(d16)
}
# hierarchicalBayes functions
invLogit = function(x) exp(x) / (1 + exp(x))
modelBinom = function() {
mu ~ dnorm(0, 1)
prec ~ dgamma(.0001, .0001)
sigma <- 1/sqrt(prec)
for (i in 1:nt) {
theta[i] ~ dnorm(mu, prec)
}
for (i in 1:nt) {
obsCount[i] ~ dbinom(ilogit(theta[i]), obsN[i])
}
}
modelPoisson = function() {
alpha ~ dgamma(1, 1)
beta ~ dgamma(1, 1)
for (i in 1:nt) {
theta[i] ~ dgamma(alpha, beta)
}
for (i in 1:nt) {
obsCount[i] ~ dpois(theta[i])
}
}
modelNormal = function() {
mu ~ dnorm(0, .001)
prec ~ dgamma(.001, .001)
sigma <- 1/sqrt(prec)
for (i in 1:nt) {
theta[i] ~ dnorm(mu, prec)
}
for (i in 1:nt) {
obsCount[i] ~ dnorm(theta[i], prec)
}
}
hierarchicalBayes = function(obs, samples = 5e3, debug = FALSE) {
if (all(obs <= 1 & obs >= 0)) { # Proportions: Effron / Eredivisie
obsCount = round(obs * 45)
model = modelBinom
obsN = rep(45, length(obsCount))
data.names = c('nt', 'obsCount', 'obsN')
handler = invLogit
} else {
handler = identity
obsCount = obs
data.names = c('nt', 'obsCount')
if (all(as.integer(obsCount) == obsCount)) { # Counts: Olympics data
model = modelPoisson
} else { # simulated normal normal model
model = modelNormal
}
}
nt = length(obsCount)
init = list(list(theta = rep(mean(obs), nt)))
par = "theta"
capture.output(
res <- jags(data.names, init, par, model, n.iter = 1e3 + samples,
n.chains = 1, n.burnin = 1000, n.thin = 5))
theta.est = res$BUGSoutput$sims.list$theta
theta.est = handler(theta.est)
if (debug) {
return(res)
} else {
return(colMeans(theta.est))
}
}
rm(list = ls())
setwd('C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/')
source('functions.R')
# tests
test = genData(nBetween = 10, nWithin = 10, fun2 = "rpois")
obsMean = sapply(test$data, mean)
trueMean = test$mu
boxplot(test$data, las = 1, bty = 'n')
points(seq_along(test$data) - .25, obsMean, pch = 1, col = 'dodgerblue')
points(seq_along(test$data) - .25, trueMean, pch = 1, col = 'darkmagenta')
# two versions of the estimator: check they are completely equal
JS = JS.mean(obsMean)
JS2 = JS.est(test$data)
all(mapply(function(x, y) all.equal(x, y), JS, JS2$z))
# add JS to boxplot
points(seq_along(test$data) + .25, test$mu, pch = 2, col = 'firebrick2')
# shrinkageplots
shrinkagePlot(JS, obsMean, trueMean, estName = "James\nStein",
type = 'b', lty = 1, pch = 19, yaxt = 'n', bty = 'n', ylab = '')
axis(2, at = 2:0, labels = c('Mean', 'James\nStein', 'True\nMean'), las = 1)
# RMSE should be smaller for James Stein
r0 = RMSE(obsMean, trueMean)
r1 = RMSE(JS, trueMean)
r1 < r0
# additional plot options
shrinkagePlot(JS, test$mu, trueMean,
usePlotly = FALSE, useGgplot = TRUE)
suppressWarnings(shrinkagePlot(JS, test$mu, trueMean,
usePlotly = TRUE, useGgplot = FALSE))
suppressWarnings(shrinkagePlot(JS, test$mu, trueMean,
usePlotly = TRUE, useGgplot = TRUE))
# Hierarchical bayes testing:
# Effron & Morris
dat = read.table('Datasets/EffronMorris.txt', header = TRUE, sep = ',')
test = hierarchicalBayes(dat$avg45, debug = TRUE)
# Soccer data
dat = read.table('Datasets/Eredivisie2015.txt', header = TRUE, sep = " ")
dat$Date = as.Date(dat$Date)
Date = as.Date("25-10-2015", format = "%d-%m-%Y")
tmp = calcPointsDate(dat, Date)
obsCount = tmp[1, ]
obsN = tmp[2, ]
test = hierarchicalBayes(obsCount, samples = 1000L, debug = TRUE)
# Olympics
dat = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ")
test = hierarchicalBayes(dat$Medals)
# Simulated data
test = hierarchicalBayes(obsMean)
library(shiny)
library(shinythemes)
library(R2jags)
# setwd("C:/Users/donvd/_Laptop/ResMas/Conference/TquanT/Shiny/JamesSteinRemake/")
source("functions.R")
dataOptions = lapply(1:4, `[`)
names(dataOptions) = c("Efron & Morris (1977)",
"Olympics (2012 & 2016)",
"Eredivisie (2015 - 2016)",
"Simulate your own data!")
estOptions = lapply(1:2, `[`)
names(estOptions) = c("James-Stein", "Hierarchical Bayes")
ui = fluidPage(theme = shinytheme("simplex"),
headerPanel("James-Stein Estimator"),
sidebarPanel(
selectInput(inputId = "data", label = h3("Select Dataset"),
choices = dataOptions, selected = 1),
selectInput(inputId = "estimator", label = h3("Select Estimator"),
choices = estOptions, selected = 1),
# Number of samples
conditionalPanel(
condition = "input.estimator == 2",
sliderInput(inputId = "samples", label = h3("Posterior Samples (1000 burn-in)"),
value = 4e3, min = 1e1, step = 1e1, max = 1e4)
),
# sample months for eredivisie
conditionalPanel(
condition = "input.data == 3",
sliderInput(inputId = "sampleDate", label = h3("Select months sampled"),
min = as.Date("20-08-2015", format = "%d-%m-%Y"),
max = as.Date("10-05-2016", format = "%d-%m-%Y"),
value = as.Date("01-09-2015", format = "%d-%m-%Y"),
timeFormat= "%h")
),
# UI for simulating data
conditionalPanel(
condition = "input.data == 4",
sliderInput(inputId = "nBetween", label = "Choose the number of persons.",
min = 5, max = 5e2, value = 20),
sliderInput(inputId = "nWithin", label = "Choose the number of samples within persons.",
min = 5, max = 100, value = 10),
sliderInput(inputId = "groupDistVar", label = "Choose your groupDistVar",
min = .5, max = 10, value = 5, step = .5),
sliderInput(inputId = "grandVar", label = "Choose your population variance",
min = .5, max = 10, value = 1, step = .5))
),
# Plots - Data description - Estimator description
mainPanel(
conditionalPanel(
condition="output.estimator == 2",
h4('Visible')
),
tabsetPanel(
tabPanel("Plots",
plotOutput("plot"),
tableOutput("table")),
tabPanel("Information about the dataset",
uiOutput("dataDescription"),
tableOutput("tableDescription")
),
tabPanel("Information about the estimator",
uiOutput("estDescription")
),
tabPanel("About",
includeMarkdown("Descriptions/About.rmd"))
)
)
)
server = function(input, output) {
# get reactive arguments
uIn = reactive({
samples = input$samples
set.seed(1) # same results every time
# select function
shrinkFun = switch (input$estimator,
"1" = JS.mean,
"2" = hierarchicalBayes
)
# replace spaces in estimator name by line breaks
estName0 = names(estOptions)[as.integer(input$estimator)] # space separated
estName = gsub(" ", "\n", estName0) # line break separated
# data dependent plotting adjustments
plotTitle = ifelse(input$data == 4L, "Simulated data",
names(dataOptions)[as.integer(input$data)])
axLabels = c("Mean", estName, "True\nMean")
# select data
if (input$data == 1L) {
dat = read.table("Datasets/EffronMorris.txt", header = TRUE, sep = ",")
obsMean = dat[, 2]
trueMean = dat[, 3]
} else if (input$data == 2L) {
dat = read.table("Datasets/Olympics2012.txt", header = TRUE, sep = " ")
obsMean = dat$Population.per.Medal2 #dat$Medals
dat2 = read.table("Datasets/Olympics2016.txt", header = TRUE, sep = " ")
trueMean = dat2$Population.per.Medal2 #dat$Medals
axLabels[c(1, 3)] = c("London\n2012", "Rio\nde Janeiro\n2016")
} else if (input$data == 3L) {
dat = read.table("Datasets/Eredivisie2015.txt", header = TRUE, sep = " ")
dat$Date = as.Date(dat$Date)
obsMean = calcPointsDate(dat, input$sampleDate)[3, ]
trueMean = calcPointsDate(dat, Sys.Date())[3, ]
} else { # input$data == 5L
x = genData(nBetween = input$nBetween, nWithin = input$nWithin,
args1 = list(mean = 0, sd = input$grandVar),
args2 = list(sd = input$groupDistVar))
obsMean = sapply(x$data, mean)
trueMean = x$mu
}
JS = shrinkFun(obsMean, samples)
# RMSE table
tb = matrix(c(RMSE(obsMean, trueMean), RMSE(JS, trueMean)), 2, 1,
dimnames = list(c("Observed Mean", estName0),
"Root Mean Squared Error"))
# return reactive output
return(list(obsMean = obsMean, JS = JS, trueMean = trueMean, tb = tb, plotTitle = plotTitle,
input = as.integer(input$data), estName = estName, axLabels = axLabels))
})
# make plots
output$plot = renderPlot({
if (!is.null(uIn()$JS)) {
par(las = 1, bty = "n", mar = c(5, 6, 4, 2) + .1)
shrinkagePlot(JS = uIn()$JS, Mu = uIn()$obsMean, trueMean = uIn()$trueMean,
estName = uIn()$estName,
pch = 19, lty = 2, yaxt = "n", type = "b", ylab = "", xlab = "Estimate",
main = uIn()$plotTitle, ylim = c(0, 2),
col = rainbow(length(uIn()$obsMean)))
axis(2, at = 2:0, labels = uIn()$axLabels, las = 1)
}
})
# RMSE table
output$table = renderTable({if (!is.null(uIn()$tb)) uIn()$tb},
include.rownames=TRUE)
# small description of the data
output$dataDescription = renderUI({
switch(input$data,
"1" = includeMarkdown("Descriptions/Efron and Morris.Rmd"),
"2" = includeMarkdown("Descriptions/Olympics.Rmd"),
"3" = includeMarkdown("Descriptions/Eredivisie.Rmd")
)
})
# tables of data description
output$tableDescription = renderTable({
switch(input$data,
"1" = NULL,
"2" = NULL,
"3" = eredivisieTable()
)
})
# small description of the estimator
output$estDescription = renderUI({
switch(input$estimator,
"1" = withMathJax(includeMarkdown("Descriptions/JamesStein.Rmd")),
"2" = withMathJax(includeMarkdown("Descriptions/HierarchicalBayes.Rmd"))
)
})
}
# run shiny app
shinyApp(ui = ui, server = server)