############################################################################################ # Question III.2. Probabilités empiriques cat("\014"); rm(list=ls()); options(max.print=5000) load("vincennes.RData") head(pmu) # 01/01/2016 - 31/12/2019 min(pmu$cote) # La cote est le "rapport simple gagnant" (cad le gain brut) ntot <- aggregate(place ~ cote, pmu, length) nfirst <- aggregate(place ~ cote, pmu[pmu$place=="1er",], length) empirical_prob <- merge(nfirst, ntot, by=c("cote")) rm(ntot, nfirst) empirical_prob$empirical_prob <- empirical_prob$place.x / empirical_prob$place.y empirical_prob <- empirical_prob[ empirical_prob$place.y >= 100, ] # keep only cotes that are observed at least 100 times plot(empirical_prob$cote, empirical_prob$empirical_prob, type="p", pch=18) summary(empirical_prob$cote) quantile(empirical_prob$cote, 0.9) plot(empirical_prob$cote, empirical_prob$empirical_prob, type="p", pch=18, xlim=c(0, quantile(empirical_prob$cote, 0.9))) ############################################################################################ # Question III.3. Gain espéré empirical_prob$E_gain <- empirical_prob$empirical_prob * empirical_prob$cote - 1 head(empirical_prob) plot(empirical_prob$cote, empirical_prob$E_gain, type="p", pch=18, col="red") abline(h=0, lty="dotted") plot(empirical_prob$cote, empirical_prob$E_gain, type="p", pch=18, col="red", xlim=c(0, quantile(empirical_prob$cote, 0.9))) abline(h=0, lty="dotted") summary(empirical_prob$E_gain) quantile(empirical_prob$E_gain, seq(0, 0.25, by=0.01)) ############################################################################################ # III.4 Représentation de la vraisemblance prem <- unique(subset(pmu[pmu$place=="1er", ], select=c(course, date, cote))) names(prem) <- c("course", "date", "cote_premier") pmulik <- subset(pmu, select=c(course, date, place, cote)) pmulik <- merge(pmulik, prem, by=c("course", "date"), all.x=TRUE) rm(pmu, prem) ll <- function( atheta ) { temp <- pmulik if ( atheta == 0 ) { atheta <- 1e-4 } temp$ll_element <- ( 1 - exp( - atheta * pmulik$cote_premier ) ) / ( 1 - exp( - atheta * pmulik$cote ) ) temp <- as.vector(aggregate( ll_element ~ course + date, temp, sum, na.action = na.omit )[,3]) - sum( log ( 1 / temp ), na.rm=TRUE ) # - la vraisemblance (algorithme optimisation R = minimisation par convention) } xx <- seq(-0.02, 0.05, 0.005); yy <- lapply(xx, ll); plot(xx, yy, type="p", xlab="a * theta", ylab="- vraisemblance") ############################################################ # III.5 et 6. Maximum de vraisemblance : fonction optim cat("\014"); rm(list=ls()); options(max.print=5000) load("vincennes.RData") prem <- unique(subset(pmu[pmu$place=="1er", ], select=c(course, date, cote))) names(prem) <- c("course", "date", "cote_premier") pmulik <- subset(pmu, select=c(course, date, place, cote)) pmulik <- merge(pmulik, prem, by=c("course", "date"), all.x=TRUE) rm(pmu, prem) pmu_loglik <- function( b ) { atheta <- b[1] temp <- pmulik if ( atheta == 0 ) { atheta <- 1e-4 } temp$pmu_loglik_element <- ( 1 - exp( - atheta * pmulik$cote_premier ) ) / ( 1 - exp( - atheta * pmulik$cote ) ) temp <- as.vector(aggregate( pmu_loglik_element ~ course + date, temp, sum, na.action = na.omit )[,3]) - sum( log ( 1 / temp ), na.rm=TRUE ) # add minus sign to minimize } opt <- optim(par=runif(1, min=-1e-2, max=1e-2), fn=pmu_loglik) opt$convergence; opt$par; opt$value opt <- optim(par=runif(1, min=-1e-2, max=1e-2), fn=pmu_loglik, gr = NULL, method = c("Brent"), lower=c(-2e-2), upper=c(2e-2), control=list(maxit=20000)) opt$convergence; opt$par; opt$value ###################################################################################### # Maximum de vraisemblance : bbmle cat("\014"); rm(list=ls()); options(max.print=5000) load("vincennes.RData") prem <- unique(subset(pmu[pmu$place=="1er", ], select=c(course, date, cote))) names(prem) <- c("course", "date", "cote_premier") pmulik <- subset(pmu, select=c(course, date, place, cote)) pmulik <- merge(pmulik, prem, by=c("course", "date"), all.x=TRUE) rm(pmu, prem) pmu_loglik <- function( atheta ) { temp <- pmulik if ( atheta == 0 ) { atheta <- 1e-4 } temp$ll_element <- ( 1 - exp( - atheta * pmulik$cote_premier ) ) / ( 1 - exp( - atheta * pmulik$cote ) ) temp <- as.vector(aggregate( ll_element ~ course + date, temp, sum, na.action = na.omit )[,3]) - sum( log ( 1 / temp ), na.rm=TRUE ) } require(bbmle) rr <- mle2(pmu_loglik, start = list(atheta = 1e4), optimizer = "optim", method="Brent", lower=c(-2e-2), upper=c(2e-2)) summary(rr) confint(profile(rr))