## R-code for lecture 9: 8/5-19. AL # create a data set n <- 500 set.seed(3) exdata <- data.frame(age = round(100 * runif(n, 0, 1)), errors = round(100 * runif(n, 0, 0.5))) beta.true <- c(0, 0.04, -0.2) exdata\$xbtrue <- beta.true[1] + beta.true[2] * exdata\$age + beta.true[3] * exdata\$errors exdata\$ptrue <- exp(exdata\$xbtrue) / (1 + exp(exdata\$xbtrue)) exdata\$win <- rbinom(n, 1, exdata\$ptrue) model <- glm(win ~ age + errors, data = exdata, family = "binomial") summary(model) confint(model) exdata\$xb <- predict(model) exdata\$phat <- predict(model, type = "response") with(exdata, plot(win ~ age, main = "Observations and predictions")) with(exdata, lines(ksmooth(age, win, bandwidth = 30))) with(exdata, points(age, phat, col = "red")) with(exdata, plot(win ~ errors, main = "Observations and predictions")) with(exdata, lines(ksmooth(errors, win, bandwidth = 20))) with(exdata, points(errors, phat, col = "red")) with(exdata, plot3D::scatter3D(age, errors, win, xlab = "age", ylab = "errors", zlab = "win")) #Influence#### exdata\$v <- influence(model)\$hat with(exdata, plot(v ~ age)) with(exdata, plot(v ~ errors)) with(exdata, plot(v ~ xb)) #Standardized residuale#### exdata\$r <- influence(model)\$pear.res / sqrt(1 - influence(model)\$hat) summary(exdata\$r) qqnorm(exdata\$r) qqline(exdata\$r) with(exdata, plot(r ~ xb, ylim = c(-10, 10))) abline(h = c(-2, 0, 2), col = "red", lty = 3) with(exdata, plot(r^2 ~ xb, ylim = c(0, 10^2))) abline(h = c(0, 2^2), col = "red", lty = 3) #Standardized deviance residuals#### exdata\$ds <- influence(model)\$dev.res / sqrt(1 - influence(model)\$hat) summary(exdata\$ds) with(exdata, plot(ds ~ xb, ylim = c(-3, 3), ylab = "standardized deviance residuals")) abline(h = c(-2, 0, 2), col = "red", lty = 3) with(exdata, plot(ds ~ age, ylim = c(-3, 3), ylab = "standardized deviance residuals")) abline(h = c(-2, 0, 2), col = "red", lty = 3) with(exdata, plot(ds ~ errors, ylim = c(-3, 3), ylab = "standardized deviance residuals")) abline(h = c(-2, 0, 2), col = "red", lty = 3) #Cook's distance#### exdata\$cook <- cooks.distance(model) summary(exdata\$cook) with(exdata, plot(cook ~ xb, ylim = c(0, .1), main = "Cook's distance")) abline(h = c(1, 4 / n), col = "red", lty = 3) #DFBETAS#### dfb <- dfbetas(model) summary(dfb) with(exdata, plot(dfb[, "(Intercept)"] ~ xb, ylim = c(-1, 1), main = "DFbeta0")) abline(h = c(-1, -2/sqrt(n), 0, 1, 2 / sqrt(n)), col = "red", lty = 3) with(exdata, plot(dfb[, "age"] ~ xb, ylim = c(-1, 1), main = "DFbeta1")) abline(h = c(-1, -2/sqrt(n), 0, 1, 2 / sqrt(n)), col = "red", lty = 3) with(exdata, plot(dfb[, "errors"] ~ xb, ylim = c(-1, 1), main = "DFbeta2")) abline(h = c(-1, -2/sqrt(n), 0, 1, 2 / sqrt(n)), col = "red", lty = 3) ###### Goodness of fit summary(model) (model.errors <- update(model, . ~ . -age)) (model.age <- update(model, . ~ . -errors)) # predict success if phat > 0.5 which is the same as xbeta > 0: exdata\$yhat <- exdata\$xb > 0 (tabell <- table(exdata\$win, exdata\$yhat)) (ptable <- prop.table(tabell, margin = 1)) # sensitivity = 74%, specificity = 95% # model with only errors: exdata\$yhat.errors <- predict(model.errors) > 0 (tabell.errors <- table(exdata\$win, exdata\$yhat.errors)) (ptabell.errors <- prop.table(tabell.errors, margin = 1)) # sens = 62%, spec = 92% # model with only age: exdata\$yhat.age <- predict(model.age) > 0 (tabell.age <- table(exdata\$win, exdata\$yhat.age)) mean(exdata\$win) # sens = 0%, spec = 100% #########################################