options(digits = 17)
ctrl <- glm.control(epsilon = 1e-14, maxit = 10000)
set.seed(99)
n <- 30
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- 0.5 * x1 + rnorm(n, sd = 0.5)
eta_true <- 0.5 + 0.8 * x1 - 1.2 * x2 + 0.3 * x3
p_true <- 1 / (1 + exp(-eta_true))
y_bool <- as.integer(rbinom(n, 1, p_true))
y_float <- pmax(0.0001, pmin(0.9999, rbeta(n, p_true, 1. - p_true)))
var_wt <- runif(n, 0.5, 2.0)
freq_wt <- sample(1:3, n, replace = TRUE)
write.csv(
data.frame(y_bool, y_float, x1, x2, x3, var_wt, freq_wt),
file = "../data/logistic.csv", row.names = FALSE
)
expand_by_freq <- function(df, freq_col) {
idx <- rep(seq_len(nrow(df)), df[[freq_col]])
list(data = df[idx, ], orig_idx = cumsum(df[[freq_col]]) - df[[freq_col]] + 1)
}
df <- data.frame(y_bool, y_float, x1, x2, x3, var_wt, freq_wt)
exp_result <- expand_by_freq(df, "freq_wt")
df_exp <- exp_result$data
orig_idx <- exp_result$orig_idx
x_test <- data.frame(x1 = c(0.5, -1.0, 2.0), x2 = c(0.3, -0.5, 1.2), x3 = c(0.1, 0.8, -0.3))
off_test <- 0.5 * x_test$x1 x_test_sub <- x_test[, c("x2", "x3")]
export_scenario <- function(model, dir_name, orig_idx = NULL, model_no_int = NULL,
pred_data = NULL, pred_off = NULL) {
dir.create(dir_name, showWarnings = FALSE, recursive = TRUE)
ms <- summary(model)
sub <- function(x) {
if (!is.null(orig_idx)) x[orig_idx] else x
}
sub_mat <- function(x) {
if (!is.null(orig_idx)) x[orig_idx, , drop = FALSE] else x
}
write(model$coefficients, file.path(dir_name, "coefficients.csv"), ncolumns = 1)
write(ms$cov.scaled, file.path(dir_name, "covariance.csv"), ncolumns = 1)
write(ms$dispersion, file.path(dir_name, "dispersion.csv"), ncolumns = 1)
write(model$deviance, file.path(dir_name, "deviance.csv"), ncolumns = 1)
write(model$null.deviance,file.path(dir_name, "null_deviance.csv"), ncolumns = 1)
write(model$aic, file.path(dir_name, "aic.csv"), ncolumns = 1)
write(BIC(model), file.path(dir_name, "bic.csv"), ncolumns = 1)
write(sub(residuals(model, type = "response")), file.path(dir_name, "resid_resp.csv"), ncolumns = 1)
write(sub(residuals(model, type = "pearson")), file.path(dir_name, "resid_pear.csv"), ncolumns = 1)
write(sub(residuals(model, type = "deviance")), file.path(dir_name, "resid_dev.csv"), ncolumns = 1)
write(sub(residuals(model, type = "working")), file.path(dir_name, "resid_work.csv"), ncolumns = 1)
write(t(sub_mat(residuals(model, type = "partial"))),
file.path(dir_name, "resid_partial.csv"), ncolumns = 1)
write(sub(rstandard(model, type = "pearson")), file.path(dir_name, "resid_pear_std.csv"), ncolumns = 1)
write(sub(rstandard(model, type = "deviance")), file.path(dir_name, "resid_dev_std.csv"), ncolumns = 1)
write(sub(rstudent(model)), file.path(dir_name, "resid_student.csv"), ncolumns = 1)
infl <- influence(model, do.coef = TRUE)
write(sub(infl$hat), file.path(dir_name, "leverage.csv"), ncolumns = 1)
write(t(sub_mat(infl$coefficients)), file.path(dir_name, "loo_coef.csv"), ncolumns = 1)
write(sub(cooks.distance(model)), file.path(dir_name, "cooks.csv"), ncolumns = 1)
write(ms$coefficients[, "z value"], file.path(dir_name, "wald_z.csv"), ncolumns = 1)
lr_df <- model$df.null - model$df.residual
pvalue_lr <- pchisq(model$null.deviance - model$deviance, df = lr_df, lower.tail = FALSE)
write(pvalue_lr, file.path(dir_name, "pvalue_lr_test.csv"), ncolumns = 1)
write(ms$coefficients[, "Pr(>|z|)"], file.path(dir_name, "pvalue_wald.csv"), ncolumns = 1)
d1 <- drop1(model, test = "Chisq")
pred_p <- d1[["Pr(>Chi)"]][-1] if (!is.null(model_no_int)) {
intercept_p <- anova(model_no_int, model, test = "Chisq")[["Pr(>Chi)"]][2]
write(c(intercept_p, pred_p), file.path(dir_name, "pvalue_exact.csv"), ncolumns = 1)
} else {
write(pred_p, file.path(dir_name, "pvalue_exact.csv"), ncolumns = 1)
}
if (!is.null(pred_data)) {
if (!is.null(pred_off)) {
new_lp <- as.vector(as.matrix(pred_data) %*% model$coefficients + pred_off)
new_pred <- model$family$linkinv(new_lp)
} else {
new_pred <- predict(model, newdata = pred_data, type = "response")
}
write(new_pred, file.path(dir_name, "predict_resp.csv"), ncolumns = 1)
}
cat("Exported scenario:", dir_name, "\n")
}
m_bool_none <- glm(y_bool ~ x1 + x2 + x3, data = df, family = binomial(), control = ctrl)
m_bool_none_noint <- glm(y_bool ~ x1 + x2 + x3 - 1, data = df, family = binomial(), control = ctrl)
export_scenario(m_bool_none, "logistic_results/bool_none",
model_no_int = m_bool_none_noint, pred_data = x_test)
m_bool_var <- glm(y_bool ~ x1 + x2 + x3, data = df, family = binomial(), weights = var_wt, control = ctrl)
m_bool_var_noint <- glm(y_bool ~ x1 + x2 + x3 - 1, data = df, family = binomial(), weights = var_wt, control = ctrl)
export_scenario(m_bool_var, "logistic_results/bool_var",
model_no_int = m_bool_var_noint, pred_data = x_test)
m_bool_freq <- glm(y_bool ~ x1 + x2 + x3, data = df_exp, family = binomial(), control = ctrl)
m_bool_freq_noint <- glm(y_bool ~ x1 + x2 + x3 - 1, data = df_exp, family = binomial(), control = ctrl)
export_scenario(m_bool_freq, "logistic_results/bool_freq", orig_idx,
model_no_int = m_bool_freq_noint, pred_data = x_test)
m_bool_both <- glm(y_bool ~ x1 + x2 + x3, data = df_exp, family = binomial(), weights = var_wt, control = ctrl)
m_bool_both_noint <- glm(y_bool ~ x1 + x2 + x3 - 1, data = df_exp, family = binomial(), weights = var_wt, control = ctrl)
export_scenario(m_bool_both, "logistic_results/bool_both", orig_idx,
model_no_int = m_bool_both_noint, pred_data = x_test)
off <- 0.5 * x1
m_bool_off <- glm(y_bool ~ x2 + x3 - 1, offset = off, data = df, family = binomial(), control = ctrl)
export_scenario(m_bool_off, "logistic_results/bool_off",
pred_data = x_test_sub, pred_off = off_test)
m_float_none <- glm(y_float ~ x1 + x2 + x3, data = df, family = binomial(), control = ctrl)
m_float_none_noint <- glm(y_float ~ x1 + x2 + x3 - 1, data = df, family = binomial(), control = ctrl)
export_scenario(m_float_none, "logistic_results/float_none",
model_no_int = m_float_none_noint, pred_data = x_test)
m_float_var <- glm(y_float ~ x1 + x2 + x3, data = df, family = binomial(), weights = var_wt, control = ctrl)
m_float_var_noint <- glm(y_float ~ x1 + x2 + x3 - 1, data = df, family = binomial(), weights = var_wt, control = ctrl)
export_scenario(m_float_var, "logistic_results/float_var",
model_no_int = m_float_var_noint, pred_data = x_test)
cat("Done. All logistic scenarios exported.\n")