options(digits = 17)
set.seed(42)
n <- 25
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- 0.5 * x1 + rnorm(n, sd = 0.5)
y <- 1.5 + 0.8 * x1 - 1.2 * x2 + 0.3 * x3 + rnorm(n, sd = 0.5)
var_wt <- runif(n, 0.5, 2.0)
freq_wt <- sample(1:3, n, replace = TRUE)
write.csv(data.frame(y, x1, x2, x3, var_wt, freq_wt),
file = "../data/linear.csv", row.names = FALSE)
export_scenario <- function(model, dir_name, orig_idx = NULL, model_no_int = NULL,
pred_data = NULL, pred_off = NULL) {
dir.create(dir_name, showWarnings = FALSE, recursive = TRUE)
ms <- summary(model)
sub <- function(x) {
if (!is.null(orig_idx)) x[orig_idx] else x
}
sub_mat <- function(x) {
if (!is.null(orig_idx)) x[orig_idx, , drop = FALSE] else x
}
write(model$coefficients, file.path(dir_name, "coefficients.csv"), ncolumns = 1)
write(ms$cov.scaled, file.path(dir_name, "covariance.csv"), ncolumns = 1)
write(ms$dispersion, file.path(dir_name, "dispersion.csv"), ncolumns = 1)
write(model$deviance, file.path(dir_name, "deviance.csv"), ncolumns = 1)
write(model$null.deviance, file.path(dir_name, "null_deviance.csv"), ncolumns = 1)
write(1 - model$deviance / model$null.deviance, file.path(dir_name, "r_sq.csv"), ncolumns = 1)
wts_all <- if (!is.null(model$prior.weights)) model$prior.weights else rep(1, length(fitted(model)))
write(sum(wts_all * residuals(model, type = "response")^2), file.path(dir_name, "rss.csv"), ncolumns = 1)
write(model$aic, file.path(dir_name, "aic.csv"), ncolumns = 1)
write(BIC(model), file.path(dir_name, "bic.csv"), ncolumns = 1)
write(sub(residuals(model, type = "response")), file.path(dir_name, "resid_resp.csv"), ncolumns = 1)
write(sub(residuals(model, type = "pearson")), file.path(dir_name, "resid_pear.csv"), ncolumns = 1)
write(sub(residuals(model, type = "deviance")), file.path(dir_name, "resid_dev.csv"), ncolumns = 1)
write(sub(residuals(model, type = "working")), file.path(dir_name, "resid_work.csv"), ncolumns = 1)
write(t(sub_mat(residuals(model, type = "partial"))), file.path(dir_name, "resid_partial.csv"), ncolumns = 1)
write(sub(rstandard(model, type = "pearson")), file.path(dir_name, "resid_pear_std.csv"), ncolumns = 1)
write(sub(rstandard(model, type = "deviance")), file.path(dir_name, "resid_dev_std.csv"), ncolumns = 1)
write(sub(rstudent(model)), file.path(dir_name, "resid_student.csv"), ncolumns = 1)
infl <- influence(model, do.coef = TRUE)
write(sub(infl$hat), file.path(dir_name, "leverage.csv"), ncolumns = 1)
write(t(sub_mat(infl$coefficients)), file.path(dir_name, "loo_coef.csv"), ncolumns = 1)
write(sub(cooks.distance(model)), file.path(dir_name, "cooks.csv"), ncolumns = 1)
write(ms$coefficients[, "t value"], file.path(dir_name, "wald_z.csv"), ncolumns = 1)
lr_df <- model$df.null - model$df.residual
pvalue_lr <- pchisq(model$null.deviance - model$deviance, df = lr_df, lower.tail = FALSE)
write(pvalue_lr, file.path(dir_name, "pvalue_lr_test.csv"), ncolumns = 1)
write(ms$coefficients[, "Pr(>|t|)"], file.path(dir_name, "pvalue_wald.csv"), ncolumns = 1)
d1 <- drop1(model, test = "F")
pred_p <- d1[["Pr(>F)"]][-1] if (!is.null(model_no_int)) {
intercept_p <- anova(model_no_int, model, test = "F")[["Pr(>F)"]][2]
write(c(intercept_p, pred_p), file.path(dir_name, "pvalue_exact.csv"), ncolumns = 1)
} else {
write(pred_p, file.path(dir_name, "pvalue_exact.csv"), ncolumns = 1)
}
if (!is.null(pred_data)) {
if (!is.null(pred_off)) {
new_lp <- as.vector(as.matrix(pred_data) %*% model$coefficients + pred_off)
new_pred <- model$family$linkinv(new_lp)
} else {
new_pred <- predict(model, newdata = pred_data, type = "response")
}
write(new_pred, file.path(dir_name, "predict_resp.csv"), ncolumns = 1)
}
cat("Exported scenario:", dir_name, "\n")
}
x_test <- data.frame(x1 = c(0.5, -1.0, 2.0), x2 = c(0.3, -0.5, 1.2), x3 = c(0.1, 0.8, -0.3))
off_test <- 0.3 * x_test$x1 x_test_sub <- x_test[, c("x2", "x3")]
m_none <- glm(y ~ x1 + x2 + x3, family = gaussian())
m_none_noint <- glm(y ~ x1 + x2 + x3 - 1, family = gaussian())
export_scenario(m_none, "linear_results/none", model_no_int = m_none_noint, pred_data = x_test)
m_var <- glm(y ~ x1 + x2 + x3, family = gaussian(), weights = var_wt)
m_var_noint <- glm(y ~ x1 + x2 + x3 - 1, family = gaussian(), weights = var_wt)
export_scenario(m_var, "linear_results/var", model_no_int = m_var_noint, pred_data = x_test)
expand_by_freq <- function(df, freq_col) {
idx <- rep(seq_len(nrow(df)), df[[freq_col]])
list(data = df[idx, ], orig_idx = cumsum(df[[freq_col]]) - df[[freq_col]] + 1)
}
df <- data.frame(y, x1, x2, x3, var_wt, freq_wt)
exp_result <- expand_by_freq(df, "freq_wt")
df_exp <- exp_result$data
orig_idx <- exp_result$orig_idx
m_freq <- glm(y ~ x1 + x2 + x3, data = df_exp, family = gaussian())
m_freq_noint <- glm(y ~ x1 + x2 + x3 - 1, data = df_exp, family = gaussian())
export_scenario(m_freq, "linear_results/freq", orig_idx, model_no_int = m_freq_noint, pred_data = x_test)
m_both <- glm(y ~ x1 + x2 + x3, data = df_exp, family = gaussian(), weights = var_wt)
m_both_noint <- glm(y ~ x1 + x2 + x3 - 1, data = df_exp, family = gaussian(), weights = var_wt)
export_scenario(m_both, "linear_results/both", orig_idx, model_no_int = m_both_noint, pred_data = x_test)
off <- 0.3 * x1
m_off_none <- glm(y ~ x2 + x3 - 1, offset = off, family = gaussian())
export_scenario(m_off_none, "linear_results/off_none",
pred_data = x_test_sub, pred_off = off_test)
m_off_var <- glm(y ~ x2 + x3 - 1, offset = off, family = gaussian(), weights = var_wt)
export_scenario(m_off_var, "linear_results/off_var",
pred_data = x_test_sub, pred_off = off_test)
off_exp <- 0.3 * df_exp$x1
m_off_freq <- glm(y ~ x2 + x3 - 1, offset = off_exp, data = df_exp, family = gaussian())
export_scenario(m_off_freq, "linear_results/off_freq", orig_idx,
pred_data = x_test_sub, pred_off = off_test)
m_off_both <- glm(y ~ x2 + x3 - 1, offset = off_exp, data = df_exp, family = gaussian(), weights = var_wt)
export_scenario(m_off_both, "linear_results/off_both", orig_idx,
pred_data = x_test_sub, pred_off = off_test)
library(glmnet)
x_mat <- as.matrix(data.frame(x1, x2, x3))
l2_param <- 0.1
lambda_glmnet <- 2 * l2_param / n
dir.create("linear_results/ridge_none", showWarnings = FALSE, recursive = TRUE)
m_ridge <- glmnet(x_mat, y, alpha = 0, lambda = lambda_glmnet, standardize = TRUE,
family = "gaussian", thresh = 1e-14)
write(as.vector(coef(m_ridge)), "linear_results/ridge_none/coefficients.csv", ncolumns = 1)
cat("Exported scenario: linear_results/ridge_none\n")
dir.create("linear_results/ridge_var", showWarnings = FALSE, recursive = TRUE)
m_ridge_var <- glmnet(x_mat, y, alpha = 0, lambda = lambda_glmnet, standardize = TRUE,
weights = var_wt / sum(var_wt) * n,
family = "gaussian", thresh = 1e-14)
write(as.vector(coef(m_ridge_var)), "linear_results/ridge_var/coefficients.csv", ncolumns = 1)
cat("Exported scenario: linear_results/ridge_var\n")
cat("Done. All scenarios exported.\n")