options(digits = 17)
ctrl <- glm.control(epsilon = 1e-14, maxit = 10000)
set.seed(7)
n <- 20
x1 <- rnorm(n)
x2 <- rnorm(n)
eta_true <- 0.3 + 0.8 * x1 - 0.5 * x2
mu_true <- exp(eta_true)
var_wt <- runif(n, 0.5, 2.0)
freq_wt <- sample(1:3, n, replace = TRUE)
y_exp <- rgamma(n, shape = 1, rate = 1 / mu_true)
y_gamma <- rgamma(n, shape = 2, rate = 2 / mu_true)
rinvgauss_sim <- function(n, mu, phi) {
lambda <- 1 / phi
v <- rnorm(n)^2
x <- mu + mu^2 * v / (2 * lambda) -
mu / (2 * lambda) * sqrt(4 * mu * lambda * v + mu^2 * v^2)
z <- runif(n)
ifelse(z <= mu / (mu + x), x, mu^2 / x)
}
y_ig <- rinvgauss_sim(n, mu_true, phi = 0.3)
write.csv(
data.frame(y_exp, y_gamma, y_ig, x1, x2, var_wt, freq_wt),
file = "../data/responses.csv", row.names = FALSE
)
expand_by_freq <- function(df, freq_col) {
idx <- rep(seq_len(nrow(df)), df[[freq_col]])
list(data = df[idx, ], orig_idx = cumsum(df[[freq_col]]) - df[[freq_col]] + 1)
}
df <- data.frame(y_exp, y_gamma, y_ig, x1, x2, var_wt, freq_wt)
exp_res <- expand_by_freq(df, "freq_wt")
df_exp <- exp_res$data
orig_idx <- exp_res$orig_idx
x_test <- data.frame(x1 = c(0.5, -1.0, 2.0), x2 = c(0.3, -0.5, 1.2))
export_responses_scenario <- function(model, dir_name, orig_idx = NULL,
export_partial = TRUE, pred_data = NULL) {
dir.create(dir_name, showWarnings = FALSE, recursive = TRUE)
sub <- function(x) if (!is.null(orig_idx)) x[orig_idx] else x
sub_mat <- function(x) if (!is.null(orig_idx)) x[orig_idx, , drop = FALSE] else x
write(model$coefficients, file.path(dir_name, "coefficients.csv"), ncolumns = 1)
write(model$deviance, file.path(dir_name, "deviance.csv"), ncolumns = 1)
write(model$null.deviance, file.path(dir_name, "null_deviance.csv"), ncolumns = 1)
write(sub(residuals(model, type = "response")), file.path(dir_name, "resid_resp.csv"), ncolumns = 1)
write(sub(residuals(model, type = "working")), file.path(dir_name, "resid_work.csv"), ncolumns = 1)
if (is.null(orig_idx)) {
write(residuals(model, type = "pearson"), file.path(dir_name, "resid_pear.csv"), ncolumns = 1)
write(residuals(model, type = "deviance"), file.path(dir_name, "resid_dev.csv"), ncolumns = 1)
infl <- influence(model, do.coef = FALSE)
write(infl$hat, file.path(dir_name, "leverage.csv"), ncolumns = 1)
}
if (export_partial) {
write(t(sub_mat(residuals(model, type = "partial"))),
file.path(dir_name, "resid_partial.csv"), ncolumns = 1)
}
lr_df <- model$df.null - model$df.residual
pvalue_lr <- pchisq(model$null.deviance - model$deviance, df = lr_df, lower.tail = FALSE)
write(pvalue_lr, file.path(dir_name, "pvalue_lr_test.csv"), ncolumns = 1)
if (!is.null(pred_data)) {
new_pred <- predict(model, newdata = pred_data, type = "response")
write(new_pred, file.path(dir_name, "predict_resp.csv"), ncolumns = 1)
}
cat("Exported scenario:", dir_name, "\n")
}
m_exp_none <- glm(y_exp ~ x1 + x2, family = Gamma(link = "log"), control = ctrl)
export_responses_scenario(m_exp_none, "responses_results/exp_none",
export_partial = TRUE, pred_data = x_test)
m_gamma_none <- glm(y_gamma ~ x1 + x2, family = Gamma(link = "log"), control = ctrl)
export_responses_scenario(m_gamma_none, "responses_results/gamma_none",
export_partial = TRUE, pred_data = x_test)
m_gamma_var <- glm(y_gamma ~ x1 + x2, family = Gamma(link = "log"),
weights = var_wt, control = ctrl)
export_responses_scenario(m_gamma_var, "responses_results/gamma_var",
export_partial = FALSE, pred_data = x_test)
m_gamma_freq <- glm(y_gamma ~ x1 + x2, data = df_exp,
family = Gamma(link = "log"), control = ctrl)
export_responses_scenario(m_gamma_freq, "responses_results/gamma_freq", orig_idx,
export_partial = TRUE, pred_data = x_test)
m_ig_none <- glm(y_ig ~ x1 + x2, family = inverse.gaussian(link = "log"), control = ctrl)
export_responses_scenario(m_ig_none, "responses_results/ig_none",
export_partial = TRUE, pred_data = x_test)
m_ig_var <- glm(y_ig ~ x1 + x2, family = inverse.gaussian(link = "log"),
weights = var_wt, control = ctrl)
export_responses_scenario(m_ig_var, "responses_results/ig_var",
export_partial = FALSE, pred_data = x_test)
cat("Done. All response scenarios exported.\n")