ndarray-glm 0.1.0

#!/usr/bin/env Rscript

# This script uses glmnet:
# https://glmnet.stanford.edu/articles/glmnet.html
library(glmnet)

infile <- "../data/iris.csv"
data <- read.csv(infile, header = TRUE)
x_data <- data[
  ,
  c("sepal_length", "sepal_width", "petal_length", "petal_width")
]
# This class is seperable, so this will test regularization
y_data <- data["class"] == "setosa"
# y_data <- data["class"] == "versicolor"
# The glmnet package divides the squared errors by N, so to match our
# convention we need to scale their lambda too.
l2 <- 1e-2 / length(y_data)

# Fit the model with internal standardization
model <- glmnet(
  x_data, y_data,
  standardize = TRUE,
  # the ridge penalty
  alpha = 0,
  # With this dataset, a large lambda zeros out all coefficients
  lambda = l2,
  # the tolerance has to be much smaller to make this result more precise.
  thresh = 1e-10,
  family = "binomial",
)
print(model)
beta <- coef(model)
print(beta)
beta <- beta[, "s0"]
# There are convenience functions to read array from single-column files
write(beta, file = "log_regularization/iris_setosa_l2_1e-2.csv", sep = "\n")

# Then re-do the fit without standardization
model <- glmnet(
  x_data, y_data,
  standardize = FALSE,
  # the ridge penalty
  alpha = 0,
  # With this dataset, a large lambda zeros out all coefficients
  lambda = l2,
  # the tolerance has to be much smaller to make this result more precise.
  thresh = 1e-10,
  family = "binomial",
)
print(model)
beta <- coef(model)
print(beta)
beta <- beta[, "s0"]
# There are convenience functions to read array from single-column files
write(beta, file = "log_regularization/iris_setosa_l2_1e-2_nostd.csv", sep = "\n")