use std::io::Cursor;

use polars::{
    error::PolarsResult,
    frame::DataFrame,
    prelude::{CsvReader, DataType, SerReader},
};

/// # Ability and Intelligence Tests
///
/// ## Description:
///
/// Six tests were given to 112 individuals. The covariance matrix is
/// given in this object.
///
/// ## Usage:
///
/// ability.cov
///
/// ## Details:
///
/// The tests are described as
///
/// * general: a non-verbal measure of general intelligence using
/// Cattell's culture-fair test.
/// * picture: a picture-completion test
/// * blocks: block design
/// * maze: mazes
/// * reading: reading comprehension
/// * vocab: vocabulary
/// * Bartholomew gives both covariance and correlation matrices, but
/// these are inconsistent.  Neither are in the original paper.
///
/// ## Source:
///
/// Bartholomew, D. J. (1987).  _Latent Variable Analysis and Factor
/// Analysis_.  Griffin.
///
/// Bartholomew, D. J. and Knott, M. (1990).  _Latent Variable
/// Analysis and Factor Analysis_.  Second Edition, Arnold.
///
/// ## References:
///
/// Smith, G. A. and Stanley G. (1983).  Clocking g: relating
/// intelligence and measures of timed performance.  _Intelligence_,
/// *7*, 353-368.  doi:10.1016/0160-2896(83)90010-7
/// <https://doi.org/10.1016/0160-2896%2883%2990010-7>.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// (ability.FA <- factanal(factors = 1, covmat = ability.cov))
/// update(ability.FA, factors = 2)
/// ## The signs of factors and hence the signs of correlations are
/// ## arbitrary with promax rotation.
/// update(ability.FA, factors = 2, rotation = "promax")
/// ```
pub fn ability_cov() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
    let mut center = Vec::new();
    let mut n_obs = 0;
    CsvReader::new(Cursor::new(include_str!("ability.cov.center.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| center.push(d.unwrap() as usize));
    CsvReader::new(Cursor::new(include_str!("ability.cov.n.obs.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| n_obs = d.unwrap() as usize);
    Ok((
        CsvReader::new(Cursor::new(include_str!("ability.cov.cov.csv"))).finish()?,
        center,
        n_obs,
    ))
}

/// # Passenger Miles on Commercial US Airlines, 1937-1960
///
/// ## Description:
///
/// The revenue passenger miles flown by commercial airlines in the
/// United States for each year from 1937 to 1960.
///
/// ## Usage:
///
/// airmiles
///
/// ## Format:
///
/// A time series of 24 observations; yearly, 1937-1960.
///
/// ## Source:
///
/// F.A.A. Statistical Handbook of Aviation.
///
/// ## References:
///
/// Brown, R. G. (1963) _Smoothing, Forecasting and Prediction of
/// Discrete Time Series_.  Prentice-Hall.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(airmiles, main = "airmiles data",
///  xlab = "Passenger-miles flown by U.S. commercial airlines", col = 4)
/// ```
pub fn airmiles() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("airmiles.csv"))).finish()
}

/// # Monthly Airline Passenger Numbers 1949-1960
///
/// ## Description:
///
/// The classic Box & Jenkins airline data.  Monthly totals of
/// international airline passengers, 1949 to 1960.
///
/// ## Usage:
///
/// AirPassengers
///
/// ## Format:
///
/// A monthly time series, in thousands.
///
/// ## Source:
///
/// Box, G. E. P., Jenkins, G. M. and Reinsel, G. C. (1976) _Time
/// Series Analysis, Forecasting and Control._ Third Edition.
/// Holden-Day. Series G.
///
/// ## Examples:
///
/// ```r
/// ## Not run:
///
/// ## These are quite slow and so not run by example(AirPassengers)
///
/// ## The classic 'airline model', by full ML
/// (fit <- arima(log10(AirPassengers), c(0, 1, 1),
///  seasonal = list(order = c(0, 1, 1), period = 12)))
/// update(fit, method = "CSS")
/// update(fit, x = window(log10(AirPassengers), start = 1954))
/// pred <- predict(fit, n.ahead = 24)
/// tl <- pred$pred - 1.96 * pred$se
/// tu <- pred$pred + 1.96 * pred$se
/// ts.plot(AirPassengers, 10^tl, 10^tu, log = "y", lty = c(1, 2, 2))
///
/// ## full ML fit is the same if the series is reversed, CSS fit is not
/// ap0 <- rev(log10(AirPassengers))
/// attributes(ap0) <- attributes(AirPassengers)
/// arima(ap0, c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
/// arima(ap0, c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12),
/// method = "CSS")
///
/// ## Structural Time Series
/// ap <- log10(AirPassengers) - 2
/// (fit <- StructTS(ap, type = "BSM"))
/// par(mfrow = c(1, 2))
/// plot(cbind(ap, fitted(fit)), plot.type = "single")
/// plot(cbind(ap, tsSmooth(fit)), plot.type = "single")
/// ## End(Not run)
/// ```
pub fn air_passengers() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("AirPassengers.csv"))).finish()
}

/// # New York Air Quality Measurements
///
/// ## Description:
///
/// Daily air quality measurements in New York, May to September 1973.
///
/// ## Usage:
///
/// airquality
///
/// ## Format:
///
/// A data frame with 153 observations on 6 variables.
///
///  * ‘\[,1\]’  ‘Ozone’ numeric  Ozone (ppb)
///  * ‘\[,2\]’  ‘Solar.R’  numeric  Solar R (lang)
///  * ‘\[,3\]’  ‘Wind’  numeric  Wind (mph)
///  * ‘\[,4\]’  ‘Temp’  numeric  Temperature (degrees F)
///  * ‘\[,5\]’  ‘Month’ numeric  Month (1-12)
///  * ‘\[,6\]’  ‘Day’numeric  Day of month (1-31)
///
/// ## Details:
///
/// Daily readings of the following air quality values for May 1, 1973
/// (a Tuesday) to September 30, 1973.
///
/// * ‘Ozone’: Mean ozone in parts per billion from 1300 to 1500
/// hours at Roosevelt Island
/// * ‘Solar.R’: Solar radiation in Langleys in the frequency band
/// 4000-7700 Angstroms from 0800 to 1200 hours at Central Park
/// * ‘Wind’: Average wind speed in miles per hour at 0700 and 1000
/// hours at LaGuardia Airport
/// * ‘Temp’: Maximum daily temperature in degrees Fahrenheit at La
/// Guardia Airport.
///
/// ## Source:
///
/// The data were obtained from the New York State Department of
/// Conservation (ozone data) and the National Weather Service
/// (meteorological data).
///
/// ## References:
///
/// Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey, P. A.
/// (1983) _Graphical Methods for Data Analysis_.  Belmont, CA:
/// Wadsworth.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// pairs(airquality, panel = panel.smooth, main = "airquality data")
/// ```
pub fn air_quality() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("airquality.csv"))).finish()
}

/// # Anscombe's Quartet of 'Identical' Simple Linear Regressions
///
/// ## Description:
///
/// Four x-y datasets which have the same traditional statistical
/// properties (mean, variance, correlation, regression line, etc.),
/// yet are quite different.
///
/// ## Usage:
///
/// anscombe
///
/// ## Format:
///
/// A data frame with 11 observations on 8 variables.
///
/// * x1 == x2 == x3  the integers 4:14, specially arranged
/// * x4  values 8 and 19
/// * y1, y2, y3, y4  numbers in (3, 12.5) with mean 7.5 and sdev 2.03
///
/// ## Source:
///
/// Tufte, Edward R. (1989).  _The Visual Display of Quantitative
/// Information_, 13-14.  Graphics Press.
///
/// ## References:
///
/// Anscombe, Francis J. (1973).  Graphs in statistical analysis.
/// _The American Statistician_, *27*, 17-21.  doi:10.2307/2682899
/// <https://doi.org/10.2307/2682899>.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// summary(anscombe)
///
/// ##-- now some "magic" to do the 4 regressions in a loop:
/// ff <- y ~ x
/// mods <- setNames(as.list(1:4), paste0("lm", 1:4))
/// for(i in 1:4) {
///  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
///  ## orff[[2]] <- as.name(paste0("y", i))
///  ##ff[[3]] <- as.name(paste0("x", i))
///  mods[[i]] <- lmi <- lm(ff, data = anscombe)
///  print(anova(lmi))
/// }
///
/// ## See how close they are (numerically!)
/// sapply(mods, coef)
/// lapply(mods, function(fm) coef(summary(fm)))
///
/// ## Now, do what you should have done in the first place: PLOTS
/// op <- par(mfrow = c(2, 2), mar = 0.1+c(4,4,1,1), oma =  c(0, 0, 2, 0))
/// for(i in 1:4) {
///  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
///  plot(ff, data = anscombe, col = "red", pch = 21, bg = "orange", cex = 1.2,
/// xlim = c(3, 19), ylim = c(3, 13))
///  abline(mods[[i]], col = "blue")
/// }
/// mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)
/// par(op)
/// ```
pub fn anscombe() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("anscombe.csv"))).finish()
}

/// # The Joyner-Boore Attenuation Data
///
/// ## Description:
///
/// This data gives peak accelerations measured at various observation
/// stations for 23 earthquakes in California.  The data have been
/// used by various workers to estimate the attenuating affect of
/// distance on ground acceleration.
///
/// ## Usage:
///
/// attenu
///
/// ## Format:
///
/// A data frame with 182 observations on 5 variables.
///
/// * \[,1\]  event numeric  Event Number
/// * \[,2\]  magnumeric  Moment Magnitude
/// * \[,3\]  station  factorStation Number
/// * \[,4\]  dist  numeric  Station-hypocenter distance (km)
/// * \[,5\]  accel numeric  Peak acceleration (g)
///
/// ## Source:
///
/// Joyner, W.B., D.M. Boore and R.D. Porcella (1981).  Peak
/// horizontal acceleration and velocity from strong-motion records
/// including records from the 1979 Imperial Valley, California
/// earthquake.  USGS Open File report 81-365. Menlo Park, Ca.
///
/// ## References:
///
/// Boore, D. M. and Joyner, W. B.(1982).  The empirical prediction of
/// ground motion, _Bulletin of the Seismological Society of America_,
/// *72*, S269-S268.
///
/// Bolt, B. A. and Abrahamson, N. A. (1982).  New attenuation
/// relations for peak and expected accelerations of strong ground
/// motion.  _Bulletin of the Seismological Society of America_, *72*,
/// 2307-2321.
///
/// Bolt B. A. and Abrahamson, N. A. (1983).  Reply to W. B. Joyner &
/// D. M. Boore's “Comments on: New attenuation relations for peak and
/// expected accelerations for peak and expected accelerations of
/// strong ground motion”, _Bulletin of the Seismological Society of
/// America_, *73*, 1481-1483.
///
/// Brillinger, D. R. and Preisler, H. K. (1984).  An exploratory
/// analysis of the Joyner-Boore attenuation data, _Bulletin of the
/// Seismological Society of America_, *74*, 1441-1449.
///
/// Brillinger, D. R. and Preisler, H. K. (1984).  _Further analysis
/// of the Joyner-Boore attenuation data_.  Manuscript.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// ## check the data class of the variables
/// sapply(attenu, data.class)
/// summary(attenu)
/// pairs(attenu, main = "attenu data")
/// coplot(accel ~ dist | as.factor(event), data = attenu, show.given = FALSE)
/// coplot(log(accel) ~ log(dist) | as.factor(event),
///  data = attenu, panel = panel.smooth, show.given = FALSE)
/// ```
pub fn attenu() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("attenu.csv"))).finish()
}

/// # The Chatterjee-Price Attitude Data
///
/// ## Description:
///
/// From a survey of the clerical employees of a large financial
/// organization, the data are aggregated from the questionnaires of
/// the approximately 35 employees for each of 30 (randomly selected)
/// departments.  The numbers give the percent proportion of
/// favourable responses to seven questions in each department.
///
/// ## Usage:
///
/// attitude
///
/// ## Format:
///
/// A data frame with 30 observations on 7 variables. The first column
/// are the short names from the reference, the second one the
/// variable names in the data frame:
///
/// * Y  ratingnumeric  Overall rating
/// * X\[1\]  complaints  numeric  Handling of employee complaints
/// * X\[2\]  privileges  numeric  Does not allow special privileges
/// * X\[3\]  learning numeric  Opportunity to learn
/// * X\[4\]  raisesnumeric  Raises based on performance
/// * X\[5\]  critical numeric  Too critical
/// * X\[6\]  advance  numeric  Advancement
///
/// ## Source:
///
/// Chatterjee, S. and Price, B. (1977) _Regression Analysis by
/// Example_.  New York: Wiley.  (Section 3.7, p.68ff of 2nd
/// ed.(1991).)
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// pairs(attitude, main = "attitude data")
/// summary(attitude)
/// summary(fm1 <- lm(rating ~ ., data = attitude))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// summary(fm2 <- lm(rating ~ complaints, data = attitude))
/// plot(fm2)
/// par(opar)
/// ```
pub fn attitude() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("attitude.csv"))).finish()
}

/// # Quarterly Time Series of the Number of Australian Residents
///
/// ## Description:
///
/// Numbers (in thousands) of Australian residents measured quarterly
/// from March 1971 to March 1994.  The object is of class ‘"ts"’.
///
/// ## Usage:
///
/// austres
///
/// ## Source:
///
/// P. J. Brockwell and R. A. Davis (1996) _Introduction to Time
/// Series and Forecasting._ Springer
pub fn austres() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("austres.csv"))).finish()
}

/// # Body Temperature Series of Two Beavers
///
/// ## Description:
///
/// Reynolds (1994) describes a small part of a study of the long-term
/// temperature dynamics of beaver _Castor canadensis_ in
/// north-central Wisconsin.  Body temperature was measured by
/// telemetry every 10 minutes for four females, but data from a one
/// period of less than a day for each of two animals is used there.
///
/// ## Usage:
///
/// beaver1
/// beaver2
///
/// ## Format:
///
/// The ‘beaver1’ data frame has 114 rows and 4 columns on body
/// temperature measurements at 10 minute intervals.
///
/// The ‘beaver2’ data frame has 100 rows and 4 columns on body
/// temperature measurements at 10 minute intervals.
///
/// The variables are as follows:
///
/// * day Day of observation (in days since the beginning of 1990),
/// December 12-13 (‘beaver1’) and November 3-4 (‘beaver2’).
/// * time Time of observation, in the form ‘0330’ for 3:30am
/// * temp Measured body temperature in degrees Celsius.
/// * activ Indicator of activity outside the retreat.
///
/// ## Note:
///
/// The observation at 22:20 is missing in ‘beaver1’.
///
/// ## Source:
///
/// P. S. Reynolds (1994) Time-series analyses of beaver body
/// temperatures.  Chapter 11 of Lange, N., Ryan, L., Billard, L.,
/// Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994) _Case
/// Studies in Biometry._ New York: John Wiley and Sons.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// (yl <- range(beaver1$temp, beaver2$temp))
///
/// beaver.plot <- function(bdat, ...) {
///  nam <- deparse(substitute(bdat))
///  with(bdat, {
/// # Hours since start of day:
/// hours <- time %/% 100 + 24*(day - day[1]) + (time %% 100)/60
/// plot (hours, temp, type = "l", ...,
/// main = paste(nam, "body temperature"))
/// abline(h = 37.5, col = "gray", lty = 2)
/// is.act <- activ == 1
/// points(hours[is.act], temp[is.act], col = 2, cex = .8)
///  })
/// }
/// op <- par(mfrow = c(2, 1), mar = c(3, 3, 4, 2), mgp = 0.9 * 2:0)
/// beaver.plot(beaver1, ylim = yl)
/// beaver.plot(beaver2, ylim = yl)
/// par(op)
/// ```
pub fn beaver1() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("beaver1.csv"))).finish()
}

/// # Body Temperature Series of Two Beavers
///
/// ## Description:
///
/// Reynolds (1994) describes a small part of a study of the long-term
/// temperature dynamics of beaver _Castor canadensis_ in
/// north-central Wisconsin.  Body temperature was measured by
/// telemetry every 10 minutes for four females, but data from a one
/// period of less than a day for each of two animals is used there.
///
/// ## Usage:
///
/// beaver1
/// beaver2
///
/// ## Format:
///
/// The ‘beaver1’ data frame has 114 rows and 4 columns on body
/// temperature measurements at 10 minute intervals.
///
/// The ‘beaver2’ data frame has 100 rows and 4 columns on body
/// temperature measurements at 10 minute intervals.
///
/// The variables are as follows:
///
/// * day Day of observation (in days since the beginning of 1990),
/// December 12-13 (‘beaver1’) and November 3-4 (‘beaver2’).
/// * time Time of observation, in the form ‘0330’ for 3:30am
/// * temp Measured body temperature in degrees Celsius.
/// * activ Indicator of activity outside the retreat.
///
/// ## Note:
///
/// The observation at 22:20 is missing in ‘beaver1’.
///
/// ## Source:
///
/// P. S. Reynolds (1994) Time-series analyses of beaver body
/// temperatures.  Chapter 11 of Lange, N., Ryan, L., Billard, L.,
/// Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994) _Case
/// Studies in Biometry._ New York: John Wiley and Sons.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// (yl <- range(beaver1$temp, beaver2$temp))
///
/// beaver.plot <- function(bdat, ...) {
///  nam <- deparse(substitute(bdat))
///  with(bdat, {
/// # Hours since start of day:
/// hours <- time %/% 100 + 24*(day - day[1]) + (time %% 100)/60
/// plot (hours, temp, type = "l", ...,
/// main = paste(nam, "body temperature"))
/// abline(h = 37.5, col = "gray", lty = 2)
/// is.act <- activ == 1
/// points(hours[is.act], temp[is.act], col = 2, cex = .8)
///  })
/// }
/// op <- par(mfrow = c(2, 1), mar = c(3, 3, 4, 2), mgp = 0.9 * 2:0)
/// beaver.plot(beaver1, ylim = yl)
/// beaver.plot(beaver2, ylim = yl)
/// par(op)
/// ```
pub fn beaver2() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("beaver2.csv"))).finish()
}

/// # Sales Data with Leading Indicator
///
/// ## Description:
///
/// The sales time series ‘BJsales’ and leading indicator
/// ‘BJsales.lead’ each contain 150 observations.  The objects are of
/// class ‘"ts"’.
///
/// ## Usage:
///
/// BJsales
/// BJsales.lead
///
/// ## Source:
///
/// The data are given in Box & Jenkins (1976).  Obtained from the
/// Time Series Data Library at <https://robjhyndman.com/TSDL/>
///
/// ## References:
///
/// G. E. P. Box and G. M. Jenkins (1976): _Time Series Analysis,
/// Forecasting and Control_, Holden-Day, San Francisco, p. 537.
///
/// P. J. Brockwell and R. A. Davis (1991): _Time Series: Theory and
/// Methods_, Second edition, Springer Verlag, NY, pp. 414.
pub fn bjsales() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("BJsales.csv"))).finish()
}

/// # Sales Data with Leading Indicator
///
/// ## Description:
///
/// The sales time series ‘BJsales’ and leading indicator
/// ‘BJsales.lead’ each contain 150 observations.  The objects are of
/// class ‘"ts"’.
///
/// ## Usage:
///
/// BJsales
/// BJsales.lead
///
/// ## Source:
///
/// The data are given in Box & Jenkins (1976).  Obtained from the
/// Time Series Data Library at <https://robjhyndman.com/TSDL/>
///
/// ## References:
///
/// G. E. P. Box and G. M. Jenkins (1976): _Time Series Analysis,
/// Forecasting and Control_, Holden-Day, San Francisco, p. 537.
///
/// P. J. Brockwell and R. A. Davis (1991): _Time Series: Theory and
/// Methods_, Second edition, Springer Verlag, NY, pp. 414.
pub fn bjsales_lead() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("BJsales.lead.csv"))).finish()
}

/// # Biochemical Oxygen Demand
///
/// ## Description:
///
/// The ‘BOD’ data frame has 6 rows and 2 columns giving the
/// biochemical oxygen demand versus time in an evaluation of water
/// quality.
///
/// ## Usage:
///
/// BOD
///
/// ## Format:
///
/// This data frame contains the following columns:
///
/// * ‘Time’ A numeric vector giving the time of the measurement (days).
/// * ‘demand’ A numeric vector giving the biochemical oxygen demand
/// (mg/l).
///
/// ## Source:
///
/// Bates, D.M. and Watts, D.G. (1988), _Nonlinear Regression Analysis
/// and Its Applications_, Wiley, Appendix A1.4.
///
/// Originally from Marske (1967), _Biochemical Oxygen Demand Data
/// Interpretation Using Sum of Squares Surface_ M.Sc. Thesis,
/// University of Wisconsin - Madison.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// # simplest form of fitting a first-order model to these data
/// fm1 <- nls(demand ~ A*(1-exp(-exp(lrc)*Time)), data = BOD,
///   start = c(A = 20, lrc = log(.35)))
/// coef(fm1)
/// fm1
/// # using the plinear algorithm  (trace o/p differs by platform)
/// ## IGNORE_RDIFF_BEGIN
/// fm2 <- nls(demand ~ (1-exp(-exp(lrc)*Time)), data = BOD,
///   start = c(lrc = log(.35)), algorithm = "plinear", trace = TRUE)
/// ## IGNORE_RDIFF_END
/// # using a self-starting model
/// fm3 <- nls(demand ~ SSasympOrig(Time, A, lrc), data = BOD)
/// summary(fm3)
/// ```
pub fn bod() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("BOD.csv"))).finish()
}

/// # Speed and Stopping Distances of Cars
///
/// ## Description:
///
/// The data give the speed of cars and the distances taken to stop.
/// Note that the data were recorded in the 1920s.
///
/// ## Usage:
///
/// cars
///
/// ## Format:
///
/// A data frame with 50 observations on 2 variables.
///
/// * \[,1\]  speed  numeric  Speed (mph)
/// * \[,2\]  distnumeric  Stopping distance (ft)
///
/// ## Source:
///
/// Ezekiel, M. (1930) _Methods of Correlation Analysis_.  Wiley.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
///  las = 1)
/// lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
/// title(main = "cars data")
/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
///  las = 1, log = "xy")
/// title(main = "cars data (logarithmic scales)")
/// lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
/// summary(fm1 <- lm(log(dist) ~ log(speed), data = cars))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
///
/// ## An example of polynomial regression
/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
/// las = 1, xlim = c(0, 25))
/// d <- seq(0, 25, length.out = 200)
/// for(degree in 1:4) {
///  fm <- lm(dist ~ poly(speed, degree), data = cars)
///  assign(paste("cars", degree, sep = "."), fm)
///  lines(d, predict(fm, data.frame(speed = d)), col = degree)
/// }
/// anova(cars.1, cars.2, cars.3, cars.4)
/// ```
pub fn cars() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("cars.csv"))).finish()
}

/// # Weight versus age of chicks on different diets
///
/// ## Description:
///
/// The ‘ChickWeight’ data frame has 578 rows and 4 columns from an
/// experiment on the effect of diet on early growth of chicks.
///
/// ## Usage:
///
/// ChickWeight
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// * weight a numeric vector giving the body weight of the chick (gm).
/// * Time a numeric vector giving the number of days since birth when
/// the measurement was made.
/// * Chick an ordered factor with levels ‘18’ < ... < ‘48’ giving a
/// unique identifier for the chick.  The ordering of the levels
/// groups chicks on the same diet together and orders them
/// according to their final weight (lightest to heaviest) within
/// diet.
/// * Diet a factor with levels 1, ..., 4 indicating which experimental
/// diet the chick received.
///
/// ## Details:
///
/// The body weights of the chicks were measured at birth and every
/// second day thereafter until day 20.  They were also measured on
/// day 21.  There were four groups on chicks on different protein
/// diets.
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Crowder, M. and Hand, D. (1990), _Analysis of Repeated Measures_,
/// Chapman and Hall (example 5.3)
///
/// Hand, D. and Crowder, M. (1996), _Practical Longitudinal Data
/// Analysis_, Chapman and Hall (table A.2)
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## See Also:
///
/// ‘SSlogis’ for models fitted to this dataset.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// coplot(weight ~ Time | Chick, data = ChickWeight,
///  type = "b", show.given = FALSE)
/// ```
pub fn chick_weight() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("ChickWeight.csv"))).finish()
}

/// # Chicken Weights by Feed Type
///
/// ## Description:
///
/// An experiment was conducted to measure and compare the
/// effectiveness of various feed supplements on the growth rate of
/// chickens.
///
/// ## Usage:
///
/// chickwts
///
/// ## Format:
///
/// A data frame with 71 observations on the following 2 variables.
///
/// * ‘weight’ a numeric variable giving the chick weight.
/// * ‘feed’ a factor giving the feed type.
///
/// ## Details:
///
/// Newly hatched chicks were randomly allocated into six groups, and
/// each group was given a different feed supplement.  Their weights
/// in grams after six weeks are given along with feed types.
///
/// ## Source:
///
/// Anonymous (1948) _Biometrika_, *35*, 214.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// boxplot(weight ~ feed, data = chickwts, col = "lightgray",
/// varwidth = TRUE, notch = TRUE, main = "chickwt data",
/// ylab = "Weight at six weeks (gm)")
/// anova(fm1 <- lm(weight ~ feed, data = chickwts))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
/// ```
pub fn chickwts() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("chickwts.csv"))).finish()
}

/// # Mauna Loa Atmospheric CO2 Concentration
///
/// ## Description:
///
/// Atmospheric concentrations of CO2 are expressed in parts per
/// million (ppm) and reported in the preliminary 1997 SIO manometric
/// mole fraction scale.
///
/// ## Usage:
///
/// co2
///
/// ## Format:
///
/// A time series of 468 observations; monthly from 1959 to 1997.
///
/// ## Details:
///
/// The values for February, March and April of 1964 were missing and
/// have been obtained by interpolating linearly between the values
/// for January and May of 1964.
///
/// ## Source:
///
/// Keeling, C. D. and Whorf, T. P., Scripps Institution of
/// Oceanography (SIO), University of California, La Jolla, California
/// USA 92093-0220.
///
/// <https://scrippsco2.ucsd.edu/data/atmospheric_co2/>.
///
/// Note that the data are subject to revision (based on recalibration
/// of standard gases) by the Scripps institute, and hence may not
/// agree exactly with the data provided by R.
///
/// ## References:
///
/// Cleveland, W. S. (1993) _Visualizing Data_.  New Jersey: Summit
/// Press.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(co2, ylab = expression("Atmospheric concentration of CO"[2]),
///   las = 1)
/// title(main = "co2 data set")
/// ```
pub fn co2_mauna() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("co2.mauna.csv"))).finish()
}

/// # Carbon Dioxide Uptake in Grass Plants
///
/// ## Description:
///
/// The ‘CO2’ data frame has 84 rows and 5 columns of data from an
/// experiment on the cold tolerance of the grass species _Echinochloa
/// crus-galli_.
///
/// ## Usage:
///
/// CO2
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// * Plant an ordered factor with levels ‘Qn1’ < ‘Qn2’ < ‘Qn3’ < ... <
/// ‘Mc1’ giving a unique identifier for each plant.
/// * Type a factor with levels ‘Quebec’ ‘Mississippi’ giving the origin
/// of the plant
/// * Treatment a factor with levels ‘nonchilled’ ‘chilled’
/// * conc a numeric vector of ambient carbon dioxide concentrations
/// (mL/L).
/// * uptake a numeric vector of carbon dioxide uptake rates (umol/m^2
/// sec).
///
/// ## Details:
///
/// The CO2 uptake of six plants from Quebec and six plants from
/// Mississippi was measured at several levels of ambient CO2
/// concentration.  Half the plants of each type were chilled
/// overnight before the experiment was conducted.
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Potvin, C., Lechowicz, M. J. and Tardif, S. (1990) “The
/// statistical analysis of ecophysiological response curves obtained
/// from experiments involving repeated measures”, _Ecology_, *71*,
/// 1389-1400.
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
///
/// coplot(uptake ~ conc | Plant, data = CO2, show.given = FALSE, type = "b")
/// ## fit the data for the first plant
/// fm1 <- nls(uptake ~ SSasymp(conc, Asym, lrc, c0),
/// data = CO2, subset = Plant == "Qn1")
/// summary(fm1)
/// ## fit each plant separately
/// fmlist <- list()
/// for (pp in levels(CO2$Plant)) {
///  fmlist[[pp]] <- nls(uptake ~ SSasymp(conc, Asym, lrc, c0),
/// data = CO2, subset = Plant == pp)
/// }
/// ## check the coefficients by plant
/// print(sapply(fmlist, coef), digits = 3)
/// ```
pub fn co2_plants() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("CO2.plants.csv"))).finish()
}

/// # Student's 3000 Criminals Data
///
/// ## Description:
///
/// Data of 3000 male criminals over 20 years old undergoing their
/// sentences in the chief prisons of England and Wales.
///
/// ## Usage:
///
/// crimtab
///
/// ## Format:
///
/// A ‘table’ object of ‘integer’ counts, of dimension 42 * 22 with a
/// total count, ‘sum(crimtab)’ of 3000.
///
/// The 42 ‘rownames’ (‘"9.4"’, ‘"9.5"’, ...)  correspond to midpoints
/// of intervals of finger lengths whereas the 22 column names
/// (‘colnames’) (‘"142.24"’, ‘"144.78"’, ...) correspond to (body)
/// heights of 3000 criminals, see also below.
///
/// ## Details:
///
/// Student is the pseudonym of William Sealy Gosset.  In his 1908
/// paper he wrote (on page 13) at the beginning of section VI
/// entitled _Practical Test of the forgoing Equations_:
///
/// “Before I had succeeded in solving my problem analytically, I had
/// endeavoured to do so empirically.  The material used was a
/// correlation table containing the height and left middle finger
/// measurements of 3000 criminals, from a paper by W. R. MacDonell
/// (_Biometrika_, Vol. I., p. 219).  The measurements were written
/// out on 3000 pieces of cardboard, which were then very thoroughly
/// shuffled and drawn at random.  As each card was drawn its numbers
/// were written down in a book, which thus contains the measurements
/// of 3000 criminals in a random order.  Finally, each consecutive
/// set of 4 was taken as a sample-750 in all-and the mean, standard
/// deviation, and correlation of each sample determined.  The
/// difference between the mean of each sample and the mean of the
/// population was then divided by the standard deviation of the
/// sample, giving us the _z_ of Section III.”
///
/// The table is in fact page 216 and not page 219 in MacDonell(1902).
/// In the MacDonell table, the middle finger lengths were given in mm
/// and the heights in feet/inches intervals, they are both converted
/// into cm here.  The midpoints of intervals were used, e.g., where
/// MacDonell has 4' 7''9/16 -- 8''9/16, we have 142.24 which is
/// 2.54*56 = 2.54*(4' 8'').
///
/// MacDonell credited the source of data (page 178) as follows: _The
/// data on which the memoir is based were obtained, through the
/// kindness of Dr Garson, from the Central Metric Office, New
/// Scotland Yard..._ He pointed out on page 179 that : _The forms
/// were drawn at random from the mass on the office shelves; we are
///  therefore dealing with a random sampling._
///
/// ## Source:
///
/// <https://pbil.univ-lyon1.fr/R/donnees/criminals1902.txt> thanks to
/// Jean R. Lobry and Anne-Béatrice Dufour.
///
/// ## References:
///
/// Garson, J.G. (1900).  The metric system of identification of
/// criminals, as used in Great Britain and Ireland.  _The Journal of
/// the Anthropological Institute of Great Britain and Ireland_, *30*,
/// 161-198.  doi:10.2307/2842627 <https://doi.org/10.2307/2842627>.
///
/// MacDonell, W.R. (1902).  On criminal anthropometry and the
/// identification of criminals.  _Biometrika_, *1*(2), 177-227.
/// doi:10.2307/2331487 <https://doi.org/10.2307/2331487>.
///
/// Student (1908).  The probable error of a mean.  _Biometrika_, *6*,
/// 1-25.  doi:10.2307/2331554 <https://doi.org/10.2307/2331554>.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// dim(crimtab)
/// utils::str(crimtab)
/// ## for nicer printing:
/// local({cT <- crimtab
///  colnames(cT) <- substring(colnames(cT), 2, 3)
///  print(cT, zero.print = " ")
/// })
///
/// ## Repeat Student's experiment:
///
/// # 1) Reconstitute 3000 raw data for heights in inches and rounded to
/// # nearest integer as in Student's paper:
///
/// (heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
/// d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))
///
/// # 2) shuffle the data:
///
/// set.seed(1)
/// d.hei <- d.hei[sample(1:3000), , drop = FALSE]
///
/// # 3) Make 750 samples each of size 4:
///
/// d.hei$sample <- as.factor(rep(1:750, each = 4))
///
/// # 4) Compute the means and standard deviations (n) for the 750 samples:
///
/// h.mean <- with(d.hei, tapply(height, sample, FUN = mean))
/// h.sd<- with(d.hei, tapply(height, sample, FUN = sd)) * sqrt(3/4)
///
/// # 5) Compute the difference between the mean of each sample and
/// # the mean of the population and then divide by the
/// # standard deviation of the sample:
///
/// zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
///
/// # 6) Replace infinite values by +/- 6 as in Student's paper:
///
/// zobs[infZ <- is.infinite(zobs)] # none of them
/// zobs[infZ] <- 6 * sign(zobs[infZ])
///
/// # 7) Plot the distribution:
///
/// require(grDevices); require(graphics)
/// hist(x = zobs, probability = TRUE, xlab = "Student's z",
///  col = grey(0.8), border = grey(0.5),
///  main = "Distribution of Student's z score  for 'crimtab' data")
/// ```
pub fn crimtab() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("crimtab.csv"))).finish()
}

/// # Yearly Numbers of Important Discoveries
///
/// ## Description:
///
/// The numbers of “great” inventions and scientific discoveries in
/// each year from 1860 to 1959.
///
/// ## Usage:
///
/// discoveries
///
/// ## Format:
///
/// A time series of 100 values.
///
/// ## Source:
///
/// The World Almanac and Book of Facts, 1975 Edition, pages 315-318.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(discoveries, ylab = "Number of important discoveries",
///  las = 1)
/// title(main = "discoveries data set")
/// ```
pub fn discoveries() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("discoveries.csv"))).finish()
}

/// # Elisa assay of DNase
///
/// ## Description:
///
/// The ‘DNase’ data frame has 176 rows and 3 columns of data obtained
/// during development of an ELISA assay for the recombinant protein
/// DNase in rat serum.
///
/// ## Usage:
///
/// DNase
///
/// ## Format:
///
/// * An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
/// * Run an ordered factor with levels ‘10’ < ... < ‘3’ indicating the
/// assay run.
/// * conc a numeric vector giving the known concentration of the
/// protein.
/// * density a numeric vector giving the measured optical density
/// (dimensionless) in the assay.  Duplicate optical density
/// measurements were obtained.
///
/// ## Details:
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
/// Repeated Measurement Data_, Chapman & Hall (section 5.2.4, p. 134)
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
///
/// coplot(density ~ conc | Run, data = DNase,
/// show.given = FALSE, type = "b")
/// coplot(density ~ log(conc) | Run, data = DNase,
/// show.given = FALSE, type = "b")
/// ## fit a representative run
/// fm1 <- nls(density ~ SSlogis( log(conc), Asym, xmid, scal ),
/// data = DNase, subset = Run == 1)
/// ## compare with a four-parameter logistic
/// fm2 <- nls(density ~ SSfpl( log(conc), A, B, xmid, scal ),
/// data = DNase, subset = Run == 1)
/// summary(fm2)
/// anova(fm1, fm2)
/// ```
pub fn dnase() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("DNase.csv"))).finish()
}

/// # Smoking, Alcohol and (O)esophageal Cancer
///
/// ## Description:
///
/// Data from a case-control study of (o)esophageal cancer in
/// Ille-et-Vilaine, France.
///
/// ## Usage:
///
/// esoph
///
/// ## Format:
///
/// A data frame with records for 88 age/alcohol/tobacco combinations.
///
/// * \[,1\]  "agegp"Age group1  25-34 years
///     * 2  35-44
///     * 3  45-54
///     * 4  55-64
///     * 5  65-74
///     * 6  75+
/// * \[,2\]  "alcgp"Alcohol consumption  10-39 gm/day
///     * 2  40-79
///     * 3  80-119
///     * 4  120+
/// * \[,3\]  "tobgp"Tobacco consumption  10- 9 gm/day
///     * 2  10-19
///     * 3  20-29
///     * 4  30+
/// * \[,4\]  "ncases"  Number of cases
/// * \[,5\]  "ncontrols"  Number of controls
///
/// ## Author(s):
///
/// Thomas Lumley
///
/// ## Source:
///
/// Breslow, N. E. and Day, N. E. (1980) _Statistical Methods in
/// Cancer Research. Volume 1: The Analysis of Case-Control Studies._
/// IARC Lyon / Oxford University Press.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// require(graphics) # for mosaicplot
/// summary(esoph)
/// ## effects of alcohol, tobacco and interaction, age-adjusted
/// model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
///  data = esoph, family = binomial())
/// anova(model1)
/// ## Try a linear effect of alcohol and tobacco
/// model2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp)
///  + unclass(alcgp),
///  data = esoph, family = binomial())
/// summary(model2)
/// ## Re-arrange data for a mosaic plot
/// ttt <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
/// o <- with(esoph, order(tobgp, alcgp, agegp))
/// ttt[ttt == 1] <- esoph$ncases[o]
/// tt1 <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
/// tt1[tt1 == 1] <- esoph$ncontrols[o]
/// tt <- array(c(ttt, tt1), c(dim(ttt),2),
/// c(dimnames(ttt), list(c("Cancer", "control"))))
/// mosaicplot(tt, main = "esoph data set", color = TRUE)
/// ```
pub fn esoph() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("esoph.csv"))).finish()
}

/// # Conversion Rates of Euro Currencies
///
/// ## Description:
///
/// Conversion rates between the various Euro currencies.
///
/// ## Usage:
///
/// euro
/// euro.cross
///
/// ## Format:
///
/// ‘euro’ is a named vector of length 11, ‘euro.cross’ a matrix of
/// size 11 by 11, with dimnames.
///
/// ## Details:
///
/// The data set ‘euro’ contains the value of 1 Euro in all currencies
/// participating in the European monetary union (Austrian Schilling
/// ATS, Belgian Franc BEF, German Mark DEM, Spanish Peseta ESP,
/// Finnish Markka FIM, French Franc FRF, Irish Punt IEP, Italian Lira
/// ITL, Luxembourg Franc LUF, Dutch Guilder NLG and Portuguese Escudo
/// PTE).  These conversion rates were fixed by the European Union on
/// December 31, 1998.  To convert old prices to Euro prices, divide
/// by the respective rate and round to 2 digits.
///
/// The data set ‘euro.cross’ contains conversion rates between the
/// various Euro currencies, i.e., the result of ‘outer(1 / euro,
/// euro)’.
///
/// ## Examples:
///
/// ```r
/// cbind(euro)
///
/// ## These relations hold:
/// euro == signif(euro, 6) # [6 digit precision in Euro's definition]
/// all(euro.cross == outer(1/euro, euro))
///
/// ## Convert 20 Euro to Belgian Franc
/// 20 * euro["BEF"]
/// ## Convert 20 Austrian Schilling to Euro
/// 20 / euro["ATS"]
/// ## Convert 20 Spanish Pesetas to Italian Lira
/// 20 * euro.cross["ESP", "ITL"]
///
/// require(graphics)
/// dotchart(euro,
/// main = "euro data: 1 Euro in currency unit")
/// dotchart(1/euro,
/// main = "euro data: 1 currency unit in Euros")
/// dotchart(log(euro, 10),
/// main = "euro data: log10(1 Euro in currency unit)")
/// ```
pub fn euro_cross() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("euro.cross.csv"))).finish()
}

/// # Conversion Rates of Euro Currencies
///
/// ## Description:
///
/// Conversion rates between the various Euro currencies.
///
/// ## Usage:
///
/// euro
/// euro.cross
///
/// ## Format:
///
/// ‘euro’ is a named vector of length 11, ‘euro.cross’ a matrix of
/// size 11 by 11, with dimnames.
///
/// ## Details:
///
/// The data set ‘euro’ contains the value of 1 Euro in all currencies
/// participating in the European monetary union (Austrian Schilling
/// ATS, Belgian Franc BEF, German Mark DEM, Spanish Peseta ESP,
/// Finnish Markka FIM, French Franc FRF, Irish Punt IEP, Italian Lira
/// ITL, Luxembourg Franc LUF, Dutch Guilder NLG and Portuguese Escudo
/// PTE).  These conversion rates were fixed by the European Union on
/// December 31, 1998.  To convert old prices to Euro prices, divide
/// by the respective rate and round to 2 digits.
///
/// The data set ‘euro.cross’ contains conversion rates between the
/// various Euro currencies, i.e., the result of ‘outer(1 / euro,
/// euro)’.
///
/// ## Examples:
///
/// ```r
/// cbind(euro)
///
/// ## These relations hold:
/// euro == signif(euro, 6) # [6 digit precision in Euro's definition]
/// all(euro.cross == outer(1/euro, euro))
///
/// ## Convert 20 Euro to Belgian Franc
/// 20 * euro["BEF"]
/// ## Convert 20 Austrian Schilling to Euro
/// 20 / euro["ATS"]
/// ## Convert 20 Spanish Pesetas to Italian Lira
/// 20 * euro.cross["ESP", "ITL"]
///
/// require(graphics)
/// dotchart(euro,
/// main = "euro data: 1 Euro in currency unit")
/// dotchart(1/euro,
/// main = "euro data: 1 currency unit in Euros")
/// dotchart(log(euro, 10),
/// main = "euro data: log10(1 Euro in currency unit)")
/// ```
pub fn euro() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("euro.csv"))).finish()
}

/// # Distances Between European Cities and Between US Cities
///
/// ## Description:
///
/// The ‘eurodist’ gives the road distances (in km) between 21 cities
/// in Europe.  The data are taken from a table in _The Cambridge
/// Encyclopaedia_.
///
/// ‘UScitiesD’ gives “straight line” distances between 10 cities in
/// the US.
///
/// ## Usage:
///
/// eurodist
/// UScitiesD
///
/// ## Format:
///
/// ‘dist’ objects based on 21 and 10 objects, respectively.  (You
/// must have the ‘stats’ package loaded to have the methods for this
/// kind of object available).
///
/// ## Source:
///
/// Crystal, D. Ed. (1990) _The Cambridge Encyclopaedia_.  Cambridge:
/// Cambridge University Press,
///
/// The US cities distances were provided by Pierre Legendre.
pub fn eurodist() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("eurodist.csv"))).finish()
}

/// # Daily Closing Prices of Major European Stock Indices, 1991-1998
///
/// ## Description:
///
/// Contains the daily closing prices of major European stock indices:
/// Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE.  The
/// data are sampled in business time, i.e., weekends and holidays are
/// omitted.
///
/// ## Usage:
///
/// EuStockMarkets
///
/// ## Format:
///
/// A multivariate time series with 1860 observations on 4 variables.
/// The object is of class ‘"mts"’.
///
/// ## Source:
///
/// The data were kindly provided by Erste Bank AG, Vienna, Austria.
pub fn eu_stock_markets() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("EuStockMarkets.csv"))).finish()
}

/// # Old Faithful Geyser Data
///
/// ## Description:
///
/// Waiting time between eruptions and the duration of the eruption
/// for the Old Faithful geyser in Yellowstone National Park, Wyoming,
/// USA.
///
/// ## Usage:
///
/// faithful
///
/// ## Format:
///
/// A data frame with 272 observations on 2 variables.
///
/// * \[,1\]  eruptions  numeric  Eruption time in mins
/// * \[,2\]  waiting numeric  Waiting time to next
/// eruption (in mins)
///
/// ## Details:
///
/// A closer look at ‘faithful$eruptions’ reveals that these are
/// heavily rounded times originally in seconds, where multiples of 5
/// are more frequent than expected under non-human measurement.  For
/// a better version of the eruption times, see the example below.
///
/// There are many versions of this dataset around: Azzalini and
/// Bowman (1990) use a more complete version.
///
/// ## Source:
///
/// W. Härdle.
///
/// ## References:
///
/// Härdle, W. (1991).  _Smoothing Techniques with Implementation in
/// S_.  New York: Springer.
///
/// Azzalini, A. and Bowman, A. W. (1990).  A look at some data on the
/// Old Faithful geyser.  _Applied Statistics_, *39*, 357-365.
/// doi:10.2307/2347385 <https://doi.org/10.2307/2347385>.
///
/// ## See Also:
///
/// ‘geyser’ in package ‘MASS’ for the Azzalini-Bowman version.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// f.tit <-  "faithful data: Eruptions of Old Faithful"
///
/// ne60 <- round(e60 <- 60 * faithful$eruptions)
/// all.equal(e60, ne60) # relative diff. ~ 1/10000
/// table(zapsmall(abs(e60 - ne60))) # 0, 0.02 or 0.04
/// faithful$better.eruptions <- ne60 / 60
/// te <- table(ne60)
/// te[te >= 4] # (too) many multiples of 5 !
/// plot(names(te), te, type = "h", main = f.tit, xlab = "Eruption time (sec)")
///
/// plot(faithful[, -3], main = f.tit,
///  xlab = "Eruption time (min)",
///  ylab = "Waiting time to next eruption (min)")
/// lines(lowess(faithful$eruptions, faithful$waiting, f = 2/3, iter = 3),
/// col = "red")
/// ```
pub fn faithful() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("faithful.csv"))).finish()
}

/// # Monthly Deaths from Lung Diseases in the UK
///
/// ## Description:
///
/// Three time series giving the monthly deaths from bronchitis,
/// emphysema and asthma in the UK, 1974-1979, both sexes (‘ldeaths’),
/// males (‘mdeaths’) and females (‘fdeaths’).
///
/// ## Usage:
///
/// ldeaths
/// fdeaths
/// mdeaths
///
/// ## Source:
///
/// P. J. Diggle (1990) _Time Series: A Biostatistical Introduction._
/// Oxford, table A.3
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics) # for time
/// plot(ldeaths)
/// plot(mdeaths, fdeaths)
/// ## Better labels:
/// yr <- floor(tt <- time(mdeaths))
/// plot(mdeaths, fdeaths,
///  xy.labels = paste(month.abb[12*(tt - yr)], yr-1900, sep = "'"))
/// ```
pub fn fdeaths() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("fdeaths.csv"))).finish()
}

/// # Determination of Formaldehyde
///
/// ## Description:
///
/// These data are from a chemical experiment to prepare a standard
/// curve for the determination of formaldehyde by the addition of
/// chromatropic acid and concentrated sulphuric acid and the reading
/// of the resulting purple color on a spectrophotometer.
///
/// ## Usage:
///
/// Formaldehyde
///
/// ## Format:
///
/// A data frame with 6 observations on 2 variables.
///
/// * \[,1\]  carb numeric  Carbohydrate (ml)
/// * \[,2\]  optden  numeric  Optical Density
///
/// ## Source:
///
/// Bennett, N. A. and N. L. Franklin (1954) _Statistical Analysis in
/// Chemistry and the Chemical Industry_.  New York: Wiley.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis._ New York: Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// plot(optden ~ carb, data = Formaldehyde,
///  xlab = "Carbohydrate (ml)", ylab = "Optical Density",
///  main = "Formaldehyde data", col = 4, las = 1)
/// abline(fm1 <- lm(optden ~ carb, data = Formaldehyde))
/// summary(fm1)
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
/// plot(fm1)
/// par(opar)
/// ```
pub fn formaldehyde() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Formaldehyde.csv"))).finish()
}

/// # Freeny's Revenue Data
///
/// ## Description:
///
/// Freeny's data on quarterly revenue and explanatory variables.
///
/// ## Usage:
///
/// freeny
/// freeny.x
/// freeny.y
///
/// ## Format:
///
/// There are three ‘freeny’ data sets.
///
/// * ‘freeny.y’ is a time series with 39 observations on quarterly
/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// * ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
/// ‘market.potential’ obtained from the above two data objects.
///
/// ## Source:
///
/// A. E. Freeny (1977) _A Portable Linear Regression Package with
/// Test Programs_.  Bell Laboratories memorandum.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// summary(freeny)
/// pairs(freeny, main = "freeny data")
/// # gives warning: freeny$y has class "ts"
///
/// summary(fm1 <- lm(y ~ ., data = freeny))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
/// ```
pub fn freeny() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("freeny.csv"))).finish()
}

/// # Freeny's Revenue Data
///
/// ## Description:
///
/// Freeny's data on quarterly revenue and explanatory variables.
///
/// ## Usage:
///
/// freeny
/// freeny.x
/// freeny.y
///
/// ## Format:
///
/// There are three ‘freeny’ data sets.
///
/// * ‘freeny.y’ is a time series with 39 observations on quarterly
/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// * ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
/// ‘market.potential’ obtained from the above two data objects.
///
/// ## Source:
///
/// A. E. Freeny (1977) _A Portable Linear Regression Package with
/// Test Programs_.  Bell Laboratories memorandum.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// summary(freeny)
/// pairs(freeny, main = "freeny data")
/// # gives warning: freeny$y has class "ts"
///
/// summary(fm1 <- lm(y ~ ., data = freeny))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
/// ```
pub fn freeny_x() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("freeny.x.csv"))).finish()
}

/// # Freeny's Revenue Data
///
/// ## Description:
///
/// Freeny's data on quarterly revenue and explanatory variables.
///
/// ## Usage:
///
/// freeny
/// freeny.x
/// freeny.y
///
/// ## Format:
///
/// There are three ‘freeny’ data sets.
///
/// * ‘freeny.y’ is a time series with 39 observations on quarterly
/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// * ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
/// ‘market.potential’ obtained from the above two data objects.
///
/// ## Source:
///
/// A. E. Freeny (1977) _A Portable Linear Regression Package with
/// Test Programs_.  Bell Laboratories memorandum.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// summary(freeny)
/// pairs(freeny, main = "freeny data")
/// # gives warning: freeny$y has class "ts"
///
/// summary(fm1 <- lm(y ~ ., data = freeny))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
/// ```
pub fn freeny_y() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("freeny.y.csv"))).finish()
}

/// # Hair and Eye Color of Statistics Students
///
/// ## Description:
///
/// Distribution of hair and eye color and sex in 592 statistics
/// students.
///
/// ## Usage:
///
/// HairEyeColor
///
/// ## Format:
///
/// A 3-dimensional array resulting from cross-tabulating 592
/// observations on 3 variables.  The variables and their levels are
/// as follows:
///
/// | No | Name  | Levels                    |
/// |----|-------|---------------------------|
/// | 1  | Hair  | Black, Brown, Red, Blond  |
/// | 2  | Eye   | Brown, Blue, Hazel, Green |
/// | 3  | Sex   | Male, Female              |
///
/// ## Details:
///
/// The Hair x Eye table comes from a survey of students at the
/// University of Delaware reported by Snee (1974).  The split by
/// ‘Sex’ was added by Friendly (1992a) for didactic purposes.
///
/// This data set is useful for illustrating various techniques for
/// the analysis of contingency tables, such as the standard
/// chi-squared test or, more generally, log-linear modelling, and
/// graphical methods such as mosaic plots, sieve diagrams or
/// association plots.
///
/// ## Source:
///
/// <http://www.datavis.ca/sas/vcd/catdata/haireye.sas>
///
/// Snee (1974) gives the two-way table aggregated over ‘Sex’.  The
/// ‘Sex’ split of the ‘Brown hair, Brown eye’ cell was changed to
/// agree with that used by Friendly (2000).
///
/// ## References:
///
/// Snee, R. D. (1974).  Graphical display of two-way contingency
/// tables.  _The American Statistician_, *28*, 9-12.
/// doi:10.2307/2683520 <https://doi.org/10.2307/2683520>.
///
/// Friendly, M. (1992a).  Graphical methods for categorical data.
/// _SAS User Group International Conference Proceedings_, *17*,
/// 190-200.  <http://datavis.ca/papers/sugi/sugi17.pdf>
///
/// Friendly, M. (1992b).  Mosaic displays for loglinear models.
/// _Proceedings of the Statistical Graphics Section_, American
/// Statistical Association, pp. 61-68.
/// <http://www.datavis.ca/papers/asa92.html>
///
/// Friendly, M. (2000).  _Visualizing Categorical Data_.  SAS
/// Institute, ISBN 1-58025-660-0.
///
/// ## See Also:
///
/// ‘chisq.test’, ‘loglin’, ‘mosaicplot’
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// ## Full mosaic
/// mosaicplot(HairEyeColor)
/// ## Aggregate over sex (as in Snee's original data)
/// x <- apply(HairEyeColor, c(1, 2), sum)
/// x
/// mosaicplot(x, main = "Relation between hair and eye color")
/// ```
pub fn hair_eye_color() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("HairEyeColor.csv"))).finish()
}

/// # Harman Example 2.3
///
/// ## Description:
///
/// A correlation matrix of eight physical measurements on 305 girls
/// between ages seven and seventeen.
///
/// ## Usage:
///
/// Harman23.cor
///
/// ## Source:
///
/// Harman, H. H. (1976) _Modern Factor Analysis_, Third Edition
/// Revised, University of Chicago Press, Table 2.3.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// (Harman23.FA <- factanal(factors = 1, covmat = Harman23.cor))
/// for(factors in 2:4) print(update(Harman23.FA, factors = factors))
/// ```
pub fn harman23_cor() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
    let mut center = Vec::new();
    let mut n_obs = 0;
    CsvReader::new(Cursor::new(include_str!("Harman23.cor.center.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| center.push(d.unwrap() as usize));
    CsvReader::new(Cursor::new(include_str!("Harman23.cor.n.obs.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| n_obs = d.unwrap() as usize);
    Ok((
        CsvReader::new(Cursor::new(include_str!("Harman23.cor.cov.csv"))).finish()?,
        center,
        n_obs,
    ))
}

/// # Harman Example 7.4
///
/// ## Description:
///
/// A correlation matrix of 24 psychological tests given to 145
/// seventh and eight-grade children in a Chicago suburb by Holzinger
/// and Swineford.
///
/// ## Usage:
///
/// Harman74.cor
///
/// ## Source:
///
/// Harman, H. H. (1976) _Modern Factor Analysis_, Third Edition
/// Revised, University of Chicago Press, Table 7.4.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// (Harman74.FA <- factanal(factors = 1, covmat = Harman74.cor))
/// for(factors in 2:5) print(update(Harman74.FA, factors = factors))
/// Harman74.FA <- factanal(factors = 5, covmat = Harman74.cor,
/// rotation = "promax")
/// print(Harman74.FA$loadings, sort = TRUE)
/// ```
pub fn harman74() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
    let mut center = Vec::new();
    let mut n_obs = 0;
    CsvReader::new(Cursor::new(include_str!("Harman74.cor.center.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| center.push(d.unwrap() as usize));
    CsvReader::new(Cursor::new(include_str!("Harman74.cor.n.obs.csv")))
        .finish()?
        .column("x")?
        .cast(&DataType::Float64)?
        .f64()?
        .for_each(|d| n_obs = d.unwrap() as usize);
    Ok((
        CsvReader::new(Cursor::new(include_str!("Harman74.cor.cov.csv"))).finish()?,
        center,
        n_obs,
    ))
}

/// # Pharmacokinetics of Indomethacin
///
/// ## Description:
///
/// The ‘Indometh’ data frame has 66 rows and 3 columns of data on the
/// pharmacokinetics of indometacin (or, older spelling,
/// ‘indomethacin’).
///
/// ## Usage:
///
/// Indometh
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// * Subject an ordered factor with containing the subject codes.  The
/// ordering is according to increasing maximum response.
/// * time a numeric vector of times at which blood samples were drawn
/// (hr).
/// * conc a numeric vector of plasma concentrations of indometacin
/// (mcg/ml).
///
/// ## Details:
///
/// Each of the six subjects were given an intravenous injection of
/// indometacin.
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Kwan, Breault, Umbenhauer, McMahon and Duggan (1976) Kinetics of
/// Indomethacin absorption, elimination, and enterohepatic
/// circulation in man.  _Journal of Pharmacokinetics and
/// Biopharmaceutics_ *4*, 255-280.
///
/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
/// Repeated Measurement Data_, Chapman & Hall (section 5.2.4, p. 129)
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## See Also:
///
/// ‘SSbiexp’ for models fitted to this dataset.
pub fn indometh() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Indometh.csv"))).finish()
}

/// # Infertility after Spontaneous and Induced Abortion
///
/// ## Description:
///
/// This is a matched case-control study dating from before the
/// availability of conditional logistic regression.
///
/// ## Usage:
///
/// infert
///
/// ## Format:
///
/// 1.  Education
///     * 0 = 0-5  years
///     * 1 = 6-11 years
///     * 2 = 12+  years
/// 2.  age  age in years of case
/// 3.  parity  count
/// 4.  number of prior induced abortions
///     * 0 = 0
///     * 1 = 1
///     * 2 = 2 or more
/// 5.  case status
///     * 1 = case
///     * 0 = control
/// 6.  number of prior spontaneous abortions
///     * 0 = 0
///     * 1 = 1
///     * 2 = 2 or more
/// 7.  matched set number  1-83
/// 8.  stratum number 1-63
///
/// ## Note:
///
/// One case with two prior spontaneous abortions and two prior
/// induced abortions is omitted.
///
/// ## Source:
///
/// Trichopoulos _et al_ (1976) _Br. J. of Obst. and Gynaec._ *83*,
/// 645-650.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// model1 <- glm(case ~ spontaneous+induced, data = infert, family = binomial())
/// summary(model1)
/// ## adjusted for other potential confounders:
/// summary(model2 <- glm(case ~ age+parity+education+spontaneous+induced,
/// data = infert, family = binomial()))
/// ## Really should be analysed by conditional logistic regression
/// ## which is in the survival package
/// if(require(survival)){
///  model3 <- clogit(case ~ spontaneous+induced+strata(stratum), data = infert)
///  print(summary(model3))
///  detach()  # survival (conflicts)
/// }
/// ```
pub fn infert() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("infert.csv"))).finish()
}

/// # Effectiveness of Insect Sprays
///
/// ## Description:
///
/// The counts of insects in agricultural experimental units treated
/// with different insecticides.
///
/// ## Usage:
///
/// InsectSprays
///
/// ## Format:
///
/// A data frame with 72 observations on 2 variables.
///
/// * \[,1\]  count  numeric  Insect count
/// * \[,2\]  spray  factorThe type of spray
///
/// ## Source:
///
/// Beall, G., (1942) The Transformation of data from entomological
/// field experiments, _Biometrika_, *29*, 243-262.
///
/// ## References:
///
/// McNeil, D. (1977) _Interactive Data Analysis_.  New York: Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// boxplot(count ~ spray, data = InsectSprays,
///  xlab = "Type of spray", ylab = "Insect count",
///  main = "InsectSprays data", varwidth = TRUE, col = "lightgray")
/// fm1 <- aov(count ~ spray, data = InsectSprays)
/// summary(fm1)
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
/// plot(fm1)
/// fm2 <- aov(sqrt(count) ~ spray, data = InsectSprays)
/// summary(fm2)
/// plot(fm2)
/// par(opar)
/// ```
pub fn insect_sprays() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("InsectSprays.csv"))).finish()
}

/// # Edgar Anderson's Iris Data
///
/// ## Description:
///
/// This famous (Fisher's or Anderson's) iris data set gives the
/// measurements in centimeters of the variables sepal length and
/// width and petal length and width, respectively, for 50 flowers
/// from each of 3 species of iris.  The species are _Iris setosa_,
/// _versicolor_, and _virginica_.
///
/// ## Usage:
///
/// iris
/// iris3
///
/// ## Format:
///
/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
/// ‘Petal.Width’, and ‘Species’.
///
/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
/// gives the case number within the species subsample, the second the
/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
/// ‘Petal W.’, and the third the species.
///
/// ## Source:
///
/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
///
/// The data were collected by Anderson, Edgar (1935).  The irises of
/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
/// *59*, 2-5.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
///
/// ## See Also:
///
/// ‘matplot’ some examples of which use ‘iris’.
///
/// ## Examples:
///
/// ```r
/// dni3 <- dimnames(iris3)
/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
/// dimnames = list(NULL, sub(" L.",".Length",
/// sub(" W.",".Width", dni3[[2]])))),
/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
/// all.equal(ii, iris) # TRUE
/// ```
pub fn iris() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("iris.csv"))).finish()
}

/// # Edgar Anderson's Iris Data
///
/// ## Description:
///
/// This famous (Fisher's or Anderson's) iris data set gives the
/// measurements in centimeters of the variables sepal length and
/// width and petal length and width, respectively, for 50 flowers
/// from each of 3 species of iris.  The species are _Iris setosa_,
/// _versicolor_, and _virginica_.
///
/// ## Usage:
///
/// iris
/// iris3
///
/// ## Format:
///
/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
/// ‘Petal.Width’, and ‘Species’.
///
/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
/// gives the case number within the species subsample, the second the
/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
/// ‘Petal W.’, and the third the species.
///
/// ## Source:
///
/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
///
/// The data were collected by Anderson, Edgar (1935).  The irises of
/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
/// *59*, 2-5.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
///
/// ## See Also:
///
/// ‘matplot’ some examples of which use ‘iris’.
///
/// ## Examples:
///
/// ```r
/// dni3 <- dimnames(iris3)
/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
/// dimnames = list(NULL, sub(" L.",".Length",
/// sub(" W.",".Width", dni3[[2]])))),
/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
/// all.equal(ii, iris) # TRUE
/// ```
pub fn iris3_setosa() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("iris3.Setosa.csv"))).finish()
}

/// # Edgar Anderson's Iris Data
///
/// ## Description:
///
/// This famous (Fisher's or Anderson's) iris data set gives the
/// measurements in centimeters of the variables sepal length and
/// width and petal length and width, respectively, for 50 flowers
/// from each of 3 species of iris.  The species are _Iris setosa_,
/// _versicolor_, and _virginica_.
///
/// ## Usage:
///
/// iris
/// iris3
///
/// ## Format:
///
/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
/// ‘Petal.Width’, and ‘Species’.
///
/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
/// gives the case number within the species subsample, the second the
/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
/// ‘Petal W.’, and the third the species.
///
/// ## Source:
///
/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
///
/// The data were collected by Anderson, Edgar (1935).  The irises of
/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
/// *59*, 2-5.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
///
/// ## See Also:
///
/// ‘matplot’ some examples of which use ‘iris’.
///
/// ## Examples:
///
/// ```r
/// dni3 <- dimnames(iris3)
/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
/// dimnames = list(NULL, sub(" L.",".Length",
/// sub(" W.",".Width", dni3[[2]])))),
/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
/// all.equal(ii, iris) # TRUE
/// ```
pub fn iris3_versicolor() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("iris3.Versicolor.csv"))).finish()
}

/// # Edgar Anderson's Iris Data
///
/// ## Description:
///
/// This famous (Fisher's or Anderson's) iris data set gives the
/// measurements in centimeters of the variables sepal length and
/// width and petal length and width, respectively, for 50 flowers
/// from each of 3 species of iris.  The species are _Iris setosa_,
/// _versicolor_, and _virginica_.
///
/// ## Usage:
///
/// iris
/// iris3
///
/// ## Format:
///
/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
/// ‘Petal.Width’, and ‘Species’.
///
/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
/// gives the case number within the species subsample, the second the
/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
/// ‘Petal W.’, and the third the species.
///
/// ## Source:
///
/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
///
/// The data were collected by Anderson, Edgar (1935).  The irises of
/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
/// *59*, 2-5.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
///
/// ## See Also:
///
/// ‘matplot’ some examples of which use ‘iris’.
///
/// ## Examples:
///
/// ```r
/// dni3 <- dimnames(iris3)
/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
/// dimnames = list(NULL, sub(" L.",".Length",
/// sub(" W.",".Width", dni3[[2]])))),
/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
/// all.equal(ii, iris) # TRUE
/// ```
pub fn iris3_virginica() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("iris3.Virginica.csv"))).finish()
}

/// # Areas of the World's Major Landmasses
///
/// ## Description:
///
/// The areas in thousands of square miles of the landmasses which
/// exceed 10,000 square miles.
///
/// ## Usage:
///
/// islands
///
/// ## Format:
///
/// A named vector of length 48.
///
/// ## Source:
///
/// The World Almanac and Book of Facts, 1975, page 406.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// dotchart(log(islands, 10),
/// main = "islands data: log10(area) (log10(sq. miles))")
/// dotchart(log(islands[order(islands)], 10),
/// main = "islands data: log10(area) (log10(sq. miles))")
/// ```
pub fn islands() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("islands.csv"))).finish()
}

/// # Quarterly Earnings per Johnson & Johnson Share
///
/// ## Description:
///
/// Quarterly earnings (dollars) per Johnson & Johnson share 1960-80.
///
/// ## Usage:
///
/// JohnsonJohnson
///
/// ## Format:
///
/// A quarterly time series
///
/// ## Source:
///
/// Shumway, R. H. and Stoffer, D. S. (2000) _Time Series Analysis and
/// its Applications_.  Second Edition.  Springer.  Example 1.1.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// JJ <- log10(JohnsonJohnson)
/// plot(JJ)
/// ## This example gives a possible-non-convergence warning on some
/// ## platforms, but does seem to converge on x86 Linux and Windows.
/// (fit <- StructTS(JJ, type = "BSM"))
/// tsdiag(fit)
/// sm <- tsSmooth(fit)
/// plot(cbind(JJ, sm[, 1], sm[, 3]-0.5), plot.type = "single",
///  col = c("black", "green", "blue"))
/// abline(h = -0.5, col = "grey60")
///
/// monthplot(fit)
/// ```
pub fn johnson_johnson() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("JohnsonJohnson.csv"))).finish()
}

/// # Level of Lake Huron 1875-1972
///
/// ## Description:
///
/// Annual measurements of the level, in feet, of Lake Huron
/// 1875-1972.
///
/// ## Usage:
///
/// LakeHuron
///
/// ## Format:
///
/// A time series of length 98.
///
/// ## Source:
///
/// Brockwell, P. J. and Davis, R. A. (1991).  _Time Series and
/// Forecasting Methods_.  Second edition. Springer, New York. Series
/// A, page 555.
///
/// Brockwell, P. J. and Davis, R. A. (1996).  _Introduction to Time
/// Series and Forecasting_.  Springer, New York.  Sections 5.1 and
/// 7.6.
pub fn lake_huron() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("LakeHuron.csv"))).finish()
}

/// # Monthly Deaths from Lung Diseases in the UK
///
/// ## Description:
///
/// Three time series giving the monthly deaths from bronchitis,
/// emphysema and asthma in the UK, 1974-1979, both sexes (‘ldeaths’),
/// males (‘mdeaths’) and females (‘fdeaths’).
///
/// ## Usage:
///
/// ldeaths
/// fdeaths
/// mdeaths
///
/// ## Source:
///
/// P. J. Diggle (1990) _Time Series: A Biostatistical Introduction._
/// Oxford, table A.3
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics) # for time
/// plot(ldeaths)
/// plot(mdeaths, fdeaths)
/// ## Better labels:
/// yr <- floor(tt <- time(mdeaths))
/// plot(mdeaths, fdeaths,
///  xy.labels = paste(month.abb[12*(tt - yr)], yr-1900, sep = "'"))
/// ```
pub fn ldeaths() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("ldeaths.csv"))).finish()
}

/// # Luteinizing Hormone in Blood Samples
///
/// ## Description:
///
/// A regular time series giving the luteinizing hormone in blood
/// samples at 10 mins intervals from a human female, 48 samples.
///
/// ## Usage:
///
/// lh
///
/// ## Source:
///
/// P.J. Diggle (1990) _Time Series: A Biostatistical Introduction._
/// Oxford, table A.1, series 3
pub fn lh() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("lh.csv"))).finish()
}

/// # Intercountry Life-Cycle Savings Data
///
/// ## Description:
///
/// Data on the savings ratio 1960-1970.
///
/// ## Usage:
///
/// LifeCycleSavings
///
/// ## Format:
///
/// A data frame with 50 observations on 5 variables.
///
/// * \[,1\]  sr  numeric  aggregate personal savings
/// * \[,2\]  pop15  numeric  % of population under 15
/// * \[,3\]  pop75  numeric  % of population over 75
/// * \[,4\]  dpi numeric  real per-capita disposable
/// income
/// * \[,5\]  ddpinumeric  % growth rate of dpi
///
/// ## Details:
///
/// Under the life-cycle savings hypothesis as developed by Franco
/// Modigliani, the savings ratio (aggregate personal saving divided
/// by disposable income) is explained by per-capita disposable
/// income, the percentage rate of change in per-capita disposable
/// income, and two demographic variables: the percentage of
/// population less than 15 years old and the percentage of the
/// population over 75 years old.  The data are averaged over the
/// decade 1960-1970 to remove the business cycle or other short-term
/// fluctuations.
///
/// # Source:
///
/// The data were obtained from Belsley, Kuh and Welsch (1980).  They
/// in turn obtained the data from Sterling (1977).
///
/// ## References:
///
/// Sterling, Arnie (1977) Unpublished BS Thesis.  Massachusetts
/// Institute of Technology.
///
/// Belsley, D. A., Kuh. E. and Welsch, R. E. (1980) _Regression
/// Diagnostics_.  New York: Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// pairs(LifeCycleSavings, panel = panel.smooth,
/// main = "LifeCycleSavings data")
/// fm1 <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LifeCycleSavings)
/// summary(fm1)
/// ```
pub fn life_cycle_savings() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("LifeCycleSavings.csv"))).finish()
}

/// # Growth of Loblolly pine trees
///
/// ## Description:
///
/// The ‘Loblolly’ data frame has 84 rows and 3 columns of records of
/// the growth of Loblolly pine trees.
///
/// ## Usage:
///
/// Loblolly
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// * height a numeric vector of tree heights (ft).
/// * age a numeric vector of tree ages (yr).
/// * Seed an ordered factor indicating the seed source for the tree.
/// The ordering is according to increasing maximum height.
///
/// ## Details:
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Kung, F. H. (1986), Fitting logistic growth curve with
/// predetermined carrying capacity, in _Proceedings of the
/// Statistical Computing Section, American Statistical Association_,
/// 340-343.
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// plot(height ~ age, data = Loblolly, subset = Seed == 329,
///  xlab = "Tree age (yr)", las = 1,
///  ylab = "Tree height (ft)",
///  main = "Loblolly data and fitted curve (Seed 329 only)")
/// fm1 <- nls(height ~ SSasymp(age, Asym, R0, lrc),
///  data = Loblolly, subset = Seed == 329)
/// age <- seq(0, 30, length.out = 101)
/// lines(age, predict(fm1, list(age = age)))
/// ```
pub fn loblolly() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Loblolly.csv"))).finish()
}

/// # Longley's Economic Regression Data
///
/// ## Description:
///
/// A macroeconomic data set which provides a well-known example for a
/// highly collinear regression.
///
/// ## Usage:
///
/// longley
///
/// ## Format:
///
/// A data frame with 7 economical variables, observed yearly from
/// 1947 to 1962 (n=16).
///
/// * ‘GNP.deflator’ GNP implicit price deflator (1954=100)
/// * ‘GNP’ Gross National Product.
/// * ‘Unemployed’ number of unemployed.
/// * ‘Armed.Forces’ number of people in the armed forces.
/// * ‘Population’ ‘noninstitutionalized’ population >= 14 years of age.
/// * ‘Year’ the year (time).
/// * ‘Employed’ number of people employed.
/// * The regression ‘lm(Employed ~ .)’ is known to be highly collinear.
///
/// ## Source:
///
/// J. W. Longley (1967) An appraisal of least-squares programs from
/// the point of view of the user.  _Journal of the American
/// Statistical Association_ *62*, 819-841.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// ## give the data set in the form it is used in S-PLUS:
/// longley.x <- data.matrix(longley[, 1:6])
/// longley.y <- longley[, "Employed"]
/// pairs(longley, main = "longley data")
/// summary(fm1 <- lm(Employed ~ ., data = longley))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
/// mar = c(4.1, 4.1, 2.1, 1.1))
/// plot(fm1)
/// par(opar)
/// ```
pub fn longley() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("longley.csv"))).finish()
}

/// # Annual Canadian Lynx trappings 1821-1934
///
/// ## Description:
///
/// Annual numbers of lynx trappings for 1821-1934 in Canada. Taken
/// from Brockwell & Davis (1991), this appears to be the series
/// considered by Campbell & Walker (1977).
///
/// ## Usage:
///
/// lynx
///
/// ## Source:
///
/// Brockwell, P. J. and Davis, R. A. (1991).  _Time Series and
/// Forecasting Methods_.  Second edition.  Springer. Series G (page
/// 557).
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988).  _The New
/// S Language_.  Wadsworth & Brooks/Cole.
///
/// Campbell, M. J. and Walker, A. M. (1977).  A Survey of statistical
/// work on the Mackenzie River series of annual Canadian lynx
/// trappings for the years 1821-1934 and a new analysis.  _Journal of
/// the Royal Statistical Society Series A_, *140*, 411-431.
/// doi:10.2307/2345277 <https://doi.org/10.2307/2345277>.
pub fn lynx() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("lynx.csv"))).finish()
}

/// # Michelson Speed of Light Data
///
/// ## Description:
///
/// A classical data of Michelson (but not this one with Morley) on
/// measurements done in 1879 on the speed of light.  The data
/// consists of five experiments, each consisting of 20 consecutive
/// ‘runs’.  The response is the speed of light measurement, suitably
/// coded (km/sec, with ‘299000’ subtracted).
///
/// ## Usage:
///
/// morley
///
/// ## Format:
///
/// A data frame with 100 observations on the following 3 variables.
///
/// * ‘Expt’ The experiment number, from 1 to 5.
/// * ‘Run’ The run number within each experiment.
/// * ‘Speed’ Speed-of-light measurement.
///
/// ## Details:
///
/// The data is here viewed as a randomized block experiment with
/// ‘experiment’ and ‘run’ as the factors.  ‘run’ may also be
/// considered a quantitative variate to account for linear (or
/// polynomial) changes in the measurement over the course of a single
/// experiment.
///
/// ## Note:
///
/// This is the same dataset as ‘michelson’ in package ‘MASS’.
///
/// ## Source:
///
/// A. J. Weekes (1986) _A Genstat Primer_.  London: Edward Arnold.
///
/// S. M. Stigler (1977) Do robust estimators work with real data?
/// _Annals of Statistics_ *5*, 1055-1098. (See Table 6.)
///
/// A. A. Michelson (1882) Experimental determination of the velocity
/// of light made at the United States Naval Academy, Annapolis.
/// _Astronomic Papers_ *1* 135-8.  U.S. Nautical Almanac Office.
/// (See Table 24.)
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// michelson <- transform(morley,
///  Expt = factor(Expt), Run = factor(Run))
/// xtabs(~ Expt + Run, data = michelson)  # 5 x 20 balanced (two-way)
/// plot(Speed ~ Expt, data = michelson,
///  main = "Speed of Light Data", xlab = "Experiment No.")
/// fm <- aov(Speed ~ Run + Expt, data = michelson)
/// summary(fm)
/// fm0 <- update(fm, . ~ . - Run)
/// anova(fm0, fm)
/// ```
pub fn morley() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("morley.csv"))).finish()
}

/// # Motor Trend Car Road Tests
///
/// ## Description:
///
/// The data was extracted from the 1974 _Motor Trend_ US magazine,
/// and comprises fuel consumption and 10 aspects of automobile design
/// and performance for 32 automobiles (1973-74 models).
///
/// ## Usage:
///
/// mtcars
///
/// ## Format:
///
/// A data frame with 32 observations on 11 (numeric) variables.
///
/// * \[, 1\]  mpgMiles/(US) gallon
/// * \[, 2\]  cylNumber of cylinders
/// * \[, 3\]  disp  Displacement (cu.in.)
/// * \[, 4\]  hp Gross horsepower
/// * \[, 5\]  drat  Rear axle ratio
/// * \[, 6\]  wt Weight (1000 lbs)
/// * \[, 7\]  qsec  1/4 mile time
/// * \[, 8\]  vs Engine (0 = V-shaped, 1 = straight)
/// * \[, 9\]  am Transmission (0 = automatic, 1 = manual)
/// * \[,10\]  gear  Number of forward gears
/// * \[,11\]  carb  Number of carburetors
///
/// ## Note:
///
/// Henderson and Velleman (1981) comment in a footnote to Table 1:
/// ‘Hocking [original transcriber]'s noncrucial coding of the Mazda's
/// rotary engine as a straight six-cylinder engine and the Porsche's
/// flat engine as a V engine, as well as the inclusion of the diesel
/// Mercedes 240D, have been retained to enable direct comparisons to
/// be made with previous analyses.’
///
/// ## Source:
///
/// Henderson and Velleman (1981), Building multiple regression models
/// interactively.  _Biometrics_, *37*, 391-411.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// pairs(mtcars, main = "mtcars data", gap = 1/4)
/// coplot(mpg ~ disp | as.factor(cyl), data = mtcars,
/// panel = panel.smooth, rows = 1)
/// ## possibly more meaningful, e.g., for summary() or bivariate plots:
/// mtcars2 <- within(mtcars, {
/// vs <- factor(vs, labels = c("V", "S"))
/// am <- factor(am, labels = c("automatic", "manual"))
/// cyl  <- ordered(cyl)
/// gear <- ordered(gear)
/// carb <- ordered(carb)
/// })
/// summary(mtcars2)
/// ```
pub fn mtcars() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("mtcars.csv"))).finish()
}

/// # Average Yearly Temperatures in New Haven
///
/// ## Description:
///
/// The mean annual temperature in degrees Fahrenheit in New Haven,
/// Connecticut, from 1912 to 1971.
///
/// ## Usage:
///
/// nhtemp
///
/// ## Format:
///
/// A time series of 60 observations.
///
/// ## Source:
///
/// Vaux, J. E. and Brinker, N. B. (1972) _Cycles_, *1972*, 117-121.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// plot(nhtemp, main = "nhtemp data",
///  ylab = "Mean annual temperature in New Haven, CT (deg. F)")
/// ```
pub fn nhtemp() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("nhtemp.csv"))).finish()
}

/// # Flow of the River Nile
///
/// ## Description:
///
/// Measurements of the annual flow of the river Nile at Aswan
/// (formerly ‘Assuan’), 1871-1970, in 10^8 m^3, “with apparent
/// changepoint near 1898” (Cobb(1978), Table 1, p.249).
///
/// ## Usage:
///
/// Nile
///
/// ## Format:
///
/// A time series of length 100.
///
/// ## Source:
///
/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
/// State Space Methods_.  Oxford University Press.
///
/// ## References:
///
/// Balke, N. S. (1993).  Detecting level shifts in time series.
/// _Journal of Business and Economic Statistics_, *11*, 81-92.
/// doi:10.2307/1391308 <https://doi.org/10.2307/1391308>.
///
/// Cobb, G. W. (1978).  The problem of the Nile: conditional solution
/// to a change-point problem.  _Biometrika_ *65*, 243-51.
/// doi:10.2307/2335202 <https://doi.org/10.2307/2335202>.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// par(mfrow = c(2, 2))
/// plot(Nile)
/// acf(Nile)
/// pacf(Nile)
/// ar(Nile) # selects order 2
/// cpgram(ar(Nile)$resid)
/// par(mfrow = c(1, 1))
/// arima(Nile, c(2, 0, 0))
///
/// ## Now consider missing values, following Durbin & Koopman
/// NileNA <- Nile
/// NileNA[c(21:40, 61:80)] <- NA
/// arima(NileNA, c(2, 0, 0))
/// plot(NileNA)
/// pred <-
/// predict(arima(window(NileNA, 1871, 1890), c(2, 0, 0)), n.ahead = 20)
/// lines(pred$pred, lty = 3, col = "red")
/// lines(pred$pred + 2*pred$se, lty = 2, col = "blue")
/// lines(pred$pred - 2*pred$se, lty = 2, col = "blue")
/// pred <-
/// predict(arima(window(NileNA, 1871, 1930), c(2, 0, 0)), n.ahead = 20)
/// lines(pred$pred, lty = 3, col = "red")
/// lines(pred$pred + 2*pred$se, lty = 2, col = "blue")
/// lines(pred$pred - 2*pred$se, lty = 2, col = "blue")
///
/// ## Structural time series models
/// par(mfrow = c(3, 1))
/// plot(Nile)
/// ## local level model
/// (fit <- StructTS(Nile, type = "level"))
/// lines(fitted(fit), lty = 2)  # contemporaneous smoothing
/// lines(tsSmooth(fit), lty = 2, col = 4)# fixed-interval smoothing
/// plot(residuals(fit)); abline(h = 0, lty = 3)
/// ## local trend model
/// (fit2 <- StructTS(Nile, type = "trend")) ## constant trend fitted
/// pred <- predict(fit, n.ahead = 30)
/// ## with 50% confidence interval
/// ts.plot(Nile, pred$pred,
///  pred$pred + 0.67*pred$se, pred$pred -0.67*pred$se)
///
/// ## Now consider missing values
/// plot(NileNA)
/// (fit3 <- StructTS(NileNA, type = "level"))
/// lines(fitted(fit3), lty = 2)
/// lines(tsSmooth(fit3), lty = 3)
/// plot(residuals(fit3)); abline(h = 0, lty = 3)
/// ```
pub fn nile() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Nile.csv"))).finish()
}

/// # Average Monthly Temperatures at Nottingham, 1920-1939
///
/// ## Description:
///
/// A time series object containing average air temperatures at
/// Nottingham Castle in degrees Fahrenheit for 20 years.
///
/// ## Usage:
///
/// nottem
///
/// ## Source:
///
/// Anderson, O. D. (1976) _Time Series Analysis and Forecasting: The
/// Box-Jenkins approach._ Butterworths. Series R.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// nott <- window(nottem, end = c(1936,12))
/// fit <- arima(nott, order = c(1,0,0), list(order = c(2,1,0), period = 12))
/// nott.fore <- predict(fit, n.ahead = 36)
/// ts.plot(nott, nott.fore$pred, nott.fore$pred+2*nott.fore$se,
///  nott.fore$pred-2*nott.fore$se, gpars = list(col = c(1,1,4,4)))
/// ```
pub fn nottem() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("nottem.csv"))).finish()
}

/// # Classical N, P, K Factorial Experiment
///
/// ## Description:
///
/// A classical N, P, K (nitrogen, phosphate, potassium) factorial
/// experiment on the growth of peas conducted on 6 blocks. Each half
/// of a fractional factorial design confounding the NPK interaction
/// was used on 3 of the plots.
///
/// ## Usage:
///
/// npk
///
/// ## Format:
///
/// The ‘npk’ data frame has 24 rows and 5 columns:
///
/// * ‘block’ which block (label 1 to 6).
/// * ‘N’ indicator (0/1) for the application of nitrogen.
/// * ‘P’ indicator (0/1) for the application of phosphate.
/// * ‘K’ indicator (0/1) for the application of potassium.
/// * ‘yield’ Yield of peas, in pounds/plot (the plots were (1/70)
/// acre).
///
/// ## Source:
///
/// Imperial College, London, M.Sc. exercise sheet.
///
/// ## References:
///
/// Venables, W. N. and Ripley, B. D. (2002) _Modern Applied
/// Statistics with S._ Fourth edition.  Springer.
///
/// ## Examples:
///
/// ```r
/// options(contrasts = c("contr.sum", "contr.poly"))
/// npk.aov <- aov(yield ~ block + N*P*K, npk)
/// npk.aov
/// summary(npk.aov)
/// coef(npk.aov)
/// options(contrasts = c("contr.treatment", "contr.poly"))
/// npk.aov1 <- aov(yield ~ block + N + K, data = npk)
/// summary.lm(npk.aov1)
/// se.contrast(npk.aov1, list(N=="0", N=="1"), data = npk)
/// model.tables(npk.aov1, type = "means", se = TRUE)
/// ```
pub fn npk() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("npk.csv"))).finish()
}

/// # Occupational Status of Fathers and their Sons
///
/// ## Description:
///
/// Cross-classification of a sample of British males according to
/// each subject's occupational status and his father's occupational
/// status.
///
/// ## Usage:
///
/// occupationalStatus
///
/// ## Format:
///
/// A ‘table’ of counts, with classifying factors ‘origin’ (father's
/// occupational status; levels ‘1:8’) and ‘destination’ (son's
/// occupational status; levels ‘1:8’).
///
/// ## Source:
///
/// Goodman, L. A. (1979) Simple Models for the Analysis of
/// Association in Cross-Classifications having Ordered Categories.
/// _J. Am. Stat. Assoc._, *74* (367), 537-552.
///
/// The data set has been in package ‘gnm’ and been provided by the
/// package authors.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
///
/// plot(occupationalStatus)
///
/// ##  Fit a uniform association model separating diagonal effects
/// Diag <- as.factor(diag(1:8))
/// Rscore <- scale(as.numeric(row(occupationalStatus)), scale = FALSE)
/// Cscore <- scale(as.numeric(col(occupationalStatus)), scale = FALSE)
/// modUnif <- glm(Freq ~ origin + destination + Diag + Rscore:Cscore,
/// family = poisson, data = occupationalStatus)
///
/// summary(modUnif)
/// plot(modUnif) # 4 plots, with warning about  h_ii ~= 1
/// ```
pub fn occupational_status() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("occupationalStatus.csv"))).finish()
}

/// # Growth of Orange Trees
///
/// ## Description:
///
/// The ‘Orange’ data frame has 35 rows and 3 columns of records of
/// the growth of orange trees.
///
/// ## Usage:
///
/// Orange
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// * Tree an ordered factor indicating the tree on which the
/// measurement is made.  The ordering is according to increasing
/// maximum diameter.
/// * age a numeric vector giving the age of the tree (days since
/// 1968/12/31)
/// * circumference a numeric vector of trunk circumferences (mm).  This
/// is probably “circumference at breast height”, a standard
/// measurement in forestry.
///
/// ## Details:
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Draper, N. R. and Smith, H. (1998), _Applied Regression Analysis
/// (3rd ed)_, Wiley (exercise 24.N).
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// coplot(circumference ~ age | Tree, data = Orange, show.given = FALSE)
/// fm1 <- nls(circumference ~ SSlogis(age, Asym, xmid, scal),
///  data = Orange, subset = Tree == 3)
/// plot(circumference ~ age, data = Orange, subset = Tree == 3,
///  xlab = "Tree age (days since 1968/12/31)",
///  ylab = "Tree circumference (mm)", las = 1,
///  main = "Orange tree data and fitted model (Tree 3 only)")
/// age <- seq(0, 1600, length.out = 101)
/// lines(age, predict(fm1, list(age = age)))
/// ```
pub fn orange() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Orange.csv"))).finish()
}

/// # Potency of Orchard Sprays
///
/// ## Description:
///
/// An experiment was conducted to assess the potency of various
/// constituents of orchard sprays in repelling honeybees, using a
/// Latin square design.
///
/// ## Usage:
///
/// OrchardSprays
///
/// ## Format:
///
/// A data frame with 64 observations on 4 variables.
///
/// * \[,1\]  rowpos  numeric  Row of the design
/// * \[,2\]  colpos  numeric  Column of the design
/// * \[,3\]  treatment  factorTreatment level
/// * \[,4\]  decreasenumeric  Response
///
/// ## Details:
///
/// Individual cells of dry comb were filled with measured amounts of
/// lime sulphur emulsion in sucrose solution.  Seven different
/// concentrations of lime sulphur ranging from a concentration of
/// 1/100 to 1/1,562,500 in successive factors of 1/5 were used as
/// well as a solution containing no lime sulphur.
///
/// The responses for the different solutions were obtained by
/// releasing 100 bees into the chamber for two hours, and then
/// measuring the decrease in volume of the solutions in the various
/// cells.
///
/// An 8 x 8 Latin square design was used and the treatments were
/// coded as follows:
///
/// * A  highest level of lime sulphur
/// * B  next highest level of lime sulphur
/// * ...
/// * G  lowest level of lime sulphur
/// * H  no lime sulphur
///
/// ## Source:
///
/// Finney, D. J. (1947) _Probit Analysis_.  Cambridge.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// pairs(OrchardSprays, main = "OrchardSprays data")
/// ```
pub fn orchard_sprays() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("OrchardSprays.csv"))).finish()
}

/// # Results from an Experiment on Plant Growth
///
/// ## Description:
///
/// Results from an experiment to compare yields (as measured by dried
/// weight of plants) obtained under a control and two different
/// treatment conditions.
///
/// ## Usage:
///
/// PlantGrowth
///
/// ## Format:
///
/// A data frame of 30 cases on 2 variables.
///
/// * \[, 1\]  weight  numeric
/// * \[, 2\]  groupfactor
///
/// The levels of ‘group’ are ‘ctrl’, ‘trt1’, and ‘trt2’.
///
/// ## Source:
///
/// Dobson, A. J. (1983) _An Introduction to Statistical Modelling_.
/// London: Chapman and Hall.
///
/// ## Examples:
///
/// ```r
/// ## One factor ANOVA example from Dobson's book, cf. Table 7.4:
/// require(stats); require(graphics)
/// boxplot(weight ~ group, data = PlantGrowth, main = "PlantGrowth data",
///  ylab = "Dried weight of plants", col = "lightgray",
///  notch = TRUE, varwidth = TRUE)
/// anova(lm(weight ~ group, data = PlantGrowth))
/// ```
pub fn plant_growth() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("PlantGrowth.csv"))).finish()
}

/// # Annual Precipitation in US Cities
///
/// ## Description:
///
/// The average amount of precipitation (rainfall) in inches for each
/// of 70 United States (and Puerto Rico) cities.
///
/// ## Usage:
///
/// precip
///
/// ## Format:
///
/// A named vector of length 70.
///
/// ## Note:
///
/// The dataset version up to Nov.16, 2016 had a typo in
/// ‘"Cincinnati"’'s name.  The examples show how to recreate that
/// version.
///
/// ## Source:
///
/// Statistical Abstracts of the United States, 1975.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// dotchart(precip[order(precip)], main = "precip data")
/// title(sub = "Average annual precipitation (in.)")
///
/// ## Old ("wrong") version of dataset (just name change):
/// precip.O <- local({
/// p <- precip; names(p)[names(p) == "Cincinnati"] <- "Cincinati" ; p })
/// stopifnot(all(precip == precip.O),
/// match("Cincinnati", names(precip)) == 46,
/// identical(names(precip)[-46], names(precip.O)[-46]))
/// ```
pub fn precip() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("precip.csv"))).finish()
}

/// # Quarterly Approval Ratings of US Presidents
///
/// ## Description:
///
/// The (approximately) quarterly approval rating for the President of
/// the United States from the first quarter of 1945 to the last
/// quarter of 1974.
///
/// ## Usage:
///
/// presidents
///
/// ## Format:
///
/// A time series of 120 values.
///
/// ## Details:
///
/// The data are actually a fudged version of the approval ratings.
/// See McNeil's book for details.
///
/// ## Source:
///
/// The Gallup Organisation.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// plot(presidents, las = 1, ylab = "Approval rating (%)",
///  main = "presidents data")
/// ```
pub fn presidents() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("presidents.csv"))).finish()
}

/// # Vapor Pressure of Mercury as a Function of Temperature
///
/// ## Description:
///
/// Data on the relation between temperature in degrees Celsius and
/// vapor pressure of mercury in millimeters (of mercury).
///
/// ## Usage:
///
/// pressure
///
/// ## Format:
///
/// A data frame with 19 observations on 2 variables.
///
/// * \[, 1\]  temperature  numeric  temperature (deg C)
/// * \[, 2\]  pressure  numeric  pressure (mm)
///
/// ## Source:
///
/// Weast, R. C., ed. (1973) _Handbook of Chemistry and Physics_.  CRC
/// Press.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(pressure, xlab = "Temperature (deg C)",
///  ylab = "Pressure (mm of Hg)",
///  main = "pressure data: Vapor Pressure of Mercury")
/// plot(pressure, xlab = "Temperature (deg C)",  log = "y",
///  ylab = "Pressure (mm of Hg)",
///  main = "pressure data: Vapor Pressure of Mercury")
/// ```
pub fn pressure() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("pressure.csv"))).finish()
}

/// # Reaction Velocity of an Enzymatic Reaction
///
/// ## Description:
///
/// The ‘Puromycin’ data frame has 23 rows and 3 columns of the
/// reaction velocity versus substrate concentration in an enzymatic
/// reaction involving untreated cells or cells treated with
/// Puromycin.
///
/// ## Usage:
///
/// Puromycin
///
/// ## Format:
///
/// This data frame contains the following columns:
///
/// * ‘conc’ a numeric vector of substrate concentrations (ppm)
/// * ‘rate’ a numeric vector of instantaneous reaction rates
/// (counts/min/min)
/// * ‘state’ a factor with levels ‘treated’ ‘untreated’
///
/// ## Details:
///
/// Data on the velocity of an enzymatic reaction were obtained by
/// Treloar (1974).  The number of counts per minute of radioactive
/// product from the reaction was measured as a function of substrate
/// concentration in parts per million (ppm) and from these counts the
/// initial rate (or velocity) of the reaction was calculated
/// (counts/min/min).  The experiment was conducted once with the
/// enzyme treated with Puromycin, and once with the enzyme untreated.
///
/// ## Source:
///
/// Bates, D.M. and Watts, D.G. (1988), _Nonlinear Regression Analysis
/// and Its Applications_, Wiley, Appendix A1.3.
///
/// Treloar, M. A. (1974), _Effects of Puromycin on
/// Galactosyltransferase in Golgi Membranes_, M.Sc. Thesis, U. of
/// Toronto.
///
/// ## See Also:
///
/// ‘SSmicmen’ for other models fitted to this dataset.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
///
/// plot(rate ~ conc, data = Puromycin, las = 1,
///  xlab = "Substrate concentration (ppm)",
///  ylab = "Reaction velocity (counts/min/min)",
///  pch = as.integer(Puromycin$state),
///  col = as.integer(Puromycin$state),
///  main = "Puromycin data and fitted Michaelis-Menten curves")
/// ## simplest form of fitting the Michaelis-Menten model to these data
/// fm1 <- nls(rate ~ Vm * conc/(K + conc), data = Puromycin,
///  subset = state == "treated",
///  start = c(Vm = 200, K = 0.05))
/// fm2 <- nls(rate ~ Vm * conc/(K + conc), data = Puromycin,
///  subset = state == "untreated",
///  start = c(Vm = 160, K = 0.05))
/// summary(fm1)
/// summary(fm2)
/// ## add fitted lines to the plot
/// conc <- seq(0, 1.2, length.out = 101)
/// lines(conc, predict(fm1, list(conc = conc)), lty = 1, col = 1)
/// lines(conc, predict(fm2, list(conc = conc)), lty = 2, col = 2)
/// legend(0.8, 120, levels(Puromycin$state),
/// col = 1:2, lty = 1:2, pch = 1:2)
///
/// ## using partial linearity
/// fm3 <- nls(rate ~ conc/(K + conc), data = Puromycin,
///  subset = state == "treated", start = c(K = 0.05),
///  algorithm = "plinear")
/// ```
pub fn puromycin() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Puromycin.csv"))).finish()
}

/// # Locations of Earthquakes off Fiji
///
/// ## Description:
///
/// The data set give the locations of 1000 seismic events of MB >
/// 4.0.  The events occurred in a cube near Fiji since 1964.
///
/// ## Usage:
///
/// quakes
///
/// ## Format:
///
/// A data frame with 1000 observations on 5 variables.
///
/// * \[,1\]  lat numeric  Latitude of event
/// * \[,2\]  longnumeric  Longitude
/// * \[,3\]  depth  numeric  Depth (km)
/// * \[,4\]  mag numeric  Richter Magnitude
/// * \[,5\]  stations  numeric  Number of stations reporting
///
/// ## Details:
///
/// There are two clear planes of seismic activity.  One is a major
/// plate junction; the other is the Tonga trench off New Zealand.
/// These data constitute a subsample from a larger dataset of
/// containing 5000 observations.
///
/// ## Source:
///
/// This is one of the Harvard PRIM-H project data sets.  They in turn
/// obtained it from Dr. John Woodhouse, Dept. of Geophysics, Harvard
/// University.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// pairs(quakes, main = "Fiji Earthquakes, N = 1000", cex.main = 1.2, pch = ".")
/// ```
pub fn quakes() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("quakes.csv"))).finish()
}

/// # Random Numbers from Congruential Generator RANDU
///
/// ## Description:
///
/// 400 triples of successive random numbers were taken from the VAX
/// FORTRAN function RANDU running under VMS 1.5.
///
/// ## Usage:
///
/// randu
///
/// ## Format:
///
/// A data frame with 400 observations on 3 variables named ‘x’, ‘y’
/// and ‘z’ which give the first, second and third random number in
/// the triple.
///
/// ## Details:
///
/// In three dimensional displays it is evident that the triples fall
/// on 15 parallel planes in 3-space. This can be shown theoretically
/// to be true for all triples from the RANDU generator.
///
/// These particular 400 triples start 5 apart in the sequence, that
/// is they are ((U\[5i+1\], U\[5i+2\], U\[5i+3\]), i= 0, ..., 399), and
/// they are rounded to 6 decimal places.
///
/// Under VMS versions 2.0 and higher, this problem has been fixed.
///
/// ## Source:
///
/// David Donoho
///
/// ## Examples:
///
/// ```r
/// ## We could re-generate the dataset by the following R code
/// seed <- as.double(1)
/// RANDU <- function() {
/// seed <<- ((2^16 + 3) * seed) %% (2^31)
/// seed/(2^31)
/// }
/// for(i in 1:400) {
/// U <- c(RANDU(), RANDU(), RANDU(), RANDU(), RANDU())
/// print(round(U[1:3], 6))
/// }
/// ```
pub fn randu() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("randu.csv"))).finish()
}

/// # Lengths of Major North American Rivers
///
/// ## Description:
///
/// This data set gives the lengths (in miles) of 141 “major” rivers
/// in North America, as compiled by the US Geological Survey.
///
/// ## Usage:
///
/// rivers
///
/// ## Format:
///
/// A vector containing 141 observations.
///
/// ## Source:
///
/// World Almanac and Book of Facts, 1975, page 406.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
pub fn rivers() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("rivers.csv"))).finish()
}

/// # Measurements on Petroleum Rock Samples
///
/// ## Description:
///
/// Measurements on 48 rock samples from a petroleum reservoir.
///
/// ## Usage:
///
/// rock
///
/// ## Format:
///
/// A data frame with 48 rows and 4 numeric columns.
///
/// * \[,1\]  areaarea of pores space, in pixels
/// out of 256 by 256
/// * \[,2\]  periperimeter in pixels
/// * \[,3\]  shape  perimeter/sqrt(area)
/// * \[,4\]  permpermeability in milli-Darcies
///
/// ## Details:
///
/// Twelve core samples from petroleum reservoirs were sampled by 4
/// cross-sections.  Each core sample was measured for permeability,
/// and each cross-section has total area of pores, total perimeter of
/// pores, and shape.
///
/// ## Source:
///
/// Data from BP Research, image analysis by Ronit Katz, U. Oxford.
pub fn rock() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("rock.csv"))).finish()
}

/// # Road Casualties in Great Britain 1969-84
///
/// ## Description:
///
/// ‘UKDriverDeaths’ is a time series giving the monthly totals of car
/// drivers in Great Britain killed or seriously injured Jan 1969 to
/// Dec 1984.  Compulsory wearing of seat belts was introduced on 31
/// Jan 1983.
///
/// ‘Seatbelts’ is more information on the same problem.
///
/// ## Usage:
///
/// UKDriverDeaths
/// Seatbelts
///
/// ## Format:
///
/// * ‘Seatbelts’ is a multiple time series, with columns
/// * ‘DriversKilled’ car drivers killed.
/// * ‘drivers’ same as ‘UKDriverDeaths’.
/// * ‘front’ front-seat passengers killed or seriously injured.
/// * ‘rear’ rear-seat passengers killed or seriously injured.
/// * ‘kms’ distance driven.
/// * ‘PetrolPrice’ petrol price.
/// * ‘VanKilled’ number of van (‘light goods vehicle’) drivers.
/// * ‘law’ 0/1: was the law in effect that month?
///
/// ## Source:
///
/// Harvey, A.C. (1989).  _Forecasting, Structural Time Series Models
/// and the Kalman Filter_.  Cambridge University Press, pp. 519-523.
///
/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
/// State Space Methods_.  Oxford University Press.
///
/// References:
///
/// Harvey, A. C. and Durbin, J. (1986).  The effects of seat belt
/// legislation on British road casualties: A case study in structural
/// time series modelling.  _Journal of the Royal Statistical Society_
/// series A, *149*, 187-227.  doi:10.2307/2981553
/// <https://doi.org/10.2307/2981553>.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// ## work with pre-seatbelt period to identify a model, use logs
/// work <- window(log10(UKDriverDeaths), end = 1982+11/12)
/// par(mfrow = c(3, 1))
/// plot(work); acf(work); pacf(work)
/// par(mfrow = c(1, 1))
/// (fit <- arima(work, c(1, 0, 0), seasonal = list(order = c(1, 0, 0))))
/// z <- predict(fit, n.ahead = 24)
/// ts.plot(log10(UKDriverDeaths), z$pred, z$pred+2*z$se, z$pred-2*z$se,
///  lty = c(1, 3, 2, 2), col = c("black", "red", "blue", "blue"))
///
/// ## now see the effect of the explanatory variables
/// X <- Seatbelts[, c("kms", "PetrolPrice", "law")]
/// X[, 1] <- log10(X[, 1]) - 4
/// arima(log10(Seatbelts[, "drivers"]), c(1, 0, 0),
/// seasonal = list(order = c(1, 0, 0)), xreg = X)
/// ```
pub fn seatbelts() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Seatbelts.csv"))).finish()
}

/// # Student's Sleep Data
///
/// ## Description:
///
/// Data which show the effect of two soporific drugs (increase in
/// hours of sleep compared to control) on 10 patients.
///
/// ## Usage:
///
/// sleep
///
/// ## Format:
///
/// A data frame with 20 observations on 3 variables.
///
/// * \[, 1\]  extra  numeric  increase in hours of sleep
/// * \[, 2\]  group  factordrug given
/// * \[, 3\]  ID  factorpatient ID
///
/// ## Details:
///
/// The ‘group’ variable name may be misleading about the data: They
/// represent measurements on 10 persons, not in groups.
///
/// ## Source:
///
/// Cushny, A. R. and Peebles, A. R. (1905) The action of optical
/// isomers: II hyoscines.  _The Journal of Physiology_ *32*, 501-510.
///
/// Student (1908) The probable error of the mean.  _Biometrika_, *6*,
/// 20.
///
/// ## References:
///
/// Scheffé, Henry (1959) _The Analysis of Variance_.  New York, NY:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// ## Student's paired t-test
/// with(sleep,
///  t.test(extra[group == 1],
/// extra[group == 2], paired = TRUE))
///
/// ## The sleep *prolongations*
/// sleep1 <- with(sleep, extra[group == 2] - extra[group == 1])
/// summary(sleep1)
/// stripchart(sleep1, method = "stack", xlab = "hours",
///  main = "Sleep prolongation (n = 10)")
/// boxplot(sleep1, horizontal = TRUE, add = TRUE,
///  at = .6, pars = list(boxwex = 0.5, staplewex = 0.25))
/// ```
pub fn sleep() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("sleep.csv"))).finish()
}

/// # Brownlee's Stack Loss Plant Data
///
/// ## Description:
///
/// Operational data of a plant for the oxidation of ammonia to nitric
/// acid.
///
/// ## Usage:
///
/// stackloss
///
/// stack.x
/// stack.loss
///
/// ## Format:
///
/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
///
/// * \[,1\]  ‘Air Flow’ Flow of cooling air
/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
/// Temperature
/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
/// 1000, minus 500\]
/// * \[,4\]  ‘stack.loss’  Stack loss
///
/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
/// with the first three (independent) variables of the data frame,
/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
/// variable, are provided as well.
///
/// ## Details:
///
/// “Obtained from 21 days of operation of a plant for the oxidation
/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
/// produced are absorbed in a countercurrent absorption tower”.
/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
///
/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
/// Temp’ is the temperature of cooling water circulated through coils
/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
/// acid circulating, minus 50, times 10: that is, 89 corresponds to
/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
/// times the percentage of the ingoing ammonia to the plant that
/// escapes from the absorption column unabsorbed; that is, an
/// (inverse) measure of the over-all efficiency of the plant.
///
/// ## Source:
///
/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
/// Methodology in Science and Engineering_.  New York: Wiley. pp.
/// 491-500.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
/// Statistics_ *109*, Springer-Verlag, New York.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// summary(lm.stack <- lm(stack.loss ~ stack.x))
/// ```
pub fn stack_loss() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("stack.loss.csv"))).finish()
}

/// # Brownlee's Stack Loss Plant Data
///
/// ## Description:
///
/// Operational data of a plant for the oxidation of ammonia to nitric
/// acid.
///
/// ## Usage:
///
/// stackloss
///
/// stack.x
/// stack.loss
///
/// ## Format:
///
/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
///
/// * \[,1\]  ‘Air Flow’ Flow of cooling air
/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
/// Temperature
/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
/// 1000, minus 500\]
/// * \[,4\]  ‘stack.loss’  Stack loss
///
/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
/// with the first three (independent) variables of the data frame,
/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
/// variable, are provided as well.
///
/// ## Details:
///
/// “Obtained from 21 days of operation of a plant for the oxidation
/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
/// produced are absorbed in a countercurrent absorption tower”.
/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
///
/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
/// Temp’ is the temperature of cooling water circulated through coils
/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
/// acid circulating, minus 50, times 10: that is, 89 corresponds to
/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
/// times the percentage of the ingoing ammonia to the plant that
/// escapes from the absorption column unabsorbed; that is, an
/// (inverse) measure of the over-all efficiency of the plant.
///
/// ## Source:
///
/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
/// Methodology in Science and Engineering_.  New York: Wiley. pp.
/// 491-500.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
/// Statistics_ *109*, Springer-Verlag, New York.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// summary(lm.stack <- lm(stack.loss ~ stack.x))
/// ```
pub fn stack_x() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("stack.x.csv"))).finish()
}

/// # Brownlee's Stack Loss Plant Data
///
/// ## Description:
///
/// Operational data of a plant for the oxidation of ammonia to nitric
/// acid.
///
/// ## Usage:
///
/// stackloss
///
/// stack.x
/// stack.loss
///
/// ## Format:
///
/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
///
/// * \[,1\]  ‘Air Flow’ Flow of cooling air
/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
/// Temperature
/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
/// 1000, minus 500\]
/// * \[,4\]  ‘stack.loss’  Stack loss
///
/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
/// with the first three (independent) variables of the data frame,
/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
/// variable, are provided as well.
///
/// ## Details:
///
/// “Obtained from 21 days of operation of a plant for the oxidation
/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
/// produced are absorbed in a countercurrent absorption tower”.
/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
///
/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
/// Temp’ is the temperature of cooling water circulated through coils
/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
/// acid circulating, minus 50, times 10: that is, 89 corresponds to
/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
/// times the percentage of the ingoing ammonia to the plant that
/// escapes from the absorption column unabsorbed; that is, an
/// (inverse) measure of the over-all efficiency of the plant.
///
/// ## Source:
///
/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
/// Methodology in Science and Engineering_.  New York: Wiley. pp.
/// 491-500.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
/// Statistics_ *109*, Springer-Verlag, New York.
///
/// ## Examples:
///
/// ```r
/// require(stats)
/// summary(lm.stack <- lm(stack.loss ~ stack.x))
/// ```
pub fn stackloss() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("stackloss.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_abb() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.abb.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_area() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.area.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_center() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.center.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_division() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.division.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_name() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.name.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_region() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.region.csv"))).finish()
}

/// # US State Facts and Figures
///
/// ## Description:
///
/// Data sets related to the 50 states of the United States of
/// America.
///
/// ## Usage:
///
/// state.abb
/// state.area
/// state.center
/// state.division
/// state.name
/// state.region
/// state.x77
///
/// ## Details:
///
/// R currently contains the following “state” data sets.  Note that
/// all data are arranged according to alphabetical order of the state
/// names.
///
/// * ‘state.abb’: character vector of 2-letter abbreviations for the
/// state names.
/// * ‘state.area’: numeric vector of state areas (in square miles).
/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
/// approximate geographic center of each state in negative
/// longitude and latitude.  Alaska and Hawaii are placed just
/// off the West Coast.  See ‘Examples’ on how to “correct”.
/// * ‘state.division’: ‘factor’ giving state divisions (New England,
/// Middle Atlantic, South Atlantic, East South Central, West
/// South Central, East North Central, West North Central,
/// Mountain, and Pacific).
/// * ‘state.name’: character vector giving the full state names.
/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
/// North Central, West) that each state belongs to.
/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
/// following statistics in the respective columns.
/// * ‘Population’: population estimate as of July 1, 1975
/// * ‘Income’: per capita income (1974)
/// * ‘Illiteracy’: illiteracy (1970, percent of population)
/// * ‘Life Exp’: life expectancy in years (1969-71)
/// * ‘Murder’: murder and non-negligent manslaughter rate per
/// 100,000 population (1976)
/// * ‘HS Grad’: percent high-school graduates (1970)
/// * ‘Frost’: mean number of days with minimum temperature below
/// freezing (1931-1960) in capital or large city
/// * ‘Area’: land area in square miles
///
/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
///
/// ## Source:
///
/// U.S. Department of Commerce, Bureau of the Census (1977)
/// _Statistical Abstract of the United States_.
///
/// U.S. Department of Commerce, Bureau of the Census (1977) _County
/// and City Data Book_.
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
/// dst[c("AK", "HI"),]
/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##Alaska63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##Hawaii20°15' N., 156°20' W., off Maui Island
/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
/// state.center2 <- as.list(dxy)
///
/// plot(dxy, asp=1.2, pch=3, col=2)
/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
/// i <- c("AK","HI")
/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
/// col=adjustcolor(4, .7), length=1/8))
/// points(dst[i,], col=2)
/// if(FALSE) { # if(require("maps")) {
/// map("state", interior = FALSE, add = TRUE)
/// map("state", boundary = FALSE, lty = 2, add = TRUE)
/// }
/// ```
pub fn state_x77() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("state.x77.csv"))).finish()
}

/// # Monthly Sunspot Data, from 1749 to "Present"
///
/// ## Description:
///
/// Monthly numbers of sunspots, as from the World Data Center, aka
/// SIDC.  This is the version of the data that will occasionally be
/// updated when new counts become available.
///
/// ## Usage:
///
/// sunspot.month
///
/// ## Format:
///
/// The univariate time series ‘sunspot.year’ and ‘sunspot.month’
/// contain 289 and 2988 observations, respectively.  The objects are
/// of class ‘"ts"’.
///
/// ## Author(s):
///
/// R
///
/// ## Source:
///
/// WDC-SILSO, Solar Influences Data Analysis Center (SIDC), Royal
/// Observatory of Belgium, Av. Circulaire, 3, B-1180 BRUSSELS
/// Currently at <http://www.sidc.be/silso/datafiles>
///
/// ## See Also:
///
/// ‘sunspot.month’ is a longer version of ‘sunspots’; the latter runs
/// until 1983 and is kept fixed (for reproducibility as example
/// dataset).
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// ## Compare the monthly series
/// plot (sunspot.month,
/// main="sunspot.month & sunspots [package'datasets']", col=2)
/// lines(sunspots) # -> faint differences where they overlap
///
/// ## Now look at the difference :
/// all(tsp(sunspots)  [c(1,3)] ==
/// tsp(sunspot.month)[c(1,3)]) ## Start & Periodicity are the same
/// n1 <- length(sunspots)
/// table(eq <- sunspots == sunspot.month[1:n1]) #>  132  are different !
/// i <- which(!eq)
/// rug(time(eq)[i])
/// s1 <- sunspots[i] ; s2 <- sunspot.month[i]
/// cbind(i = i, time = time(sunspots)[i], sunspots = s1, ss.month = s2,
/// perc.diff = round(100*2*abs(s1-s2)/(s1+s2), 1))
///
/// ## How to recreate the "old" sunspot.month (R <= 3.0.3):
/// .sunspot.diff <- cbind(
/// i = c(1202L, 1256L, 1258L, 1301L, 1407L, 1429L, 1452L, 1455L,
/// 1663L, 2151L, 2329L, 2498L, 2594L, 2694L, 2819L),
/// res10 = c(1L, 1L, 1L, -1L, -1L, -1L, 1L, -1L,
/// 1L, 1L, 1L, 1L, 1L, 20L, 1L))
/// ssm0 <- sunspot.month[1:2988]
/// with(as.data.frame(.sunspot.diff), ssm0[i] <<- ssm0[i] - res10/10)
/// sunspot.month.0 <- ts(ssm0, start = 1749, frequency = 12)
/// ```
pub fn sunspot_month() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("sunspot.month.csv"))).finish()
}

/// # Yearly Sunspot Data, 1700-1988
///
/// ## Description:
///
/// Yearly numbers of sunspots from 1700 to 1988 (rounded to one
/// digit).
///
/// Note that monthly numbers are available as ‘sunspot.month’, though
/// starting slightly later.
///
/// ## Usage:
///
/// sunspot.year
///
/// ### Format:
///
/// The univariate time series ‘sunspot.year’ contains 289
/// observations, and is of class ‘"ts"’.
///
/// ## Source:
///
/// H. Tong (1996) _Non-Linear Time Series_. Clarendon Press, Oxford,
/// p. 471.
///
/// ## See Also:
///
/// For _monthly_ sunspot numbers, see ‘sunspot.month’ and ‘sunspots’.
///
/// Regularly updated yearly sunspot numbers are available from
/// WDC-SILSO, Royal Observatory of Belgium, at
/// <http://www.sidc.be/silso/datafiles>
///
/// ## Examples:
///
/// ```r
/// utils::str(sm <- sunspots)# the monthly version we keep unchanged
/// utils::str(sy <- sunspot.year)
/// ## The common time interval
/// (t1 <- c(max(start(sm), start(sy)),  1)) # Jan 1749
/// (t2 <- c(min(  end(sm)[1],end(sy)[1]), 12)) # Dec 1983
/// s.m <- window(sm, start=t1, end=t2)
/// s.y <- window(sy, start=t1, end=t2[1]) # {irrelevant warning}
/// stopifnot(length(s.y) * 12 == length(s.m),
/// ## The yearly series *is* close to the averages of the monthly one:
/// all.equal(s.y, aggregate(s.m, FUN = mean), tolerance = 0.0020))
/// ## NOTE: Strangely, correctly weighting the number of days per month
/// ## (using 28.25 for February) is *not* closer than the simple mean:
/// ndays <- c(31, 28.25, rep(c(31,30, 31,30, 31), 2))
/// all.equal(s.y, aggregate(s.m, FUN = mean))# 0.0013
/// all.equal(s.y, aggregate(s.m, FUN = weighted.mean, w = ndays)) # 0.0017
/// ```
pub fn sunspot_year() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("sunspot.year.csv"))).finish()
}

/// # Monthly Sunspot Numbers, 1749-1983
///
/// ## Description:
///
/// Monthly mean relative sunspot numbers from 1749 to 1983.
/// Collected at Swiss Federal Observatory, Zurich until 1960, then
/// Tokyo Astronomical Observatory.
///
/// ## Usage:
///
/// sunspots
///
/// ## Format:
///
/// A time series of monthly data from 1749 to 1983.
///
/// ## Source:
///
/// Andrews, D. F. and Herzberg, A. M. (1985) _Data: A Collection of
/// Problems from Many Fields for the Student and Research Worker_.
/// New York: Springer-Verlag.
///
/// ## See Also:
///
/// ‘sunspot.month’ has a longer (and a bit different) series,
/// ‘sunspot.year’ is a much shorter one.  See there for getting more
/// current sunspot numbers.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(sunspots, main = "sunspots data", xlab = "Year",
///  ylab = "Monthly sunspot numbers")
/// ```
pub fn sunspots() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("sunspots.csv"))).finish()
}

/// # Swiss Fertility and Socioeconomic Indicators (1888) Data
///
/// ## Description:
///
/// Standardized fertility measure and socio-economic indicators for
/// each of 47 French-speaking provinces of Switzerland at about 1888.
///
/// ## Usage:
///
/// swiss
///
/// ## Format:
///
/// A data frame with 47 observations on 6 variables, _each_ of which
/// is in percent, i.e., in [0, 100].
///
/// * \[,1\]  FertilityIg,‘common standardized fertility measure’
/// * \[,2\]  Agriculture % of males involved in agriculture
/// as occupation
/// * \[,3\]  Examination % draftees receiving highest mark
/// on army examination
/// * \[,4\]  Education% education beyond primary school for draftees.
/// * \[,5\]  Catholic % ‘catholic’ (as opposed to ‘protestant’).
/// * \[,6\]  Infant.Mortality  live births who live less than 1year.
///
/// All variables but ‘Fertility’ give proportions of the population.
///
/// ## Details:
///
/// (paraphrasing Mosteller and Tukey):
///
/// Switzerland, in 1888, was entering a period known as the
/// _demographic transition_; i.e., its fertility was beginning to
/// fall from the high level typical of underdeveloped countries.
///
/// The data collected are for 47 French-speaking “provinces” at about
/// 1888.
///
/// Here, all variables are scaled to \[0, 100\], where in the original,
/// all but ‘"Catholic"’ were scaled to \[0, 1\].
///
/// ## Note:
///
/// Files for all 182 districts in 1888 and other years have been
/// available at <https://opr.princeton.edu/archive/pefp/switz.aspx>.
///
/// They state that variables ‘Examination’ and ‘Education’ are
/// averages for 1887, 1888 and 1889.
///
/// ## Source:
///
/// Project “16P5”, pages 549-551 in
///
/// Mosteller, F. and Tukey, J. W. (1977) _Data Analysis and
/// Regression: A Second Course in Statistics_.  Addison-Wesley,
/// Reading Mass.
///
/// indicating their source as “Data used by permission of Franice van
/// de Walle. Office of Population Research, Princeton University,
/// 1976.  Unpublished data assembled under NICHD contract number No
/// 1-HD-O-2077.”
///
/// ## References:
///
/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
/// Language_.  Wadsworth & Brooks/Cole.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// pairs(swiss, panel = panel.smooth, main = "swiss data",
/// col = 3 + (swiss$Catholic > 50))
/// summary(lm(Fertility ~ . , data = swiss))
/// ```
pub fn swiss() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("swiss.csv"))).finish()
}

/// # Pharmacokinetics of Theophylline
///
/// ## Description:
///
/// The ‘Theoph’ data frame has 132 rows and 5 columns of data from an
/// experiment on the pharmacokinetics of theophylline.
///
/// ## Usage:
///
/// Theoph
///
/// ## Format:
///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
/// Subject an ordered factor with levels ‘1’, ..., ‘12’ identifying
///  the subject on whom the observation was made.  The ordering
///  is by increasing maximum concentration of theophylline
///  observed.
///
/// * Wt weight of the subject (kg).
/// * Dose dose of theophylline administered orally to the subject
/// (mg/kg).
/// * Time time since drug administration when the sample was drawn
/// (hr).
/// * conc theophylline concentration in the sample (mg/L).
///
/// ## Details:
///
/// Boeckmann, Sheiner and Beal (1994) report data from a study by Dr.
/// Robert Upton of the kinetics of the anti-asthmatic drug
/// theophylline.  Twelve subjects were given oral doses of
/// theophylline then serum concentrations were measured at 11 time
/// points over the next 25 hours.
///
/// These data are analyzed in Davidian and Giltinan (1995) and
/// Pinheiro and Bates (2000) using a two-compartment open
/// pharmacokinetic model, for which a self-starting model function,
/// ‘SSfol’, is available.
///
/// This dataset was originally part of package ‘nlme’, and that has
/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
/// for its grouped-data classes.
///
/// ## Source:
///
/// Boeckmann, A. J., Sheiner, L. B. and Beal, S. L. (1994), _NONMEM
/// Users Guide: Part V_, NONMEM Project Group, University of
/// California, San Francisco.
///
/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
/// Repeated Measurement Data_, Chapman & Hall (section 5.5, p. 145
/// and section 6.6, p. 176)
///
/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
/// and S-PLUS_, Springer (Appendix A.29)
///
/// ## See Also:
///
/// ‘SSfol’
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
///
/// coplot(conc ~ Time | Subject, data = Theoph, show.given = FALSE)
/// Theoph.4 <- subset(Theoph, Subject == 4)
/// fm1 <- nls(conc ~ SSfol(Dose, Time, lKe, lKa, lCl),
///  data = Theoph.4)
/// summary(fm1)
/// plot(conc ~ Time, data = Theoph.4,
///  xlab = "Time since drug administration (hr)",
///  ylab = "Theophylline concentration (mg/L)",
///  main = "Observed concentrations and fitted model",
///  sub  = "Theophylline data - Subject 4 only",
///  las = 1, col = 4)
/// xvals <- seq(0, par("usr")[2], length.out = 55)
/// lines(xvals, predict(fm1, newdata = list(Time = xvals)),
/// col = 4)
/// ```
pub fn theoph() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Theoph.csv"))).finish()
}

/// # Survival of passengers on the Titanic
///
/// ## Description:
///
/// This data set provides information on the fate of passengers on
/// the fatal maiden voyage of the ocean liner ‘Titanic’, summarized
/// according to economic status (class), sex, age and survival.
///
/// ## Usage:
///
/// Titanic
///
/// ## Format:
///
/// A 4-dimensional array resulting from cross-tabulating 2201
/// observations on 4 variables.  The variables and their levels are
/// as follows:
///
/// | No | Name     | Levels              |
/// |----|----------|---------------------|
/// | 1  | Class    | 1st, 2nd, 3rd, Crew |
/// | 2  | Sex      | Male, Female        |
/// | 3  | Age      | Child, Adult        |
/// | 4  | Survived | No, Yes             |
///
/// ## Details:
///
/// The sinking of the Titanic is a famous event, and new books are
/// still being published about it.  Many well-known facts-from the
/// proportions of first-class passengers to the ‘women and children
/// first’ policy, and the fact that that policy was not entirely
/// successful in saving the women and children in the third class-are
/// reflected in the survival rates for various classes of passenger.
///
/// These data were originally collected by the British Board of Trade
/// in their investigation of the sinking.  Note that there is not
/// complete agreement among primary sources as to the exact numbers
/// on board, rescued, or lost.
///
/// Due in particular to the very successful film ‘Titanic’, the last
/// years saw a rise in public interest in the Titanic.  Very detailed
/// data about the passengers is now available on the Internet, at
/// sites such as _Encyclopedia Titanica_
/// (<https://www.encyclopedia-titanica.org/>).
///
/// ## Source:
///
/// Dawson, Robert J. MacG. (1995), The ‘Unusual Episode’ Data
/// Revisited.  _Journal of Statistics Education_, *3*.
/// doi:10.1080/10691898.1995.11910499
/// <https://doi.org/10.1080/10691898.1995.11910499>.
///
/// The source provides a data set recording class, sex, age, and
/// survival status for each person on board of the Titanic, and is
/// based on data originally collected by the British Board of Trade
/// and reprinted in:
///
/// British Board of Trade (1990), _Report on the Loss of the
/// ‘Titanic’ (S.S.)_.  British Board of Trade Inquiry Report
/// (reprint).  Gloucester, UK: Allan Sutton Publishing.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// mosaicplot(Titanic, main = "Survival on the Titanic")
/// ## Higher survival rates in children?
/// apply(Titanic, c(3, 4), sum)
/// ## Higher survival rates in females?
/// apply(Titanic, c(2, 4), sum)
/// ## Use loglm() in package 'MASS' for further analysis ...
/// ```
pub fn titanic() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("Titanic.csv"))).finish()
}

/// # The Effect of Vitamin C on Tooth Growth in Guinea Pigs
///
/// ## Description:
///
/// The response is the length of odontoblasts (cells responsible for
/// tooth growth) in 60 guinea pigs.  Each animal received one of
/// three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of
/// two delivery methods, orange juice or ascorbic acid (a form of
/// vitamin C and coded as ‘VC’).
///
/// ## Usage:
///
/// ToothGrowth
///
/// ## Format:
///
/// A data frame with 60 observations on 3 variables.
///
/// * \[,1\]  lennumeric  Tooth length
/// * \[,2\]  supp  factorSupplement type (VC or OJ).
/// * \[,3\]  dose  numeric  Dose in milligrams/day
///
/// ## Source:
///
/// C. I. Bliss (1952).  _The Statistics of Bioassay_.  Academic
/// Press.
///
/// ## References:
///
/// McNeil, D. R. (1977).  _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// Crampton, E. W. (1947).  The growth of the odontoblast of the
/// incisor teeth as a criterion of vitamin C intake of the guinea
/// pig.  _The Journal of Nutrition_, *33*(5), 491-504.
/// doi:10.1093/jn/33.5.491 <https://doi.org/10.1093/jn/33.5.491>.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// coplot(len ~ dose | supp, data = ToothGrowth, panel = panel.smooth,
/// xlab = "ToothGrowth data: length vs dose, given type of supplement")
/// ```
pub fn tooth_growth() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("ToothGrowth.csv"))).finish()
}

/// # Yearly Treering Data, -6000-1979
///
/// ## Description:
///
/// Contains normalized tree-ring widths in dimensionless units.
///
/// ## Usage:
///
/// treering
///
/// ## Format:
///
/// A univariate time series with 7981 observations. The object is of
/// class ‘"ts"’.
///
/// Each tree ring corresponds to one year.
///
/// ## Details:
///
/// The data were recorded by Donald A. Graybill, 1980, from Gt Basin
/// Bristlecone Pine 2805M, 3726-11810 in Methuselah Walk, California.
///
/// ## Source:
///
/// Time Series Data Library: <https://robjhyndman.com/TSDL/>, series
/// ‘CA535.DAT’
///
/// ## References:
///
/// For some photos of Methuselah Walk see
/// <https://web.archive.org/web/20110523225828/http://www.ltrr.arizona.edu/~hallman/sitephotos/meth.html>
pub fn tree_ring() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("treering.csv"))).finish()
}

/// # Diameter, Height and Volume for Black Cherry Trees
///
/// ## Description:
///
/// This data set provides measurements of the diameter, height and
/// volume of timber in 31 felled black cherry trees.  Note that the
/// diameter (in inches) is erroneously labelled Girth in the data. It
/// is measured at 4 ft 6 in above the ground.
///
/// ## Usage:
///
/// trees
///
/// ## Format:
///
/// A data frame with 31 observations on 3 variables.
///
/// * ‘\[,1\]’  ‘Girth’numeric  Tree diameter (rather than girth, actually) in inches
/// * ‘\[,2\]’  ‘Height’  numeric  Height in ft
/// * ‘\[,3\]’  ‘Volume’  numeric  Volume of timber in cubic ft
///
/// ##  Source:
///
/// Ryan, T. A., Joiner, B. L. and Ryan, B. F. (1976) _The Minitab
/// Student Handbook_.  Duxbury Press.
///
/// ## References:
///
/// Atkinson, A. C. (1985) _Plots, Transformations and Regression_.
/// Oxford University Press.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// pairs(trees, panel = panel.smooth, main = "trees data")
/// plot(Volume ~ Girth, data = trees, log = "xy")
/// coplot(log(Volume) ~ log(Girth) | Height, data = trees,
/// panel = panel.smooth)
/// summary(fm1 <- lm(log(Volume) ~ log(Girth), data = trees))
/// summary(fm2 <- update(fm1, ~ . + log(Height), data = trees))
/// step(fm2)
/// ## i.e., Volume ~= c * Height * Girth^2  seems reasonable
/// ```
pub fn trees() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("trees.csv"))).finish()
}

/// # Student Admissions at UC Berkeley
///
/// ## Description:
///
/// Aggregate data on applicants to graduate school at Berkeley for
/// the six largest departments in 1973 classified by admission and
/// sex.
///
/// ## Usage:
///
/// UCBAdmissions
///
/// ## Format:
///
/// A 3-dimensional array resulting from cross-tabulating 4526
/// observations on 3 variables.  The variables and their levels are
/// as follows:
///
/// | No | Name   | Levels             |
/// |----|--------|--------------------|
/// | 1  | Admit  | Admitted, Rejected |
/// | 2  | Gender | Male, Female       |
/// | 3  | Dept   | A, B, C, D, E, F   |
///
/// ## Details:
///
/// This data set is frequently used for illustrating Simpson's
/// paradox, see Bickel _et al_ (1975).  At issue is whether the data
/// show evidence of sex bias in admission practices.  There were 2691
/// male applicants, of whom 1198 (44.5%) were admitted, compared with
/// 1835 female applicants of whom 557 (30.4%) were admitted.  This
/// gives a sample odds ratio of 1.83, indicating that males were
/// almost twice as likely to be admitted.  In fact, graphical methods
/// (as in the example below) or log-linear modelling show that the
/// apparent association between admission and sex stems from
/// differences in the tendency of males and females to apply to the
/// individual departments (females used to apply _more_ to
/// departments with higher rejection rates).
///
/// This data set can also be used for illustrating methods for
/// graphical display of categorical data, such as the general-purpose
/// mosaicplot or the fourfoldplot for 2-by-2-by-k tables.
///
/// ## References:
///
/// Bickel, P. J., Hammel, E. A., and O'Connell, J. W. (1975).  Sex
/// bias in graduate admissions: Data from Berkeley.  _Science_,
/// *187*, 398-403.  doi:10.1126/science.187.4175.398
/// <https://doi.org/10.1126/science.187.4175.398>.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// ## Data aggregated over departments
/// apply(UCBAdmissions, c(1, 2), sum)
/// mosaicplot(apply(UCBAdmissions, c(1, 2), sum),
///  main = "Student admissions at UC Berkeley")
/// ## Data for individual departments
/// opar <- par(mfrow = c(2, 3), oma = c(0, 0, 2, 0))
/// for(i in 1:6)
///  mosaicplot(UCBAdmissions[,,i],
/// xlab = "Admit", ylab = "Sex",
/// main = paste("Department", LETTERS[i]))
/// mtext(expression(bold("Student admissions at UC Berkeley")),
/// outer = TRUE, cex = 1.5)
/// par(opar)
/// ```
pub fn ucb_admissions() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("UCBAdmissions.csv"))).finish()
}

/// # Road Casualties in Great Britain 1969-84
///
/// ## Description:
///
/// ‘UKDriverDeaths’ is a time series giving the monthly totals of car
/// drivers in Great Britain killed or seriously injured Jan 1969 to
/// Dec 1984.  Compulsory wearing of seat belts was introduced on 31
/// Jan 1983.
///
/// ‘Seatbelts’ is more information on the same problem.
///
/// ## Usage:
///
/// UKDriverDeaths
/// Seatbelts
///
/// ## Format:
///
/// * ‘Seatbelts’ is a multiple time series, with columns
/// * ‘DriversKilled’ car drivers killed.
/// * ‘drivers’ same as ‘UKDriverDeaths’.
/// * ‘front’ front-seat passengers killed or seriously injured.
/// * ‘rear’ rear-seat passengers killed or seriously injured.
/// * ‘kms’ distance driven.
/// * ‘PetrolPrice’ petrol price.
/// * ‘VanKilled’ number of van (‘light goods vehicle’) drivers.
/// * ‘law’ 0/1: was the law in effect that month?
///
/// ##  Source:
///
/// Harvey, A.C. (1989).  _Forecasting, Structural Time Series Models
/// and the Kalman Filter_.  Cambridge University Press, pp. 519-523.
///
/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
/// State Space Methods_.  Oxford University Press.
///
/// ## References:
///
/// Harvey, A. C. and Durbin, J. (1986).  The effects of seat belt
/// legislation on British road casualties: A case study in structural
/// time series modelling.  _Journal of the Royal Statistical Society_
/// series A, *149*, 187-227.  doi:10.2307/2981553
/// <https://doi.org/10.2307/2981553>.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// ## work with pre-seatbelt period to identify a model, use logs
/// work <- window(log10(UKDriverDeaths), end = 1982+11/12)
/// par(mfrow = c(3, 1))
/// plot(work); acf(work); pacf(work)
/// par(mfrow = c(1, 1))
/// (fit <- arima(work, c(1, 0, 0), seasonal = list(order = c(1, 0, 0))))
/// z <- predict(fit, n.ahead = 24)
/// ts.plot(log10(UKDriverDeaths), z$pred, z$pred+2*z$se, z$pred-2*z$se,
///  lty = c(1, 3, 2, 2), col = c("black", "red", "blue", "blue"))
///
/// ## now see the effect of the explanatory variables
/// X <- Seatbelts[, c("kms", "PetrolPrice", "law")]
/// X[, 1] <- log10(X[, 1]) - 4
/// arima(log10(Seatbelts[, "drivers"]), c(1, 0, 0),
/// seasonal = list(order = c(1, 0, 0)), xreg = X)
/// ```
pub fn uk_driver_deaths() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("UKDriverDeaths.csv"))).finish()
}

/// # UK Quarterly Gas Consumption
///
/// ## Description:
///
/// Quarterly UK gas consumption from 1960Q1 to 1986Q4, in millions of
/// therms.
///
/// ## Usage:
///
/// UKgas
///
/// ## Format:
///
/// A quarterly time series of length 108.
///
/// ## Source:
///
/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
/// State Space Methods_.  Oxford University Press.
///
/// ## Examples:
///
/// ```r
/// ## maybe str(UKgas) ; plot(UKgas) ...
/// ```
pub fn uk_gas() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("UKgas.csv"))).finish()
}

/// # Accidental Deaths in the US 1973-1978
///
/// ## Description:
///
/// A time series giving the monthly totals of accidental deaths in
/// the USA.  The values for the first six months of 1979 are 7798
/// 7406 8363 8460 9217 9316.
///
/// ## Usage:
///
/// USAccDeaths
///
/// ## Source:
///
/// P. J. Brockwell and R. A. Davis (1991) _Time Series: Theory and
/// Methods._ Springer, New York.
pub fn us_acc_deaths() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("USAccDeaths.csv"))).finish()
}

/// # Violent Crime Rates by US State
///
/// ## Description:
///
/// This data set contains statistics, in arrests per 100,000
/// residents for assault, murder, and rape in each of the 50 US
/// states in 1973.  Also given is the percent of the population
/// living in urban areas.
///
/// ## Usage:
///
/// USArrests
///
/// ## Format:
///
/// A data frame with 50 observations on 4 variables.
///
/// * \[,1\]  Murder numeric  Murder arrests (per 100,000)
/// * \[,2\]  Assaultnumeric  Assault arrests (per 100,000)
/// * \[,3\]  UrbanPop  numeric  Percent urban population
/// * \[,4\]  Rapenumeric  Rape arrests (per 100,000)
///
/// ## Note:
///
/// ‘USArrests’ contains the data as in McNeil's monograph.  For the
/// ‘UrbanPop’ percentages, a review of the table (No. 21) in the
/// Statistical Abstracts 1975 reveals a transcription error for
/// Maryland (and that McNeil used the same “round to even” rule that
/// R's ‘round()’ uses), as found by Daniel S Coven (Arizona).
///
/// See the example below on how to correct the error and improve
/// accuracy for the ‘<n>.5’ percentages.
///
/// ## Source:
///
/// World Almanac and Book of facts 1975.  (Crime rates).
///
/// Statistical Abstracts of the United States 1975, p.20, (Urban
/// rates), possibly available as
/// <https://books.google.ch/books?id=zl9qAAAAMAAJ&pg=PA20>.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## See Also:
///
/// The ‘state’ data sets.
///
/// ## Examples:
///
/// ```r
/// summary(USArrests)
///
/// require(graphics)
/// pairs(USArrests, panel = panel.smooth, main = "USArrests data")
///
/// ## Difference between 'USArrests' and its correction
/// USArrests["Maryland", "UrbanPop"] # 67 -- the transcription error
/// UA.C <- USArrests
/// UA.C["Maryland", "UrbanPop"] <- 76.6
///
/// ## also +/- 0.5 to restore the original  <n>.5  percentages
/// s5u <- c("Colorado", "Florida", "Mississippi", "Wyoming")
/// s5d <- c("Nebraska", "Pennsylvania")
/// UA.C[s5u, "UrbanPop"] <- UA.C[s5u, "UrbanPop"] + 0.5
/// UA.C[s5d, "UrbanPop"] <- UA.C[s5d, "UrbanPop"] - 0.5
///
/// ## ==> UA.C  is now a *C*orrected version of  USArrests
/// ```
pub fn us_arrests() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("USArrests.csv"))).finish()
}

/// # Distances Between European Cities and Between US Cities
///
/// ## Description:
///
/// The ‘eurodist’ gives the road distances (in km) between 21 cities
/// in Europe.  The data are taken from a table in _The Cambridge
/// Encyclopaedia_.
///
/// ‘UScitiesD’ gives “straight line” distances between 10 cities in
/// the US.
///
/// ## Usage:
///
/// eurodist
/// UScitiesD
///
/// ## Format:
///
/// ‘dist’ objects based on 21 and 10 objects, respectively.  (You
/// must have the ‘stats’ package loaded to have the methods for this
/// kind of object available).
///
/// ## Source:
///
/// Crystal, D. Ed. (1990) _The Cambridge Encyclopaedia_.  Cambridge:
/// Cambridge University Press,
///
/// The US cities distances were provided by Pierre Legendre.
pub fn us_cities_d() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("UScitiesD.csv"))).finish()
}

/// # Lawyers' Ratings of State Judges in the US Superior Court
///
/// ## Description:
///
/// Lawyers' ratings of state judges in the US Superior Court.
///
/// ## Usage:
///
/// USJudgeRatings
///
/// ## Format:
///
/// A data frame containing 43 observations on 12 numeric variables.
///
/// * \[,1\]  CONT  Number of contacts of lawyer with judge.
/// * \[,2\]  INTG  Judicial integrity.
/// * \[,3\]  DMNR  Demeanor.
/// * \[,4\]  DILG  Diligence.
/// * \[,5\]  CFMG  Case flow managing.
/// * \[,6\]  DECI  Prompt decisions.
/// * \[,7\]  PREP  Preparation for trial.
/// * \[,8\]  FAMI  Familiarity with law.
/// * \[,9\]  ORAL  Sound oral rulings.
/// * \[,10\]  WRIT  Sound written rulings.
/// * \[,11\]  PHYS  Physical ability.
/// * \[,12\]  RTEN  Worthy of retention.
///
/// ## Source:
///
/// New Haven Register, 14 January, 1977 (from John Hartigan).
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// pairs(USJudgeRatings, main = "USJudgeRatings data")
/// ```
pub fn us_judge_ratings() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("USJudgeRatings.csv"))).finish()
}

/// # Personal Expenditure Data
///
/// ## Description:
///
/// This data set consists of United States personal expenditures (in
/// billions of dollars) in the categories; food and tobacco,
/// household operation, medical and health, personal care, and
/// private education for the years 1940, 1945, 1950, 1955 and 1960.
///
/// ## Usage:
///
/// USPersonalExpenditure
///
/// ## Format:
///
/// A matrix with 5 rows and 5 columns.
///
/// ## Source:
///
/// The World Almanac and Book of Facts, 1962, page 756.
///
/// ## References:
///
/// Tukey, J. W. (1977) _Exploratory Data Analysis_.  Addison-Wesley.
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats) # for medpolish
/// USPersonalExpenditure
/// medpolish(log10(USPersonalExpenditure))
/// ```
pub fn us_personal_expenditure() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("USPersonalExpenditure.csv"))).finish()
}

/// # Populations Recorded by the US Census
///
/// ## Description:
///
/// This data set gives the population of the United States (in
/// millions) as recorded by the decennial census for the period
/// 1790-1970.
///
/// ## Usage:
///
/// uspop
///
/// ## Format:
///
/// A time series of 19 values.
///
/// ## Source:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(uspop, log = "y", main = "uspop data", xlab = "Year",
///  ylab = "U.S. Population (millions)")
/// ```
pub fn us_pop() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("uspop.csv"))).finish()
}

/// # Death Rates in Virginia (1940)
///
/// ## Description:
///
/// Death rates per 1000 in Virginia in 1940.
///
/// ## Usage:
///
/// VADeaths
///
/// ## Format:
///
/// A matrix with 5 rows and 4 columns.
///
/// ## Details:
///
/// The death rates are measured per 1000 population per year.  They
/// are cross-classified by age group (rows) and population group
/// (columns).  The age groups are: 50-54, 55-59, 60-64, 65-69, 70-74
/// and the population groups are Rural/Male, Rural/Female, Urban/Male
/// and Urban/Female.
///
/// This provides a rather nice 3-way analysis of variance example.
///
/// ## Source:
///
/// Molyneaux, L., Gilliam, S. K., and Florant, L. C.(1947)
/// Differences in Virginia death rates by color, sex, age, and rural
/// or urban residence.  _American Sociological Review_, *12*,
/// 525-535.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// n <- length(dr <- c(VADeaths))
/// nam <- names(VADeaths)
/// d.VAD <- data.frame(
/// Drate = dr,
/// age = rep(ordered(rownames(VADeaths)), length.out = n),
/// gender = gl(2, 5, n, labels = c("M", "F")),
/// site =  gl(2, 10, labels = c("rural", "urban")))
/// coplot(Drate ~ as.numeric(age) | gender * site, data = d.VAD,
/// panel = panel.smooth, xlab = "VADeaths data - Given: gender")
/// summary(aov.VAD <- aov(Drate ~ .^2, data = d.VAD))
/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
/// plot(aov.VAD)
/// par(opar)
/// ```
pub fn va_deaths() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("VADeaths.csv"))).finish()
}

/// # Topographic Information on Auckland's Maunga Whau Volcano
///
/// ## Description:
///
/// Maunga Whau (Mt Eden) is one of about 50 volcanos in the Auckland
/// volcanic field.  This data set gives topographic information for
/// Maunga Whau on a 10m by 10m grid.
///
/// ## Usage:
///
/// volcano
///
/// ## Format:
///
/// A matrix with 87 rows and 61 columns, rows corresponding to grid
/// lines running east to west and columns to grid lines running south
/// to north.
///
/// ## Source:
///
/// Digitized from a topographic map by Ross Ihaka.  These data should
/// not be regarded as accurate.
///
/// ## See Also:
///
/// ‘filled.contour’ for a nice plot.
///
/// ## Examples:
///
/// ```r
/// require(grDevices); require(graphics)
/// filled.contour(volcano, color.palette = terrain.colors, asp = 1)
/// title(main = "volcano data: filled contour map")
/// ```
pub fn volcano() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("volcano.csv"))).finish()
}

/// # The Number of Breaks in Yarn during Weaving
///
/// ## Description:
///
/// This data set gives the number of warp breaks per loom, where a
/// loom corresponds to a fixed length of yarn.
///
/// ## Usage:
///
/// warpbreaks
///
/// ## Format:
///
/// A data frame with 54 observations on 3 variables.
///
/// * ‘\[,1\]’  ‘breaks’numeric  The number of breaks
/// * ‘\[,2\]’  ‘wool’  factorThe type of wool (A or B)
/// * ‘\[,3\]’  ‘tension’  factorThe level of tension (L, M, H)
///
/// There are measurements on 9 looms for each of the six types of
/// warp (‘AL’, ‘AM’, ‘AH’, ‘BL’, ‘BM’, ‘BH’).
///
/// ## Source:
///
/// Tippett, L. H. C. (1950) _Technological Applications of
/// Statistics_.  Wiley.  Page 106.
///
/// ## References:
///
/// Tukey, J. W. (1977) _Exploratory Data Analysis_.  Addison-Wesley.
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## See Also:
///
/// ‘xtabs’ for ways to display these data as a table.
///
/// ## Examples:
///
/// ```r
/// require(stats); require(graphics)
/// summary(warpbreaks)
/// opar <- par(mfrow = c(1, 2), oma = c(0, 0, 1.1, 0))
/// plot(breaks ~ tension, data = warpbreaks, col = "lightgray",
///  varwidth = TRUE, subset = wool == "A", main = "Wool A")
/// plot(breaks ~ tension, data = warpbreaks, col = "lightgray",
///  varwidth = TRUE, subset = wool == "B", main = "Wool B")
/// mtext("warpbreaks data", side = 3, outer = TRUE)
/// par(opar)
/// summary(fm1 <- lm(breaks ~ wool*tension, data = warpbreaks))
/// anova(fm1)
/// ```
pub fn warp_breaks() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("warpbreaks.csv"))).finish()
}

/// # Average Heights and Weights for American Women
///
/// ## Description:
///
/// This data set gives the average heights and weights for American
/// women aged 30-39.
///
/// ## Usage:
///
/// women
///
/// ## Format:
///
/// A data frame with 15 observations on 2 variables.
///
/// * ‘\[,1\]’  ‘height’  numeric  Height (in)
/// * ‘\[,2\]’  ‘weight’  numeric  Weight (lbs)
///
/// ## Details:
///
/// The data set appears to have been taken from the American Society
/// of Actuaries _Build and Blood Pressure Study_ for some (unknown to
/// us) earlier year.
///
/// The World Almanac notes: “The figures represent weights in
/// ordinary indoor clothing and shoes, and heights with shoes”.
///
/// ## Source:
///
/// The World Almanac and Book of Facts, 1975.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// plot(women, xlab = "Height (in)", ylab = "Weight (lb)",
///  main = "women data: American women aged 30-39")
/// ```
pub fn women() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("women.csv"))).finish()
}

/// # The World's Telephones
///
/// ## Description:
///
/// The number of telephones in various regions of the world (in
/// thousands).
///
/// ## Usage:
///
/// WorldPhones
///
/// ## Format:
///
/// A matrix with 7 rows and 8 columns.  The columns of the matrix
/// give the figures for a given region, and the rows the figures for
/// a year.
///
/// The regions are: North America, Europe, Asia, South America,
/// Oceania, Africa, Central America.
///
/// The years are: 1951, 1956, 1957, 1958, 1959, 1960, 1961.
///
/// ## Source:
///
/// AT&T (1961) _The World's Telephones_.
///
/// ## References:
///
/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
/// Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// matplot(rownames(WorldPhones), WorldPhones, type = "b", log = "y",
///  xlab = "Year", ylab = "Number of telephones (1000's)")
/// legend(1951.5, 80000, colnames(WorldPhones), col = 1:6, lty = 1:5,
/// pch = rep(21, 7))
/// title(main = "World phones data: log scale for response")
/// ```
pub fn world_phones() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("WorldPhones.csv"))).finish()
}

/// # Internet Usage per Minute
///
/// ## Description:
///
/// A time series of the numbers of users connected to the Internet
/// through a server every minute.
///
/// ## Usage:
///
/// WWWusage
///
/// ## Format:
///
/// A time series of length 100.
///
/// ## Source:
///
/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
/// State Space Methods_.  Oxford University Press.
///
/// ## References:
///
/// Makridakis, S., Wheelwright, S. C. and Hyndman, R. J. (1998).
/// _Forecasting: Methods and Applications_.  Wiley.
///
/// ## Examples:
///
/// ```r
/// require(graphics)
/// work <- diff(WWWusage)
/// par(mfrow = c(2, 1)); plot(WWWusage); plot(work)
/// ## Not run:
///
/// require(stats)
/// aics <- matrix(, 6, 6, dimnames = list(p = 0:5, q = 0:5))
/// for(q in 1:5) aics[1, 1+q] <- arima(WWWusage, c(0, 1, q),
/// optim.control = list(maxit = 500))$aic
/// for(p in 1:5)
/// for(q in 0:5) aics[1+p, 1+q] <- arima(WWWusage, c(p, 1, q),
/// optim.control = list(maxit = 500))$aic
/// round(aics - min(aics, na.rm = TRUE), 2)
/// ## End(Not run)
/// ```
pub fn www_usage() -> PolarsResult<DataFrame> {
    CsvReader::new(Cursor::new(include_str!("WWWusage.csv"))).finish()
}