r2rs_datasets/data/
mod.rs

1use std::io::Cursor;
2
3use polars::{
4    error::PolarsResult,
5    frame::DataFrame,
6    prelude::{CsvReader, DataType, SerReader},
7};
8
9/// # Ability and Intelligence Tests
10///
11/// ## Description:
12///
13/// Six tests were given to 112 individuals. The covariance matrix is
14/// given in this object.
15///
16/// ## Usage:
17///
18/// ability.cov
19///
20/// ## Details:
21///
22/// The tests are described as
23///
24/// * general: a non-verbal measure of general intelligence using
25/// Cattell's culture-fair test.
26/// * picture: a picture-completion test
27/// * blocks: block design
28/// * maze: mazes
29/// * reading: reading comprehension
30/// * vocab: vocabulary
///
/// Bartholomew gives both covariance and correlation matrices, but
/// these are inconsistent.  Neither is in the original paper.
33///
34/// ## Source:
35///
36/// Bartholomew, D. J. (1987).  _Latent Variable Analysis and Factor
37/// Analysis_.  Griffin.
38///
39/// Bartholomew, D. J. and Knott, M. (1990).  _Latent Variable
40/// Analysis and Factor Analysis_.  Second Edition, Arnold.
41///
42/// ## References:
43///
44/// Smith, G. A. and Stanley G. (1983).  Clocking g: relating
45/// intelligence and measures of timed performance.  _Intelligence_,
46/// *7*, 353-368.  doi:10.1016/0160-2896(83)90010-7
47/// <https://doi.org/10.1016/0160-2896%2883%2990010-7>.
48///
49/// ## Examples:
50///
51/// ```r
52/// require(stats)
53/// (ability.FA <- factanal(factors = 1, covmat = ability.cov))
54/// update(ability.FA, factors = 2)
55/// ## The signs of factors and hence the signs of correlations are
56/// ## arbitrary with promax rotation.
57/// update(ability.FA, factors = 2, rotation = "promax")
58/// ```
59pub fn ability_cov() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
60    let mut center = Vec::new();
61    let mut n_obs = 0;
62    CsvReader::new(Cursor::new(include_str!("ability.cov.center.csv")))
63        .finish()?
64        .column("x")?
65        .cast(&DataType::Float64)?
66        .f64()?
67        .for_each(|d| center.push(d.unwrap() as usize));
68    CsvReader::new(Cursor::new(include_str!("ability.cov.n.obs.csv")))
69        .finish()?
70        .column("x")?
71        .cast(&DataType::Float64)?
72        .f64()?
73        .for_each(|d| n_obs = d.unwrap() as usize);
74    Ok((
75        CsvReader::new(Cursor::new(include_str!("ability.cov.cov.csv"))).finish()?,
76        center,
77        n_obs,
78    ))
79}
80
81/// # Passenger Miles on Commercial US Airlines, 1937-1960
82///
83/// ## Description:
84///
85/// The revenue passenger miles flown by commercial airlines in the
86/// United States for each year from 1937 to 1960.
87///
88/// ## Usage:
89///
90/// airmiles
91///
92/// ## Format:
93///
94/// A time series of 24 observations; yearly, 1937-1960.
95///
96/// ## Source:
97///
98/// F.A.A. Statistical Handbook of Aviation.
99///
100/// ## References:
101///
102/// Brown, R. G. (1963) _Smoothing, Forecasting and Prediction of
103/// Discrete Time Series_.  Prentice-Hall.
104///
105/// ## Examples:
106///
107/// ```r
108/// require(graphics)
109/// plot(airmiles, main = "airmiles data",
110///  xlab = "Passenger-miles flown by U.S. commercial airlines", col = 4)
111/// ```
112pub fn airmiles() -> PolarsResult<DataFrame> {
113    CsvReader::new(Cursor::new(include_str!("airmiles.csv"))).finish()
114}
115
116/// # Monthly Airline Passenger Numbers 1949-1960
117///
118/// ## Description:
119///
120/// The classic Box & Jenkins airline data.  Monthly totals of
121/// international airline passengers, 1949 to 1960.
122///
123/// ## Usage:
124///
125/// AirPassengers
126///
127/// ## Format:
128///
129/// A monthly time series, in thousands.
130///
131/// ## Source:
132///
133/// Box, G. E. P., Jenkins, G. M. and Reinsel, G. C. (1976) _Time
134/// Series Analysis, Forecasting and Control._ Third Edition.
135/// Holden-Day. Series G.
136///
137/// ## Examples:
138///
139/// ```r
140/// ## Not run:
141///
142/// ## These are quite slow and so not run by example(AirPassengers)
143///
144/// ## The classic 'airline model', by full ML
145/// (fit <- arima(log10(AirPassengers), c(0, 1, 1),
146///  seasonal = list(order = c(0, 1, 1), period = 12)))
147/// update(fit, method = "CSS")
148/// update(fit, x = window(log10(AirPassengers), start = 1954))
149/// pred <- predict(fit, n.ahead = 24)
150/// tl <- pred$pred - 1.96 * pred$se
151/// tu <- pred$pred + 1.96 * pred$se
152/// ts.plot(AirPassengers, 10^tl, 10^tu, log = "y", lty = c(1, 2, 2))
153///
154/// ## full ML fit is the same if the series is reversed, CSS fit is not
155/// ap0 <- rev(log10(AirPassengers))
156/// attributes(ap0) <- attributes(AirPassengers)
157/// arima(ap0, c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
158/// arima(ap0, c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12),
159/// method = "CSS")
160///
161/// ## Structural Time Series
162/// ap <- log10(AirPassengers) - 2
163/// (fit <- StructTS(ap, type = "BSM"))
164/// par(mfrow = c(1, 2))
165/// plot(cbind(ap, fitted(fit)), plot.type = "single")
166/// plot(cbind(ap, tsSmooth(fit)), plot.type = "single")
167/// ## End(Not run)
168/// ```
169pub fn air_passengers() -> PolarsResult<DataFrame> {
170    CsvReader::new(Cursor::new(include_str!("AirPassengers.csv"))).finish()
171}
172
173/// # New York Air Quality Measurements
174///
175/// ## Description:
176///
177/// Daily air quality measurements in New York, May to September 1973.
178///
179/// ## Usage:
180///
181/// airquality
182///
183/// ## Format:
184///
185/// A data frame with 153 observations on 6 variables.
186///
187///  * ‘\[,1\]’  ‘Ozone’ numeric  Ozone (ppb)
188///  * ‘\[,2\]’  ‘Solar.R’  numeric  Solar R (lang)
189///  * ‘\[,3\]’  ‘Wind’  numeric  Wind (mph)
190///  * ‘\[,4\]’  ‘Temp’  numeric  Temperature (degrees F)
191///  * ‘\[,5\]’  ‘Month’ numeric  Month (1-12)
///  * ‘\[,6\]’  ‘Day’  numeric  Day of month (1-31)
193///
194/// ## Details:
195///
196/// Daily readings of the following air quality values for May 1, 1973
197/// (a Tuesday) to September 30, 1973.
198///
199/// * ‘Ozone’: Mean ozone in parts per billion from 1300 to 1500
200/// hours at Roosevelt Island
201/// * ‘Solar.R’: Solar radiation in Langleys in the frequency band
202/// 4000-7700 Angstroms from 0800 to 1200 hours at Central Park
203/// * ‘Wind’: Average wind speed in miles per hour at 0700 and 1000
204/// hours at LaGuardia Airport
205/// * ‘Temp’: Maximum daily temperature in degrees Fahrenheit at La
206/// Guardia Airport.
207///
208/// ## Source:
209///
210/// The data were obtained from the New York State Department of
211/// Conservation (ozone data) and the National Weather Service
212/// (meteorological data).
213///
214/// ## References:
215///
216/// Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey, P. A.
217/// (1983) _Graphical Methods for Data Analysis_.  Belmont, CA:
218/// Wadsworth.
219///
220/// ## Examples:
221///
222/// ```r
223/// require(graphics)
224/// pairs(airquality, panel = panel.smooth, main = "airquality data")
225/// ```
226pub fn air_quality() -> PolarsResult<DataFrame> {
227    CsvReader::new(Cursor::new(include_str!("airquality.csv"))).finish()
228}
229
230/// # Anscombe's Quartet of 'Identical' Simple Linear Regressions
231///
232/// ## Description:
233///
234/// Four x-y datasets which have the same traditional statistical
235/// properties (mean, variance, correlation, regression line, etc.),
236/// yet are quite different.
237///
238/// ## Usage:
239///
240/// anscombe
241///
242/// ## Format:
243///
244/// A data frame with 11 observations on 8 variables.
245///
246/// * x1 == x2 == x3  the integers 4:14, specially arranged
247/// * x4  values 8 and 19
248/// * y1, y2, y3, y4  numbers in (3, 12.5) with mean 7.5 and sdev 2.03
249///
250/// ## Source:
251///
252/// Tufte, Edward R. (1989).  _The Visual Display of Quantitative
253/// Information_, 13-14.  Graphics Press.
254///
255/// ## References:
256///
257/// Anscombe, Francis J. (1973).  Graphs in statistical analysis.
258/// _The American Statistician_, *27*, 17-21.  doi:10.2307/2682899
259/// <https://doi.org/10.2307/2682899>.
260///
261/// ## Examples:
262///
263/// ```r
264/// require(stats); require(graphics)
265/// summary(anscombe)
266///
267/// ##-- now some "magic" to do the 4 regressions in a loop:
268/// ff <- y ~ x
269/// mods <- setNames(as.list(1:4), paste0("lm", 1:4))
270/// for(i in 1:4) {
271///  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
///  ## or   ff[[2]] <- as.name(paste0("y", i))
///  ##      ff[[3]] <- as.name(paste0("x", i))
274///  mods[[i]] <- lmi <- lm(ff, data = anscombe)
275///  print(anova(lmi))
276/// }
277///
278/// ## See how close they are (numerically!)
279/// sapply(mods, coef)
280/// lapply(mods, function(fm) coef(summary(fm)))
281///
282/// ## Now, do what you should have done in the first place: PLOTS
283/// op <- par(mfrow = c(2, 2), mar = 0.1+c(4,4,1,1), oma =  c(0, 0, 2, 0))
284/// for(i in 1:4) {
285///  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
286///  plot(ff, data = anscombe, col = "red", pch = 21, bg = "orange", cex = 1.2,
287/// xlim = c(3, 19), ylim = c(3, 13))
288///  abline(mods[[i]], col = "blue")
289/// }
290/// mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)
291/// par(op)
292/// ```
293pub fn anscombe() -> PolarsResult<DataFrame> {
294    CsvReader::new(Cursor::new(include_str!("anscombe.csv"))).finish()
295}
296
297/// # The Joyner-Boore Attenuation Data
298///
299/// ## Description:
300///
301/// This data gives peak accelerations measured at various observation
302/// stations for 23 earthquakes in California.  The data have been
/// used by various workers to estimate the attenuating effect of
304/// distance on ground acceleration.
305///
306/// ## Usage:
307///
308/// attenu
309///
310/// ## Format:
311///
312/// A data frame with 182 observations on 5 variables.
313///
314/// * \[,1\]  event numeric  Event Number
/// * \[,2\]  mag  numeric  Moment Magnitude
/// * \[,3\]  station  factor  Station Number
317/// * \[,4\]  dist  numeric  Station-hypocenter distance (km)
318/// * \[,5\]  accel numeric  Peak acceleration (g)
319///
320/// ## Source:
321///
322/// Joyner, W.B., D.M. Boore and R.D. Porcella (1981).  Peak
323/// horizontal acceleration and velocity from strong-motion records
324/// including records from the 1979 Imperial Valley, California
325/// earthquake.  USGS Open File report 81-365. Menlo Park, Ca.
326///
327/// ## References:
328///
329/// Boore, D. M. and Joyner, W. B.(1982).  The empirical prediction of
330/// ground motion, _Bulletin of the Seismological Society of America_,
331/// *72*, S269-S268.
332///
333/// Bolt, B. A. and Abrahamson, N. A. (1982).  New attenuation
334/// relations for peak and expected accelerations of strong ground
335/// motion.  _Bulletin of the Seismological Society of America_, *72*,
336/// 2307-2321.
337///
338/// Bolt B. A. and Abrahamson, N. A. (1983).  Reply to W. B. Joyner &
339/// D. M. Boore's “Comments on: New attenuation relations for peak and
340/// expected accelerations for peak and expected accelerations of
341/// strong ground motion”, _Bulletin of the Seismological Society of
342/// America_, *73*, 1481-1483.
343///
344/// Brillinger, D. R. and Preisler, H. K. (1984).  An exploratory
345/// analysis of the Joyner-Boore attenuation data, _Bulletin of the
346/// Seismological Society of America_, *74*, 1441-1449.
347///
348/// Brillinger, D. R. and Preisler, H. K. (1984).  _Further analysis
349/// of the Joyner-Boore attenuation data_.  Manuscript.
350///
351/// ## Examples:
352///
353/// ```r
354/// require(graphics)
355/// ## check the data class of the variables
356/// sapply(attenu, data.class)
357/// summary(attenu)
358/// pairs(attenu, main = "attenu data")
359/// coplot(accel ~ dist | as.factor(event), data = attenu, show.given = FALSE)
360/// coplot(log(accel) ~ log(dist) | as.factor(event),
361///  data = attenu, panel = panel.smooth, show.given = FALSE)
362/// ```
363pub fn attenu() -> PolarsResult<DataFrame> {
364    CsvReader::new(Cursor::new(include_str!("attenu.csv"))).finish()
365}
366
367/// # The Chatterjee-Price Attitude Data
368///
369/// ## Description:
370///
371/// From a survey of the clerical employees of a large financial
372/// organization, the data are aggregated from the questionnaires of
373/// the approximately 35 employees for each of 30 (randomly selected)
374/// departments.  The numbers give the percent proportion of
375/// favourable responses to seven questions in each department.
376///
377/// ## Usage:
378///
379/// attitude
380///
381/// ## Format:
382///
383/// A data frame with 30 observations on 7 variables. The first column
384/// are the short names from the reference, the second one the
385/// variable names in the data frame:
386///
/// * Y  rating  numeric  Overall rating
388/// * X\[1\]  complaints  numeric  Handling of employee complaints
389/// * X\[2\]  privileges  numeric  Does not allow special privileges
390/// * X\[3\]  learning numeric  Opportunity to learn
/// * X\[4\]  raises  numeric  Raises based on performance
392/// * X\[5\]  critical numeric  Too critical
393/// * X\[6\]  advance  numeric  Advancement
394///
395/// ## Source:
396///
397/// Chatterjee, S. and Price, B. (1977) _Regression Analysis by
398/// Example_.  New York: Wiley.  (Section 3.7, p.68ff of 2nd
399/// ed.(1991).)
400///
401/// ## Examples:
402///
403/// ```r
404/// require(stats); require(graphics)
405/// pairs(attitude, main = "attitude data")
406/// summary(attitude)
407/// summary(fm1 <- lm(rating ~ ., data = attitude))
408/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
409/// mar = c(4.1, 4.1, 2.1, 1.1))
410/// plot(fm1)
411/// summary(fm2 <- lm(rating ~ complaints, data = attitude))
412/// plot(fm2)
413/// par(opar)
414/// ```
415pub fn attitude() -> PolarsResult<DataFrame> {
416    CsvReader::new(Cursor::new(include_str!("attitude.csv"))).finish()
417}
418
419/// # Quarterly Time Series of the Number of Australian Residents
420///
421/// ## Description:
422///
423/// Numbers (in thousands) of Australian residents measured quarterly
424/// from March 1971 to March 1994.  The object is of class ‘"ts"’.
425///
426/// ## Usage:
427///
428/// austres
429///
430/// ## Source:
431///
432/// P. J. Brockwell and R. A. Davis (1996) _Introduction to Time
433/// Series and Forecasting._ Springer
434pub fn austres() -> PolarsResult<DataFrame> {
435    CsvReader::new(Cursor::new(include_str!("austres.csv"))).finish()
436}
437
438/// # Body Temperature Series of Two Beavers
439///
440/// ## Description:
441///
442/// Reynolds (1994) describes a small part of a study of the long-term
443/// temperature dynamics of beaver _Castor canadensis_ in
444/// north-central Wisconsin.  Body temperature was measured by
/// telemetry every 10 minutes for four females, but data from one
446/// period of less than a day for each of two animals is used there.
447///
448/// ## Usage:
449///
450/// beaver1
451/// beaver2
452///
453/// ## Format:
454///
455/// The ‘beaver1’ data frame has 114 rows and 4 columns on body
456/// temperature measurements at 10 minute intervals.
457///
458/// The ‘beaver2’ data frame has 100 rows and 4 columns on body
459/// temperature measurements at 10 minute intervals.
460///
461/// The variables are as follows:
462///
463/// * day Day of observation (in days since the beginning of 1990),
464/// December 12-13 (‘beaver1’) and November 3-4 (‘beaver2’).
465/// * time Time of observation, in the form ‘0330’ for 3:30am
466/// * temp Measured body temperature in degrees Celsius.
467/// * activ Indicator of activity outside the retreat.
468///
469/// ## Note:
470///
471/// The observation at 22:20 is missing in ‘beaver1’.
472///
473/// ## Source:
474///
475/// P. S. Reynolds (1994) Time-series analyses of beaver body
476/// temperatures.  Chapter 11 of Lange, N., Ryan, L., Billard, L.,
477/// Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994) _Case
478/// Studies in Biometry._ New York: John Wiley and Sons.
479///
480/// ## Examples:
481///
482/// ```r
483/// require(graphics)
484/// (yl <- range(beaver1$temp, beaver2$temp))
485///
486/// beaver.plot <- function(bdat, ...) {
487///  nam <- deparse(substitute(bdat))
488///  with(bdat, {
489/// # Hours since start of day:
490/// hours <- time %/% 100 + 24*(day - day[1]) + (time %% 100)/60
491/// plot (hours, temp, type = "l", ...,
492/// main = paste(nam, "body temperature"))
493/// abline(h = 37.5, col = "gray", lty = 2)
494/// is.act <- activ == 1
495/// points(hours[is.act], temp[is.act], col = 2, cex = .8)
496///  })
497/// }
498/// op <- par(mfrow = c(2, 1), mar = c(3, 3, 4, 2), mgp = 0.9 * 2:0)
499/// beaver.plot(beaver1, ylim = yl)
500/// beaver.plot(beaver2, ylim = yl)
501/// par(op)
502/// ```
503pub fn beaver1() -> PolarsResult<DataFrame> {
504    CsvReader::new(Cursor::new(include_str!("beaver1.csv"))).finish()
505}
506
507/// # Body Temperature Series of Two Beavers
508///
509/// ## Description:
510///
511/// Reynolds (1994) describes a small part of a study of the long-term
512/// temperature dynamics of beaver _Castor canadensis_ in
513/// north-central Wisconsin.  Body temperature was measured by
/// telemetry every 10 minutes for four females, but data from one
515/// period of less than a day for each of two animals is used there.
516///
517/// ## Usage:
518///
519/// beaver1
520/// beaver2
521///
522/// ## Format:
523///
524/// The ‘beaver1’ data frame has 114 rows and 4 columns on body
525/// temperature measurements at 10 minute intervals.
526///
527/// The ‘beaver2’ data frame has 100 rows and 4 columns on body
528/// temperature measurements at 10 minute intervals.
529///
530/// The variables are as follows:
531///
532/// * day Day of observation (in days since the beginning of 1990),
533/// December 12-13 (‘beaver1’) and November 3-4 (‘beaver2’).
534/// * time Time of observation, in the form ‘0330’ for 3:30am
535/// * temp Measured body temperature in degrees Celsius.
536/// * activ Indicator of activity outside the retreat.
537///
538/// ## Note:
539///
540/// The observation at 22:20 is missing in ‘beaver1’.
541///
542/// ## Source:
543///
544/// P. S. Reynolds (1994) Time-series analyses of beaver body
545/// temperatures.  Chapter 11 of Lange, N., Ryan, L., Billard, L.,
546/// Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994) _Case
547/// Studies in Biometry._ New York: John Wiley and Sons.
548///
549/// ## Examples:
550///
551/// ```r
552/// require(graphics)
553/// (yl <- range(beaver1$temp, beaver2$temp))
554///
555/// beaver.plot <- function(bdat, ...) {
556///  nam <- deparse(substitute(bdat))
557///  with(bdat, {
558/// # Hours since start of day:
559/// hours <- time %/% 100 + 24*(day - day[1]) + (time %% 100)/60
560/// plot (hours, temp, type = "l", ...,
561/// main = paste(nam, "body temperature"))
562/// abline(h = 37.5, col = "gray", lty = 2)
563/// is.act <- activ == 1
564/// points(hours[is.act], temp[is.act], col = 2, cex = .8)
565///  })
566/// }
567/// op <- par(mfrow = c(2, 1), mar = c(3, 3, 4, 2), mgp = 0.9 * 2:0)
568/// beaver.plot(beaver1, ylim = yl)
569/// beaver.plot(beaver2, ylim = yl)
570/// par(op)
571/// ```
572pub fn beaver2() -> PolarsResult<DataFrame> {
573    CsvReader::new(Cursor::new(include_str!("beaver2.csv"))).finish()
574}
575
576/// # Sales Data with Leading Indicator
577///
578/// ## Description:
579///
580/// The sales time series ‘BJsales’ and leading indicator
581/// ‘BJsales.lead’ each contain 150 observations.  The objects are of
582/// class ‘"ts"’.
583///
584/// ## Usage:
585///
586/// BJsales
587/// BJsales.lead
588///
589/// ## Source:
590///
591/// The data are given in Box & Jenkins (1976).  Obtained from the
592/// Time Series Data Library at <https://robjhyndman.com/TSDL/>
593///
594/// ## References:
595///
596/// G. E. P. Box and G. M. Jenkins (1976): _Time Series Analysis,
597/// Forecasting and Control_, Holden-Day, San Francisco, p. 537.
598///
599/// P. J. Brockwell and R. A. Davis (1991): _Time Series: Theory and
600/// Methods_, Second edition, Springer Verlag, NY, pp. 414.
601pub fn bjsales() -> PolarsResult<DataFrame> {
602    CsvReader::new(Cursor::new(include_str!("BJsales.csv"))).finish()
603}
604
605/// # Sales Data with Leading Indicator
606///
607/// ## Description:
608///
609/// The sales time series ‘BJsales’ and leading indicator
610/// ‘BJsales.lead’ each contain 150 observations.  The objects are of
611/// class ‘"ts"’.
612///
613/// ## Usage:
614///
615/// BJsales
616/// BJsales.lead
617///
618/// ## Source:
619///
620/// The data are given in Box & Jenkins (1976).  Obtained from the
621/// Time Series Data Library at <https://robjhyndman.com/TSDL/>
622///
623/// ## References:
624///
625/// G. E. P. Box and G. M. Jenkins (1976): _Time Series Analysis,
626/// Forecasting and Control_, Holden-Day, San Francisco, p. 537.
627///
628/// P. J. Brockwell and R. A. Davis (1991): _Time Series: Theory and
629/// Methods_, Second edition, Springer Verlag, NY, pp. 414.
630pub fn bjsales_lead() -> PolarsResult<DataFrame> {
631    CsvReader::new(Cursor::new(include_str!("BJsales.lead.csv"))).finish()
632}
633
634/// # Biochemical Oxygen Demand
635///
636/// ## Description:
637///
638/// The ‘BOD’ data frame has 6 rows and 2 columns giving the
639/// biochemical oxygen demand versus time in an evaluation of water
640/// quality.
641///
642/// ## Usage:
643///
644/// BOD
645///
646/// ## Format:
647///
648/// This data frame contains the following columns:
649///
650/// * ‘Time’ A numeric vector giving the time of the measurement (days).
651/// * ‘demand’ A numeric vector giving the biochemical oxygen demand
652/// (mg/l).
653///
654/// ## Source:
655///
656/// Bates, D.M. and Watts, D.G. (1988), _Nonlinear Regression Analysis
657/// and Its Applications_, Wiley, Appendix A1.4.
658///
659/// Originally from Marske (1967), _Biochemical Oxygen Demand Data
660/// Interpretation Using Sum of Squares Surface_ M.Sc. Thesis,
661/// University of Wisconsin - Madison.
662///
663/// ## Examples:
664///
665/// ```r
666/// require(stats)
667/// # simplest form of fitting a first-order model to these data
668/// fm1 <- nls(demand ~ A*(1-exp(-exp(lrc)*Time)), data = BOD,
669///   start = c(A = 20, lrc = log(.35)))
670/// coef(fm1)
671/// fm1
672/// # using the plinear algorithm  (trace o/p differs by platform)
673/// ## IGNORE_RDIFF_BEGIN
674/// fm2 <- nls(demand ~ (1-exp(-exp(lrc)*Time)), data = BOD,
675///   start = c(lrc = log(.35)), algorithm = "plinear", trace = TRUE)
676/// ## IGNORE_RDIFF_END
677/// # using a self-starting model
678/// fm3 <- nls(demand ~ SSasympOrig(Time, A, lrc), data = BOD)
679/// summary(fm3)
680/// ```
681pub fn bod() -> PolarsResult<DataFrame> {
682    CsvReader::new(Cursor::new(include_str!("BOD.csv"))).finish()
683}
684
685/// # Speed and Stopping Distances of Cars
686///
687/// ## Description:
688///
689/// The data give the speed of cars and the distances taken to stop.
690/// Note that the data were recorded in the 1920s.
691///
692/// ## Usage:
693///
694/// cars
695///
696/// ## Format:
697///
698/// A data frame with 50 observations on 2 variables.
699///
700/// * \[,1\]  speed  numeric  Speed (mph)
/// * \[,2\]  dist  numeric  Stopping distance (ft)
702///
703/// ## Source:
704///
705/// Ezekiel, M. (1930) _Methods of Correlation Analysis_.  Wiley.
706///
707/// ## References:
708///
709/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
710///
711/// ## Examples:
712///
713/// ```r
714/// require(stats); require(graphics)
715/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
716///  las = 1)
717/// lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
718/// title(main = "cars data")
719/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
720///  las = 1, log = "xy")
721/// title(main = "cars data (logarithmic scales)")
722/// lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
723/// summary(fm1 <- lm(log(dist) ~ log(speed), data = cars))
724/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
725/// mar = c(4.1, 4.1, 2.1, 1.1))
726/// plot(fm1)
727/// par(opar)
728///
729/// ## An example of polynomial regression
730/// plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
731/// las = 1, xlim = c(0, 25))
732/// d <- seq(0, 25, length.out = 200)
733/// for(degree in 1:4) {
734///  fm <- lm(dist ~ poly(speed, degree), data = cars)
735///  assign(paste("cars", degree, sep = "."), fm)
736///  lines(d, predict(fm, data.frame(speed = d)), col = degree)
737/// }
738/// anova(cars.1, cars.2, cars.3, cars.4)
739/// ```
740pub fn cars() -> PolarsResult<DataFrame> {
741    CsvReader::new(Cursor::new(include_str!("cars.csv"))).finish()
742}
743
744/// # Weight versus age of chicks on different diets
745///
746/// ## Description:
747///
748/// The ‘ChickWeight’ data frame has 578 rows and 4 columns from an
749/// experiment on the effect of diet on early growth of chicks.
750///
751/// ## Usage:
752///
753/// ChickWeight
754///
755/// ## Format:
756///
757/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
758/// "groupedData", "data.frame")’ containing the following columns:
759///
760/// * weight a numeric vector giving the body weight of the chick (gm).
761/// * Time a numeric vector giving the number of days since birth when
762/// the measurement was made.
763/// * Chick an ordered factor with levels ‘18’ < ... < ‘48’ giving a
764/// unique identifier for the chick.  The ordering of the levels
765/// groups chicks on the same diet together and orders them
766/// according to their final weight (lightest to heaviest) within
767/// diet.
768/// * Diet a factor with levels 1, ..., 4 indicating which experimental
769/// diet the chick received.
770///
771/// ## Details:
772///
773/// The body weights of the chicks were measured at birth and every
774/// second day thereafter until day 20.  They were also measured on
/// day 21.  There were four groups of chicks on different protein
776/// diets.
777///
778/// This dataset was originally part of package ‘nlme’, and that has
779/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
780/// for its grouped-data classes.
781///
782/// ## Source:
783///
784/// Crowder, M. and Hand, D. (1990), _Analysis of Repeated Measures_,
785/// Chapman and Hall (example 5.3)
786///
787/// Hand, D. and Crowder, M. (1996), _Practical Longitudinal Data
788/// Analysis_, Chapman and Hall (table A.2)
789///
790/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
791/// and S-PLUS_, Springer.
792///
793/// ## See Also:
794///
795/// ‘SSlogis’ for models fitted to this dataset.
796///
797/// ## Examples:
798///
799/// ```r
800/// require(graphics)
801/// coplot(weight ~ Time | Chick, data = ChickWeight,
802///  type = "b", show.given = FALSE)
803/// ```
804pub fn chick_weight() -> PolarsResult<DataFrame> {
805    CsvReader::new(Cursor::new(include_str!("ChickWeight.csv"))).finish()
806}
807
808/// # Chicken Weights by Feed Type
809///
810/// ## Description:
811///
812/// An experiment was conducted to measure and compare the
813/// effectiveness of various feed supplements on the growth rate of
814/// chickens.
815///
816/// ## Usage:
817///
818/// chickwts
819///
820/// ## Format:
821///
822/// A data frame with 71 observations on the following 2 variables.
823///
824/// * ‘weight’ a numeric variable giving the chick weight.
825/// * ‘feed’ a factor giving the feed type.
826///
827/// ## Details:
828///
829/// Newly hatched chicks were randomly allocated into six groups, and
830/// each group was given a different feed supplement.  Their weights
831/// in grams after six weeks are given along with feed types.
832///
833/// ## Source:
834///
835/// Anonymous (1948) _Biometrika_, *35*, 214.
836///
837/// ## References:
838///
839/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
840/// Wiley.
841///
842/// ## Examples:
843///
844/// ```r
845/// require(stats); require(graphics)
846/// boxplot(weight ~ feed, data = chickwts, col = "lightgray",
847/// varwidth = TRUE, notch = TRUE, main = "chickwt data",
848/// ylab = "Weight at six weeks (gm)")
849/// anova(fm1 <- lm(weight ~ feed, data = chickwts))
850/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
851/// mar = c(4.1, 4.1, 2.1, 1.1))
852/// plot(fm1)
853/// par(opar)
854/// ```
855pub fn chickwts() -> PolarsResult<DataFrame> {
856    CsvReader::new(Cursor::new(include_str!("chickwts.csv"))).finish()
857}
858
859/// # Mauna Loa Atmospheric CO2 Concentration
860///
861/// ## Description:
862///
863/// Atmospheric concentrations of CO2 are expressed in parts per
864/// million (ppm) and reported in the preliminary 1997 SIO manometric
865/// mole fraction scale.
866///
867/// ## Usage:
868///
869/// co2
870///
871/// ## Format:
872///
873/// A time series of 468 observations; monthly from 1959 to 1997.
874///
875/// ## Details:
876///
877/// The values for February, March and April of 1964 were missing and
878/// have been obtained by interpolating linearly between the values
879/// for January and May of 1964.
880///
881/// ## Source:
882///
883/// Keeling, C. D. and Whorf, T. P., Scripps Institution of
884/// Oceanography (SIO), University of California, La Jolla, California
885/// USA 92093-0220.
886///
887/// <https://scrippsco2.ucsd.edu/data/atmospheric_co2/>.
888///
889/// Note that the data are subject to revision (based on recalibration
890/// of standard gases) by the Scripps institute, and hence may not
891/// agree exactly with the data provided by R.
892///
893/// ## References:
894///
895/// Cleveland, W. S. (1993) _Visualizing Data_.  New Jersey: Summit
896/// Press.
897///
898/// ## Examples:
899///
900/// ```r
901/// require(graphics)
902/// plot(co2, ylab = expression("Atmospheric concentration of CO"[2]),
903///   las = 1)
904/// title(main = "co2 data set")
905/// ```
906pub fn co2_mauna() -> PolarsResult<DataFrame> {
907    CsvReader::new(Cursor::new(include_str!("co2.mauna.csv"))).finish()
908}
909
910/// # Carbon Dioxide Uptake in Grass Plants
911///
912/// ## Description:
913///
914/// The ‘CO2’ data frame has 84 rows and 5 columns of data from an
915/// experiment on the cold tolerance of the grass species _Echinochloa
916/// crus-galli_.
917///
918/// ## Usage:
919///
920/// CO2
921///
922/// ## Format:
923///
924/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
925/// "groupedData", "data.frame")’ containing the following columns:
926///
927/// * Plant an ordered factor with levels ‘Qn1’ < ‘Qn2’ < ‘Qn3’ < ... <
928/// ‘Mc1’ giving a unique identifier for each plant.
929/// * Type a factor with levels ‘Quebec’ ‘Mississippi’ giving the origin
930/// of the plant
931/// * Treatment a factor with levels ‘nonchilled’ ‘chilled’
932/// * conc a numeric vector of ambient carbon dioxide concentrations
933/// (mL/L).
934/// * uptake a numeric vector of carbon dioxide uptake rates (umol/m^2
935/// sec).
936///
937/// ## Details:
938///
939/// The CO2 uptake of six plants from Quebec and six plants from
940/// Mississippi was measured at several levels of ambient CO2
941/// concentration.  Half the plants of each type were chilled
942/// overnight before the experiment was conducted.
943///
944/// This dataset was originally part of package ‘nlme’, and that has
945/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
946/// for its grouped-data classes.
947///
948/// ## Source:
949///
950/// Potvin, C., Lechowicz, M. J. and Tardif, S. (1990) “The
951/// statistical analysis of ecophysiological response curves obtained
952/// from experiments involving repeated measures”, _Ecology_, *71*,
953/// 1389-1400.
954///
955/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
956/// and S-PLUS_, Springer.
957///
958/// ## Examples:
959///
960/// ```r
961/// require(stats); require(graphics)
962///
963/// coplot(uptake ~ conc | Plant, data = CO2, show.given = FALSE, type = "b")
964/// ## fit the data for the first plant
965/// fm1 <- nls(uptake ~ SSasymp(conc, Asym, lrc, c0),
966/// data = CO2, subset = Plant == "Qn1")
967/// summary(fm1)
968/// ## fit each plant separately
969/// fmlist <- list()
970/// for (pp in levels(CO2$Plant)) {
971///  fmlist[[pp]] <- nls(uptake ~ SSasymp(conc, Asym, lrc, c0),
972/// data = CO2, subset = Plant == pp)
973/// }
974/// ## check the coefficients by plant
975/// print(sapply(fmlist, coef), digits = 3)
976/// ```
977pub fn co2_plants() -> PolarsResult<DataFrame> {
978    CsvReader::new(Cursor::new(include_str!("CO2.plants.csv"))).finish()
979}
980
981/// # Student's 3000 Criminals Data
982///
983/// ## Description:
984///
985/// Data of 3000 male criminals over 20 years old undergoing their
986/// sentences in the chief prisons of England and Wales.
987///
988/// ## Usage:
989///
990/// crimtab
991///
992/// ## Format:
993///
994/// A ‘table’ object of ‘integer’ counts, of dimension 42 * 22 with a
995/// total count, ‘sum(crimtab)’ of 3000.
996///
997/// The 42 ‘rownames’ (‘"9.4"’, ‘"9.5"’, ...)  correspond to midpoints
998/// of intervals of finger lengths whereas the 22 column names
999/// (‘colnames’) (‘"142.24"’, ‘"144.78"’, ...) correspond to (body)
1000/// heights of 3000 criminals, see also below.
1001///
1002/// ## Details:
1003///
1004/// Student is the pseudonym of William Sealy Gosset.  In his 1908
1005/// paper he wrote (on page 13) at the beginning of section VI
/// entitled _Practical Test of the foregoing Equations_:
1007///
1008/// “Before I had succeeded in solving my problem analytically, I had
1009/// endeavoured to do so empirically.  The material used was a
1010/// correlation table containing the height and left middle finger
1011/// measurements of 3000 criminals, from a paper by W. R. MacDonell
1012/// (_Biometrika_, Vol. I., p. 219).  The measurements were written
1013/// out on 3000 pieces of cardboard, which were then very thoroughly
1014/// shuffled and drawn at random.  As each card was drawn its numbers
1015/// were written down in a book, which thus contains the measurements
1016/// of 3000 criminals in a random order.  Finally, each consecutive
/// set of 4 was taken as a sample—750 in all—and the mean, standard
1018/// deviation, and correlation of each sample determined.  The
1019/// difference between the mean of each sample and the mean of the
1020/// population was then divided by the standard deviation of the
1021/// sample, giving us the _z_ of Section III.”
1022///
1023/// The table is in fact page 216 and not page 219 in MacDonell(1902).
1024/// In the MacDonell table, the middle finger lengths were given in mm
1025/// and the heights in feet/inches intervals, they are both converted
1026/// into cm here.  The midpoints of intervals were used, e.g., where
1027/// MacDonell has 4' 7''9/16 -- 8''9/16, we have 142.24 which is
1028/// 2.54*56 = 2.54*(4' 8'').
1029///
1030/// MacDonell credited the source of data (page 178) as follows: _The
1031/// data on which the memoir is based were obtained, through the
1032/// kindness of Dr Garson, from the Central Metric Office, New
1033/// Scotland Yard..._ He pointed out on page 179 that : _The forms
1034/// were drawn at random from the mass on the office shelves; we are
1035///  therefore dealing with a random sampling._
1036///
1037/// ## Source:
1038///
1039/// <https://pbil.univ-lyon1.fr/R/donnees/criminals1902.txt> thanks to
1040/// Jean R. Lobry and Anne-Béatrice Dufour.
1041///
1042/// ## References:
1043///
1044/// Garson, J.G. (1900).  The metric system of identification of
1045/// criminals, as used in Great Britain and Ireland.  _The Journal of
1046/// the Anthropological Institute of Great Britain and Ireland_, *30*,
1047/// 161-198.  doi:10.2307/2842627 <https://doi.org/10.2307/2842627>.
1048///
1049/// MacDonell, W.R. (1902).  On criminal anthropometry and the
1050/// identification of criminals.  _Biometrika_, *1*(2), 177-227.
1051/// doi:10.2307/2331487 <https://doi.org/10.2307/2331487>.
1052///
1053/// Student (1908).  The probable error of a mean.  _Biometrika_, *6*,
1054/// 1-25.  doi:10.2307/2331554 <https://doi.org/10.2307/2331554>.
1055///
1056/// ## Examples:
1057///
1058/// ```r
1059/// require(stats)
1060/// dim(crimtab)
1061/// utils::str(crimtab)
1062/// ## for nicer printing:
1063/// local({cT <- crimtab
1064///  colnames(cT) <- substring(colnames(cT), 2, 3)
1065///  print(cT, zero.print = " ")
1066/// })
1067///
1068/// ## Repeat Student's experiment:
1069///
1070/// # 1) Reconstitute 3000 raw data for heights in inches and rounded to
1071/// # nearest integer as in Student's paper:
1072///
1073/// (heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
1074/// d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))
1075///
1076/// # 2) shuffle the data:
1077///
1078/// set.seed(1)
1079/// d.hei <- d.hei[sample(1:3000), , drop = FALSE]
1080///
1081/// # 3) Make 750 samples each of size 4:
1082///
1083/// d.hei$sample <- as.factor(rep(1:750, each = 4))
1084///
1085/// # 4) Compute the means and standard deviations (n) for the 750 samples:
1086///
1087/// h.mean <- with(d.hei, tapply(height, sample, FUN = mean))
1088/// h.sd<- with(d.hei, tapply(height, sample, FUN = sd)) * sqrt(3/4)
1089///
1090/// # 5) Compute the difference between the mean of each sample and
1091/// # the mean of the population and then divide by the
1092/// # standard deviation of the sample:
1093///
1094/// zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
1095///
1096/// # 6) Replace infinite values by +/- 6 as in Student's paper:
1097///
1098/// zobs[infZ <- is.infinite(zobs)] # none of them
1099/// zobs[infZ] <- 6 * sign(zobs[infZ])
1100///
1101/// # 7) Plot the distribution:
1102///
1103/// require(grDevices); require(graphics)
1104/// hist(x = zobs, probability = TRUE, xlab = "Student's z",
1105///  col = grey(0.8), border = grey(0.5),
1106///  main = "Distribution of Student's z score  for 'crimtab' data")
1107/// ```
1108pub fn crimtab() -> PolarsResult<DataFrame> {
1109    CsvReader::new(Cursor::new(include_str!("crimtab.csv"))).finish()
1110}
1111
1112/// # Yearly Numbers of Important Discoveries
1113///
1114/// ## Description:
1115///
1116/// The numbers of “great” inventions and scientific discoveries in
1117/// each year from 1860 to 1959.
1118///
1119/// ## Usage:
1120///
1121/// discoveries
1122///
1123/// ## Format:
1124///
1125/// A time series of 100 values.
1126///
1127/// ## Source:
1128///
1129/// The World Almanac and Book of Facts, 1975 Edition, pages 315-318.
1130///
1131/// ## References:
1132///
1133/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
1134///
1135/// ## Examples:
1136///
1137/// ```r
1138/// require(graphics)
1139/// plot(discoveries, ylab = "Number of important discoveries",
1140///  las = 1)
1141/// title(main = "discoveries data set")
1142/// ```
1143pub fn discoveries() -> PolarsResult<DataFrame> {
1144    CsvReader::new(Cursor::new(include_str!("discoveries.csv"))).finish()
1145}
1146
1147/// # Elisa assay of DNase
1148///
1149/// ## Description:
1150///
1151/// The ‘DNase’ data frame has 176 rows and 3 columns of data obtained
1152/// during development of an ELISA assay for the recombinant protein
1153/// DNase in rat serum.
1154///
1155/// ## Usage:
1156///
1157/// DNase
1158///
1159/// ## Format:
1160///
/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
/// "groupedData", "data.frame")’ containing the following columns:
///
1163/// * Run an ordered factor with levels ‘10’ < ... < ‘3’ indicating the
1164/// assay run.
1165/// * conc a numeric vector giving the known concentration of the
1166/// protein.
1167/// * density a numeric vector giving the measured optical density
1168/// (dimensionless) in the assay.  Duplicate optical density
1169/// measurements were obtained.
1170///
1171/// ## Details:
1172///
1173/// This dataset was originally part of package ‘nlme’, and that has
1174/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
1175/// for its grouped-data classes.
1176///
1177/// ## Source:
1178///
1179/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
1180/// Repeated Measurement Data_, Chapman & Hall (section 5.2.4, p. 134)
1181///
1182/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
1183/// and S-PLUS_, Springer.
1184///
1185/// ## Examples:
1186///
1187/// ```r
1188/// require(stats); require(graphics)
1189///
1190/// coplot(density ~ conc | Run, data = DNase,
1191/// show.given = FALSE, type = "b")
1192/// coplot(density ~ log(conc) | Run, data = DNase,
1193/// show.given = FALSE, type = "b")
1194/// ## fit a representative run
1195/// fm1 <- nls(density ~ SSlogis( log(conc), Asym, xmid, scal ),
1196/// data = DNase, subset = Run == 1)
1197/// ## compare with a four-parameter logistic
1198/// fm2 <- nls(density ~ SSfpl( log(conc), A, B, xmid, scal ),
1199/// data = DNase, subset = Run == 1)
1200/// summary(fm2)
1201/// anova(fm1, fm2)
1202/// ```
1203pub fn dnase() -> PolarsResult<DataFrame> {
1204    CsvReader::new(Cursor::new(include_str!("DNase.csv"))).finish()
1205}
1206
1207/// # Smoking, Alcohol and (O)esophageal Cancer
1208///
1209/// ## Description:
1210///
1211/// Data from a case-control study of (o)esophageal cancer in
1212/// Ille-et-Vilaine, France.
1213///
1214/// ## Usage:
1215///
1216/// esoph
1217///
1218/// ## Format:
1219///
1220/// A data frame with records for 88 age/alcohol/tobacco combinations.
1221///
/// * \[,1\]  "agegp"  Age group
///     * 1  25-34 years
///     * 2  35-44
///     * 3  45-54
///     * 4  55-64
///     * 5  65-74
///     * 6  75+
/// * \[,2\]  "alcgp"  Alcohol consumption
///     * 1  0-39 gm/day
///     * 2  40-79
///     * 3  80-119
///     * 4  120+
/// * \[,3\]  "tobgp"  Tobacco consumption
///     * 1  0-9 gm/day
///     * 2  10-19
///     * 3  20-29
///     * 4  30+
1236/// * \[,4\]  "ncases"  Number of cases
1237/// * \[,5\]  "ncontrols"  Number of controls
1238///
1239/// ## Author(s):
1240///
1241/// Thomas Lumley
1242///
1243/// ## Source:
1244///
1245/// Breslow, N. E. and Day, N. E. (1980) _Statistical Methods in
1246/// Cancer Research. Volume 1: The Analysis of Case-Control Studies._
1247/// IARC Lyon / Oxford University Press.
1248///
1249/// ## Examples:
1250///
1251/// ```r
1252/// require(stats)
1253/// require(graphics) # for mosaicplot
1254/// summary(esoph)
1255/// ## effects of alcohol, tobacco and interaction, age-adjusted
1256/// model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
1257///  data = esoph, family = binomial())
1258/// anova(model1)
1259/// ## Try a linear effect of alcohol and tobacco
1260/// model2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp)
1261///  + unclass(alcgp),
1262///  data = esoph, family = binomial())
1263/// summary(model2)
1264/// ## Re-arrange data for a mosaic plot
1265/// ttt <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
1266/// o <- with(esoph, order(tobgp, alcgp, agegp))
1267/// ttt[ttt == 1] <- esoph$ncases[o]
1268/// tt1 <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
1269/// tt1[tt1 == 1] <- esoph$ncontrols[o]
1270/// tt <- array(c(ttt, tt1), c(dim(ttt),2),
1271/// c(dimnames(ttt), list(c("Cancer", "control"))))
1272/// mosaicplot(tt, main = "esoph data set", color = TRUE)
1273/// ```
1274pub fn esoph() -> PolarsResult<DataFrame> {
1275    CsvReader::new(Cursor::new(include_str!("esoph.csv"))).finish()
1276}
1277
1278/// # Conversion Rates of Euro Currencies
1279///
1280/// ## Description:
1281///
1282/// Conversion rates between the various Euro currencies.
1283///
1284/// ## Usage:
1285///
1286/// euro
1287/// euro.cross
1288///
1289/// ## Format:
1290///
1291/// ‘euro’ is a named vector of length 11, ‘euro.cross’ a matrix of
1292/// size 11 by 11, with dimnames.
1293///
1294/// ## Details:
1295///
1296/// The data set ‘euro’ contains the value of 1 Euro in all currencies
1297/// participating in the European monetary union (Austrian Schilling
1298/// ATS, Belgian Franc BEF, German Mark DEM, Spanish Peseta ESP,
1299/// Finnish Markka FIM, French Franc FRF, Irish Punt IEP, Italian Lira
1300/// ITL, Luxembourg Franc LUF, Dutch Guilder NLG and Portuguese Escudo
1301/// PTE).  These conversion rates were fixed by the European Union on
1302/// December 31, 1998.  To convert old prices to Euro prices, divide
1303/// by the respective rate and round to 2 digits.
1304///
1305/// The data set ‘euro.cross’ contains conversion rates between the
1306/// various Euro currencies, i.e., the result of ‘outer(1 / euro,
1307/// euro)’.
1308///
1309/// ## Examples:
1310///
1311/// ```r
1312/// cbind(euro)
1313///
1314/// ## These relations hold:
1315/// euro == signif(euro, 6) # [6 digit precision in Euro's definition]
1316/// all(euro.cross == outer(1/euro, euro))
1317///
1318/// ## Convert 20 Euro to Belgian Franc
1319/// 20 * euro["BEF"]
1320/// ## Convert 20 Austrian Schilling to Euro
1321/// 20 / euro["ATS"]
1322/// ## Convert 20 Spanish Pesetas to Italian Lira
1323/// 20 * euro.cross["ESP", "ITL"]
1324///
1325/// require(graphics)
1326/// dotchart(euro,
1327/// main = "euro data: 1 Euro in currency unit")
1328/// dotchart(1/euro,
1329/// main = "euro data: 1 currency unit in Euros")
1330/// dotchart(log(euro, 10),
1331/// main = "euro data: log10(1 Euro in currency unit)")
1332/// ```
1333pub fn euro_cross() -> PolarsResult<DataFrame> {
1334    CsvReader::new(Cursor::new(include_str!("euro.cross.csv"))).finish()
1335}
1336
1337/// # Conversion Rates of Euro Currencies
1338///
1339/// ## Description:
1340///
1341/// Conversion rates between the various Euro currencies.
1342///
1343/// ## Usage:
1344///
1345/// euro
1346/// euro.cross
1347///
1348/// ## Format:
1349///
1350/// ‘euro’ is a named vector of length 11, ‘euro.cross’ a matrix of
1351/// size 11 by 11, with dimnames.
1352///
1353/// ## Details:
1354///
1355/// The data set ‘euro’ contains the value of 1 Euro in all currencies
1356/// participating in the European monetary union (Austrian Schilling
1357/// ATS, Belgian Franc BEF, German Mark DEM, Spanish Peseta ESP,
1358/// Finnish Markka FIM, French Franc FRF, Irish Punt IEP, Italian Lira
1359/// ITL, Luxembourg Franc LUF, Dutch Guilder NLG and Portuguese Escudo
1360/// PTE).  These conversion rates were fixed by the European Union on
1361/// December 31, 1998.  To convert old prices to Euro prices, divide
1362/// by the respective rate and round to 2 digits.
1363///
1364/// The data set ‘euro.cross’ contains conversion rates between the
1365/// various Euro currencies, i.e., the result of ‘outer(1 / euro,
1366/// euro)’.
1367///
1368/// ## Examples:
1369///
1370/// ```r
1371/// cbind(euro)
1372///
1373/// ## These relations hold:
1374/// euro == signif(euro, 6) # [6 digit precision in Euro's definition]
1375/// all(euro.cross == outer(1/euro, euro))
1376///
1377/// ## Convert 20 Euro to Belgian Franc
1378/// 20 * euro["BEF"]
1379/// ## Convert 20 Austrian Schilling to Euro
1380/// 20 / euro["ATS"]
1381/// ## Convert 20 Spanish Pesetas to Italian Lira
1382/// 20 * euro.cross["ESP", "ITL"]
1383///
1384/// require(graphics)
1385/// dotchart(euro,
1386/// main = "euro data: 1 Euro in currency unit")
1387/// dotchart(1/euro,
1388/// main = "euro data: 1 currency unit in Euros")
1389/// dotchart(log(euro, 10),
1390/// main = "euro data: log10(1 Euro in currency unit)")
1391/// ```
1392pub fn euro() -> PolarsResult<DataFrame> {
1393    CsvReader::new(Cursor::new(include_str!("euro.csv"))).finish()
1394}
1395
1396/// # Distances Between European Cities and Between US Cities
1397///
1398/// ## Description:
1399///
1400/// The ‘eurodist’ gives the road distances (in km) between 21 cities
1401/// in Europe.  The data are taken from a table in _The Cambridge
1402/// Encyclopaedia_.
1403///
1404/// ‘UScitiesD’ gives “straight line” distances between 10 cities in
1405/// the US.
1406///
1407/// ## Usage:
1408///
1409/// eurodist
1410/// UScitiesD
1411///
1412/// ## Format:
1413///
1414/// ‘dist’ objects based on 21 and 10 objects, respectively.  (You
1415/// must have the ‘stats’ package loaded to have the methods for this
1416/// kind of object available).
1417///
1418/// ## Source:
1419///
1420/// Crystal, D. Ed. (1990) _The Cambridge Encyclopaedia_.  Cambridge:
1421/// Cambridge University Press,
1422///
1423/// The US cities distances were provided by Pierre Legendre.
1424pub fn eurodist() -> PolarsResult<DataFrame> {
1425    CsvReader::new(Cursor::new(include_str!("eurodist.csv"))).finish()
1426}
1427
1428/// # Daily Closing Prices of Major European Stock Indices, 1991-1998
1429///
1430/// ## Description:
1431///
1432/// Contains the daily closing prices of major European stock indices:
1433/// Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE.  The
1434/// data are sampled in business time, i.e., weekends and holidays are
1435/// omitted.
1436///
1437/// ## Usage:
1438///
1439/// EuStockMarkets
1440///
1441/// ## Format:
1442///
1443/// A multivariate time series with 1860 observations on 4 variables.
1444/// The object is of class ‘"mts"’.
1445///
1446/// ## Source:
1447///
1448/// The data were kindly provided by Erste Bank AG, Vienna, Austria.
1449pub fn eu_stock_markets() -> PolarsResult<DataFrame> {
1450    CsvReader::new(Cursor::new(include_str!("EuStockMarkets.csv"))).finish()
1451}
1452
1453/// # Old Faithful Geyser Data
1454///
1455/// ## Description:
1456///
1457/// Waiting time between eruptions and the duration of the eruption
1458/// for the Old Faithful geyser in Yellowstone National Park, Wyoming,
1459/// USA.
1460///
1461/// ## Usage:
1462///
1463/// faithful
1464///
1465/// ## Format:
1466///
1467/// A data frame with 272 observations on 2 variables.
1468///
1469/// * \[,1\]  eruptions  numeric  Eruption time in mins
1470/// * \[,2\]  waiting numeric  Waiting time to next
1471/// eruption (in mins)
1472///
1473/// ## Details:
1474///
1475/// A closer look at ‘faithful$eruptions’ reveals that these are
1476/// heavily rounded times originally in seconds, where multiples of 5
1477/// are more frequent than expected under non-human measurement.  For
1478/// a better version of the eruption times, see the example below.
1479///
1480/// There are many versions of this dataset around: Azzalini and
1481/// Bowman (1990) use a more complete version.
1482///
1483/// ## Source:
1484///
1485/// W. Härdle.
1486///
1487/// ## References:
1488///
1489/// Härdle, W. (1991).  _Smoothing Techniques with Implementation in
1490/// S_.  New York: Springer.
1491///
1492/// Azzalini, A. and Bowman, A. W. (1990).  A look at some data on the
1493/// Old Faithful geyser.  _Applied Statistics_, *39*, 357-365.
1494/// doi:10.2307/2347385 <https://doi.org/10.2307/2347385>.
1495///
1496/// ## See Also:
1497///
1498/// ‘geyser’ in package ‘MASS’ for the Azzalini-Bowman version.
1499///
1500/// ## Examples:
1501///
1502/// ```r
1503/// require(stats); require(graphics)
1504/// f.tit <-  "faithful data: Eruptions of Old Faithful"
1505///
1506/// ne60 <- round(e60 <- 60 * faithful$eruptions)
1507/// all.equal(e60, ne60) # relative diff. ~ 1/10000
1508/// table(zapsmall(abs(e60 - ne60))) # 0, 0.02 or 0.04
1509/// faithful$better.eruptions <- ne60 / 60
1510/// te <- table(ne60)
1511/// te[te >= 4] # (too) many multiples of 5 !
1512/// plot(names(te), te, type = "h", main = f.tit, xlab = "Eruption time (sec)")
1513///
1514/// plot(faithful[, -3], main = f.tit,
1515///  xlab = "Eruption time (min)",
1516///  ylab = "Waiting time to next eruption (min)")
1517/// lines(lowess(faithful$eruptions, faithful$waiting, f = 2/3, iter = 3),
1518/// col = "red")
1519/// ```
1520pub fn faithful() -> PolarsResult<DataFrame> {
1521    CsvReader::new(Cursor::new(include_str!("faithful.csv"))).finish()
1522}
1523
1524/// # Monthly Deaths from Lung Diseases in the UK
1525///
1526/// ## Description:
1527///
1528/// Three time series giving the monthly deaths from bronchitis,
1529/// emphysema and asthma in the UK, 1974-1979, both sexes (‘ldeaths’),
1530/// males (‘mdeaths’) and females (‘fdeaths’).
1531///
1532/// ## Usage:
1533///
1534/// ldeaths
1535/// fdeaths
1536/// mdeaths
1537///
1538/// ## Source:
1539///
1540/// P. J. Diggle (1990) _Time Series: A Biostatistical Introduction._
1541/// Oxford, table A.3
1542///
1543/// ## Examples:
1544///
1545/// ```r
1546/// require(stats); require(graphics) # for time
1547/// plot(ldeaths)
1548/// plot(mdeaths, fdeaths)
1549/// ## Better labels:
1550/// yr <- floor(tt <- time(mdeaths))
1551/// plot(mdeaths, fdeaths,
1552///  xy.labels = paste(month.abb[12*(tt - yr)], yr-1900, sep = "'"))
1553/// ```
1554pub fn fdeaths() -> PolarsResult<DataFrame> {
1555    CsvReader::new(Cursor::new(include_str!("fdeaths.csv"))).finish()
1556}
1557
1558/// # Determination of Formaldehyde
1559///
1560/// ## Description:
1561///
1562/// These data are from a chemical experiment to prepare a standard
1563/// curve for the determination of formaldehyde by the addition of
1564/// chromatropic acid and concentrated sulphuric acid and the reading
1565/// of the resulting purple color on a spectrophotometer.
1566///
1567/// ## Usage:
1568///
1569/// Formaldehyde
1570///
1571/// ## Format:
1572///
1573/// A data frame with 6 observations on 2 variables.
1574///
1575/// * \[,1\]  carb numeric  Carbohydrate (ml)
1576/// * \[,2\]  optden  numeric  Optical Density
1577///
1578/// ## Source:
1579///
1580/// Bennett, N. A. and N. L. Franklin (1954) _Statistical Analysis in
1581/// Chemistry and the Chemical Industry_.  New York: Wiley.
1582///
1583/// ## References:
1584///
1585/// McNeil, D. R. (1977) _Interactive Data Analysis._ New York: Wiley.
1586///
1587/// ## Examples:
1588///
1589/// ```r
1590/// require(stats); require(graphics)
1591/// plot(optden ~ carb, data = Formaldehyde,
1592///  xlab = "Carbohydrate (ml)", ylab = "Optical Density",
1593///  main = "Formaldehyde data", col = 4, las = 1)
1594/// abline(fm1 <- lm(optden ~ carb, data = Formaldehyde))
1595/// summary(fm1)
1596/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
1597/// plot(fm1)
1598/// par(opar)
1599/// ```
1600pub fn formaldehyde() -> PolarsResult<DataFrame> {
1601    CsvReader::new(Cursor::new(include_str!("Formaldehyde.csv"))).finish()
1602}
1603
1604/// # Freeny's Revenue Data
1605///
1606/// ## Description:
1607///
1608/// Freeny's data on quarterly revenue and explanatory variables.
1609///
1610/// ## Usage:
1611///
1612/// freeny
1613/// freeny.x
1614/// freeny.y
1615///
1616/// ## Format:
1617///
1618/// There are three ‘freeny’ data sets.
1619///
1620/// * ‘freeny.y’ is a time series with 39 observations on quarterly
1621/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
1625/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
1626/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
1627/// ‘market.potential’ obtained from the above two data objects.
1628///
1629/// ## Source:
1630///
1631/// A. E. Freeny (1977) _A Portable Linear Regression Package with
1632/// Test Programs_.  Bell Laboratories memorandum.
1633///
1634/// ## References:
1635///
1636/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
1637/// Language_.  Wadsworth & Brooks/Cole.
1638///
1639/// ## Examples:
1640///
1641/// ```r
1642/// require(stats); require(graphics)
1643/// summary(freeny)
1644/// pairs(freeny, main = "freeny data")
1645/// # gives warning: freeny$y has class "ts"
1646///
1647/// summary(fm1 <- lm(y ~ ., data = freeny))
1648/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
1649/// mar = c(4.1, 4.1, 2.1, 1.1))
1650/// plot(fm1)
1651/// par(opar)
1652/// ```
1653pub fn freeny() -> PolarsResult<DataFrame> {
1654    CsvReader::new(Cursor::new(include_str!("freeny.csv"))).finish()
1655}
1656
1657/// # Freeny's Revenue Data
1658///
1659/// ## Description:
1660///
1661/// Freeny's data on quarterly revenue and explanatory variables.
1662///
1663/// ## Usage:
1664///
1665/// freeny
1666/// freeny.x
1667/// freeny.y
1668///
1669/// ## Format:
1670///
1671/// There are three ‘freeny’ data sets.
1672///
1673/// * ‘freeny.y’ is a time series with 39 observations on quarterly
1674/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
1678/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
1679/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
1680/// ‘market.potential’ obtained from the above two data objects.
1681///
1682/// ## Source:
1683///
1684/// A. E. Freeny (1977) _A Portable Linear Regression Package with
1685/// Test Programs_.  Bell Laboratories memorandum.
1686///
1687/// ## References:
1688///
1689/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
1690/// Language_.  Wadsworth & Brooks/Cole.
1691///
1692/// ## Examples:
1693///
1694/// ```r
1695/// require(stats); require(graphics)
1696/// summary(freeny)
1697/// pairs(freeny, main = "freeny data")
1698/// # gives warning: freeny$y has class "ts"
1699///
1700/// summary(fm1 <- lm(y ~ ., data = freeny))
1701/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
1702/// mar = c(4.1, 4.1, 2.1, 1.1))
1703/// plot(fm1)
1704/// par(opar)
1705/// ```
1706pub fn freeny_x() -> PolarsResult<DataFrame> {
1707    CsvReader::new(Cursor::new(include_str!("freeny.x.csv"))).finish()
1708}
1709
1710/// # Freeny's Revenue Data
1711///
1712/// ## Description:
1713///
1714/// Freeny's data on quarterly revenue and explanatory variables.
1715///
1716/// ## Usage:
1717///
1718/// freeny
1719/// freeny.x
1720/// freeny.y
1721///
1722/// ## Format:
1723///
1724/// There are three ‘freeny’ data sets.
1725///
1726/// * ‘freeny.y’ is a time series with 39 observations on quarterly
1727/// revenue from (1962,2Q) to (1971,4Q).
/// * ‘freeny.x’ is a matrix of explanatory variables.  The columns are
/// ‘freeny.y’ lagged 1 quarter, price index, income level, and market
/// potential.
1731/// * Finally, ‘freeny’ is a data frame with variables ‘y’,
1732/// ‘lag.quarterly.revenue’, ‘price.index’, ‘income.level’, and
1733/// ‘market.potential’ obtained from the above two data objects.
1734///
1735/// ## Source:
1736///
1737/// A. E. Freeny (1977) _A Portable Linear Regression Package with
1738/// Test Programs_.  Bell Laboratories memorandum.
1739///
1740/// ## References:
1741///
1742/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
1743/// Language_.  Wadsworth & Brooks/Cole.
1744///
1745/// ## Examples:
1746///
1747/// ```r
1748/// require(stats); require(graphics)
1749/// summary(freeny)
1750/// pairs(freeny, main = "freeny data")
1751/// # gives warning: freeny$y has class "ts"
1752///
1753/// summary(fm1 <- lm(y ~ ., data = freeny))
1754/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
1755/// mar = c(4.1, 4.1, 2.1, 1.1))
1756/// plot(fm1)
1757/// par(opar)
1758/// ```
1759pub fn freeny_y() -> PolarsResult<DataFrame> {
1760    CsvReader::new(Cursor::new(include_str!("freeny.y.csv"))).finish()
1761}
1762
1763/// # Hair and Eye Color of Statistics Students
1764///
1765/// ## Description:
1766///
1767/// Distribution of hair and eye color and sex in 592 statistics
1768/// students.
1769///
1770/// ## Usage:
1771///
1772/// HairEyeColor
1773///
1774/// ## Format:
1775///
1776/// A 3-dimensional array resulting from cross-tabulating 592
1777/// observations on 3 variables.  The variables and their levels are
1778/// as follows:
1779///
1780/// | No | Name  | Levels                    |
1781/// |----|-------|---------------------------|
1782/// | 1  | Hair  | Black, Brown, Red, Blond  |
1783/// | 2  | Eye   | Brown, Blue, Hazel, Green |
1784/// | 3  | Sex   | Male, Female              |
1785///
1786/// ## Details:
1787///
1788/// The Hair x Eye table comes from a survey of students at the
1789/// University of Delaware reported by Snee (1974).  The split by
1790/// ‘Sex’ was added by Friendly (1992a) for didactic purposes.
1791///
1792/// This data set is useful for illustrating various techniques for
1793/// the analysis of contingency tables, such as the standard
1794/// chi-squared test or, more generally, log-linear modelling, and
1795/// graphical methods such as mosaic plots, sieve diagrams or
1796/// association plots.
1797///
1798/// ## Source:
1799///
1800/// <http://www.datavis.ca/sas/vcd/catdata/haireye.sas>
1801///
1802/// Snee (1974) gives the two-way table aggregated over ‘Sex’.  The
1803/// ‘Sex’ split of the ‘Brown hair, Brown eye’ cell was changed to
1804/// agree with that used by Friendly (2000).
1805///
1806/// ## References:
1807///
1808/// Snee, R. D. (1974).  Graphical display of two-way contingency
1809/// tables.  _The American Statistician_, *28*, 9-12.
1810/// doi:10.2307/2683520 <https://doi.org/10.2307/2683520>.
1811///
1812/// Friendly, M. (1992a).  Graphical methods for categorical data.
1813/// _SAS User Group International Conference Proceedings_, *17*,
1814/// 190-200.  <http://datavis.ca/papers/sugi/sugi17.pdf>
1815///
1816/// Friendly, M. (1992b).  Mosaic displays for loglinear models.
1817/// _Proceedings of the Statistical Graphics Section_, American
1818/// Statistical Association, pp. 61-68.
1819/// <http://www.datavis.ca/papers/asa92.html>
1820///
1821/// Friendly, M. (2000).  _Visualizing Categorical Data_.  SAS
1822/// Institute, ISBN 1-58025-660-0.
1823///
1824/// ## See Also:
1825///
1826/// ‘chisq.test’, ‘loglin’, ‘mosaicplot’
1827///
1828/// ## Examples:
1829///
1830/// ```r
1831/// require(graphics)
1832/// ## Full mosaic
1833/// mosaicplot(HairEyeColor)
1834/// ## Aggregate over sex (as in Snee's original data)
1835/// x <- apply(HairEyeColor, c(1, 2), sum)
1836/// x
1837/// mosaicplot(x, main = "Relation between hair and eye color")
1838/// ```
1839pub fn hair_eye_color() -> PolarsResult<DataFrame> {
1840    CsvReader::new(Cursor::new(include_str!("HairEyeColor.csv"))).finish()
1841}
1842
1843/// # Harman Example 2.3
1844///
1845/// ## Description:
1846///
1847/// A correlation matrix of eight physical measurements on 305 girls
1848/// between ages seven and seventeen.
1849///
1850/// ## Usage:
1851///
1852/// Harman23.cor
1853///
1854/// ## Source:
1855///
1856/// Harman, H. H. (1976) _Modern Factor Analysis_, Third Edition
1857/// Revised, University of Chicago Press, Table 2.3.
1858///
1859/// ## Examples:
1860///
1861/// ```r
1862/// require(stats)
1863/// (Harman23.FA <- factanal(factors = 1, covmat = Harman23.cor))
1864/// for(factors in 2:4) print(update(Harman23.FA, factors = factors))
1865/// ```
1866pub fn harman23_cor() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
1867    let mut center = Vec::new();
1868    let mut n_obs = 0;
1869    CsvReader::new(Cursor::new(include_str!("Harman23.cor.center.csv")))
1870        .finish()?
1871        .column("x")?
1872        .cast(&DataType::Float64)?
1873        .f64()?
1874        .for_each(|d| center.push(d.unwrap() as usize));
1875    CsvReader::new(Cursor::new(include_str!("Harman23.cor.n.obs.csv")))
1876        .finish()?
1877        .column("x")?
1878        .cast(&DataType::Float64)?
1879        .f64()?
1880        .for_each(|d| n_obs = d.unwrap() as usize);
1881    Ok((
1882        CsvReader::new(Cursor::new(include_str!("Harman23.cor.cov.csv"))).finish()?,
1883        center,
1884        n_obs,
1885    ))
1886}
1887
1888/// # Harman Example 7.4
1889///
1890/// ## Description:
1891///
1892/// A correlation matrix of 24 psychological tests given to 145
/// seventh- and eighth-grade children in a Chicago suburb by Holzinger
1894/// and Swineford.
1895///
1896/// ## Usage:
1897///
1898/// Harman74.cor
1899///
1900/// ## Source:
1901///
1902/// Harman, H. H. (1976) _Modern Factor Analysis_, Third Edition
1903/// Revised, University of Chicago Press, Table 7.4.
1904///
1905/// ## Examples:
1906///
1907/// ```r
1908/// require(stats)
1909/// (Harman74.FA <- factanal(factors = 1, covmat = Harman74.cor))
1910/// for(factors in 2:5) print(update(Harman74.FA, factors = factors))
1911/// Harman74.FA <- factanal(factors = 5, covmat = Harman74.cor,
1912/// rotation = "promax")
1913/// print(Harman74.FA$loadings, sort = TRUE)
1914/// ```
1915pub fn harman74() -> PolarsResult<(DataFrame, Vec<usize>, usize)> {
1916    let mut center = Vec::new();
1917    let mut n_obs = 0;
1918    CsvReader::new(Cursor::new(include_str!("Harman74.cor.center.csv")))
1919        .finish()?
1920        .column("x")?
1921        .cast(&DataType::Float64)?
1922        .f64()?
1923        .for_each(|d| center.push(d.unwrap() as usize));
1924    CsvReader::new(Cursor::new(include_str!("Harman74.cor.n.obs.csv")))
1925        .finish()?
1926        .column("x")?
1927        .cast(&DataType::Float64)?
1928        .f64()?
1929        .for_each(|d| n_obs = d.unwrap() as usize);
1930    Ok((
1931        CsvReader::new(Cursor::new(include_str!("Harman74.cor.cov.csv"))).finish()?,
1932        center,
1933        n_obs,
1934    ))
1935}
1936
1937/// # Pharmacokinetics of Indomethacin
1938///
1939/// ## Description:
1940///
1941/// The ‘Indometh’ data frame has 66 rows and 3 columns of data on the
1942/// pharmacokinetics of indometacin (or, older spelling,
1943/// ‘indomethacin’).
1944///
1945/// ## Usage:
1946///
1947/// Indometh
1948///
1949/// ## Format:
1950///
1951/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
1952/// "groupedData", "data.frame")’ containing the following columns:
1953///
/// * Subject an ordered factor containing the subject codes.  The
1955/// ordering is according to increasing maximum response.
1956/// * time a numeric vector of times at which blood samples were drawn
1957/// (hr).
1958/// * conc a numeric vector of plasma concentrations of indometacin
1959/// (mcg/ml).
1960///
1961/// ## Details:
1962///
/// Each of the six subjects was given an intravenous injection of
1964/// indometacin.
1965///
1966/// This dataset was originally part of package ‘nlme’, and that has
1967/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
1968/// for its grouped-data classes.
1969///
1970/// ## Source:
1971///
1972/// Kwan, Breault, Umbenhauer, McMahon and Duggan (1976) Kinetics of
1973/// Indomethacin absorption, elimination, and enterohepatic
1974/// circulation in man.  _Journal of Pharmacokinetics and
1975/// Biopharmaceutics_ *4*, 255-280.
1976///
1977/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
1978/// Repeated Measurement Data_, Chapman & Hall (section 5.2.4, p. 129)
1979///
1980/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
1981/// and S-PLUS_, Springer.
1982///
1983/// ## See Also:
1984///
1985/// ‘SSbiexp’ for models fitted to this dataset.
1986pub fn indometh() -> PolarsResult<DataFrame> {
1987    CsvReader::new(Cursor::new(include_str!("Indometh.csv"))).finish()
1988}
1989
1990/// # Infertility after Spontaneous and Induced Abortion
1991///
1992/// ## Description:
1993///
1994/// This is a matched case-control study dating from before the
1995/// availability of conditional logistic regression.
1996///
1997/// ## Usage:
1998///
1999/// infert
2000///
2001/// ## Format:
2002///
2003/// 1.  Education
2004///     * 0 = 0-5  years
2005///     * 1 = 6-11 years
2006///     * 2 = 12+  years
2007/// 2.  age  age in years of case
2008/// 3.  parity  count
2009/// 4.  number of prior induced abortions
2010///     * 0 = 0
2011///     * 1 = 1
2012///     * 2 = 2 or more
2013/// 5.  case status
2014///     * 1 = case
2015///     * 0 = control
2016/// 6.  number of prior spontaneous abortions
2017///     * 0 = 0
2018///     * 1 = 1
2019///     * 2 = 2 or more
2020/// 7.  matched set number  1-83
2021/// 8.  stratum number 1-63
2022///
2023/// ## Note:
2024///
2025/// One case with two prior spontaneous abortions and two prior
2026/// induced abortions is omitted.
2027///
2028/// ## Source:
2029///
2030/// Trichopoulos _et al_ (1976) _Br. J. of Obst. and Gynaec._ *83*,
2031/// 645-650.
2032///
2033/// ## Examples:
2034///
2035/// ```r
2036/// require(stats)
2037/// model1 <- glm(case ~ spontaneous+induced, data = infert, family = binomial())
2038/// summary(model1)
2039/// ## adjusted for other potential confounders:
2040/// summary(model2 <- glm(case ~ age+parity+education+spontaneous+induced,
2041/// data = infert, family = binomial()))
2042/// ## Really should be analysed by conditional logistic regression
2043/// ## which is in the survival package
2044/// if(require(survival)){
2045///  model3 <- clogit(case ~ spontaneous+induced+strata(stratum), data = infert)
2046///  print(summary(model3))
2047///  detach()  # survival (conflicts)
2048/// }
2049/// ```
2050pub fn infert() -> PolarsResult<DataFrame> {
2051    CsvReader::new(Cursor::new(include_str!("infert.csv"))).finish()
2052}
2053
2054/// # Effectiveness of Insect Sprays
2055///
2056/// ## Description:
2057///
2058/// The counts of insects in agricultural experimental units treated
2059/// with different insecticides.
2060///
2061/// ## Usage:
2062///
2063/// InsectSprays
2064///
2065/// ## Format:
2066///
2067/// A data frame with 72 observations on 2 variables.
2068///
2069/// * \[,1\]  count  numeric  Insect count
/// * \[,2\]  spray  factor  The type of spray
2071///
2072/// ## Source:
2073///
2074/// Beall, G., (1942) The Transformation of data from entomological
2075/// field experiments, _Biometrika_, *29*, 243-262.
2076///
2077/// ## References:
2078///
2079/// McNeil, D. (1977) _Interactive Data Analysis_.  New York: Wiley.
2080///
2081/// ## Examples:
2082///
2083/// ```r
2084/// require(stats); require(graphics)
2085/// boxplot(count ~ spray, data = InsectSprays,
2086///  xlab = "Type of spray", ylab = "Insect count",
2087///  main = "InsectSprays data", varwidth = TRUE, col = "lightgray")
2088/// fm1 <- aov(count ~ spray, data = InsectSprays)
2089/// summary(fm1)
2090/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
2091/// plot(fm1)
2092/// fm2 <- aov(sqrt(count) ~ spray, data = InsectSprays)
2093/// summary(fm2)
2094/// plot(fm2)
2095/// par(opar)
2096/// ```
2097pub fn insect_sprays() -> PolarsResult<DataFrame> {
2098    CsvReader::new(Cursor::new(include_str!("InsectSprays.csv"))).finish()
2099}
2100
2101/// # Edgar Anderson's Iris Data
2102///
2103/// ## Description:
2104///
2105/// This famous (Fisher's or Anderson's) iris data set gives the
2106/// measurements in centimeters of the variables sepal length and
2107/// width and petal length and width, respectively, for 50 flowers
2108/// from each of 3 species of iris.  The species are _Iris setosa_,
2109/// _versicolor_, and _virginica_.
2110///
2111/// ## Usage:
2112///
2113/// iris
2114/// iris3
2115///
2116/// ## Format:
2117///
2118/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
2119/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
2120/// ‘Petal.Width’, and ‘Species’.
2121///
2122/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
2123/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
2124/// gives the case number within the species subsample, the second the
2125/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
2126/// ‘Petal W.’, and the third the species.
2127///
2128/// ## Source:
2129///
2130/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
2131/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
2132///
2133/// The data were collected by Anderson, Edgar (1935).  The irises of
2134/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
2135/// *59*, 2-5.
2136///
2137/// ## References:
2138///
2139/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
2140/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
2141///
2142/// ## See Also:
2143///
2144/// ‘matplot’ some examples of which use ‘iris’.
2145///
2146/// ## Examples:
2147///
2148/// ```r
2149/// dni3 <- dimnames(iris3)
2150/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
2151/// dimnames = list(NULL, sub(" L.",".Length",
2152/// sub(" W.",".Width", dni3[[2]])))),
2153/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
2154/// all.equal(ii, iris) # TRUE
2155/// ```
2156pub fn iris() -> PolarsResult<DataFrame> {
2157    CsvReader::new(Cursor::new(include_str!("iris.csv"))).finish()
2158}
2159
2160/// # Edgar Anderson's Iris Data
2161///
2162/// ## Description:
2163///
2164/// This famous (Fisher's or Anderson's) iris data set gives the
2165/// measurements in centimeters of the variables sepal length and
2166/// width and petal length and width, respectively, for 50 flowers
2167/// from each of 3 species of iris.  The species are _Iris setosa_,
2168/// _versicolor_, and _virginica_.
2169///
2170/// ## Usage:
2171///
2172/// iris
2173/// iris3
2174///
2175/// ## Format:
2176///
2177/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
2178/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
2179/// ‘Petal.Width’, and ‘Species’.
2180///
2181/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
2182/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
2183/// gives the case number within the species subsample, the second the
2184/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
2185/// ‘Petal W.’, and the third the species.
2186///
2187/// ## Source:
2188///
2189/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
2190/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
2191///
2192/// The data were collected by Anderson, Edgar (1935).  The irises of
2193/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
2194/// *59*, 2-5.
2195///
2196/// ## References:
2197///
2198/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
2199/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
2200///
2201/// ## See Also:
2202///
2203/// ‘matplot’ some examples of which use ‘iris’.
2204///
2205/// ## Examples:
2206///
2207/// ```r
2208/// dni3 <- dimnames(iris3)
2209/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
2210/// dimnames = list(NULL, sub(" L.",".Length",
2211/// sub(" W.",".Width", dni3[[2]])))),
2212/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
2213/// all.equal(ii, iris) # TRUE
2214/// ```
2215pub fn iris3_setosa() -> PolarsResult<DataFrame> {
2216    CsvReader::new(Cursor::new(include_str!("iris3.Setosa.csv"))).finish()
2217}
2218
2219/// # Edgar Anderson's Iris Data
2220///
2221/// ## Description:
2222///
2223/// This famous (Fisher's or Anderson's) iris data set gives the
2224/// measurements in centimeters of the variables sepal length and
2225/// width and petal length and width, respectively, for 50 flowers
2226/// from each of 3 species of iris.  The species are _Iris setosa_,
2227/// _versicolor_, and _virginica_.
2228///
2229/// ## Usage:
2230///
2231/// iris
2232/// iris3
2233///
2234/// ## Format:
2235///
2236/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
2237/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
2238/// ‘Petal.Width’, and ‘Species’.
2239///
2240/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
2241/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
2242/// gives the case number within the species subsample, the second the
2243/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
2244/// ‘Petal W.’, and the third the species.
2245///
2246/// ## Source:
2247///
2248/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
2249/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
2250///
2251/// The data were collected by Anderson, Edgar (1935).  The irises of
2252/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
2253/// *59*, 2-5.
2254///
2255/// ## References:
2256///
2257/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
2258/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
2259///
2260/// ## See Also:
2261///
2262/// ‘matplot’ some examples of which use ‘iris’.
2263///
2264/// ## Examples:
2265///
2266/// ```r
2267/// dni3 <- dimnames(iris3)
2268/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
2269/// dimnames = list(NULL, sub(" L.",".Length",
2270/// sub(" W.",".Width", dni3[[2]])))),
2271/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
2272/// all.equal(ii, iris) # TRUE
2273/// ```
2274pub fn iris3_versicolor() -> PolarsResult<DataFrame> {
2275    CsvReader::new(Cursor::new(include_str!("iris3.Versicolor.csv"))).finish()
2276}
2277
2278/// # Edgar Anderson's Iris Data
2279///
2280/// ## Description:
2281///
2282/// This famous (Fisher's or Anderson's) iris data set gives the
2283/// measurements in centimeters of the variables sepal length and
2284/// width and petal length and width, respectively, for 50 flowers
2285/// from each of 3 species of iris.  The species are _Iris setosa_,
2286/// _versicolor_, and _virginica_.
2287///
2288/// ## Usage:
2289///
2290/// iris
2291/// iris3
2292///
2293/// ## Format:
2294///
2295/// ‘iris’ is a data frame with 150 cases (rows) and 5 variables
2296/// (columns) named ‘Sepal.Length’, ‘Sepal.Width’, ‘Petal.Length’,
2297/// ‘Petal.Width’, and ‘Species’.
2298///
2299/// ‘iris3’ gives the same data arranged as a 3-dimensional array of
2300/// size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
2301/// gives the case number within the species subsample, the second the
2302/// measurements with names ‘Sepal L.’, ‘Sepal W.’, ‘Petal L.’, and
2303/// ‘Petal W.’, and the third the species.
2304///
2305/// ## Source:
2306///
2307/// Fisher, R. A. (1936) The use of multiple measurements in taxonomic
2308/// problems.  _Annals of Eugenics_, *7*, Part II, 179-188.
2309///
2310/// The data were collected by Anderson, Edgar (1935).  The irises of
2311/// the Gaspe Peninsula, _Bulletin of the American Iris Society_,
2312/// *59*, 2-5.
2313///
2314/// ## References:
2315///
2316/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
2317/// Language_.  Wadsworth & Brooks/Cole. (has ‘iris3’ as ‘iris’.)
2318///
2319/// ## See Also:
2320///
2321/// ‘matplot’ some examples of which use ‘iris’.
2322///
2323/// ## Examples:
2324///
2325/// ```r
2326/// dni3 <- dimnames(iris3)
2327/// ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol = 4,
2328/// dimnames = list(NULL, sub(" L.",".Length",
2329/// sub(" W.",".Width", dni3[[2]])))),
2330/// Species = gl(3, 50, labels = sub("S", "s", sub("V", "v", dni3[[3]]))))
2331/// all.equal(ii, iris) # TRUE
2332/// ```
2333pub fn iris3_virginica() -> PolarsResult<DataFrame> {
2334    CsvReader::new(Cursor::new(include_str!("iris3.Virginica.csv"))).finish()
2335}
2336
2337/// # Areas of the World's Major Landmasses
2338///
2339/// ## Description:
2340///
2341/// The areas in thousands of square miles of the landmasses which
2342/// exceed 10,000 square miles.
2343///
2344/// ## Usage:
2345///
2346/// islands
2347///
2348/// ## Format:
2349///
2350/// A named vector of length 48.
2351///
2352/// ## Source:
2353///
2354/// The World Almanac and Book of Facts, 1975, page 406.
2355///
2356/// ## References:
2357///
2358/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
2359///
2360/// ## Examples:
2361///
2362/// ```r
2363/// require(graphics)
2364/// dotchart(log(islands, 10),
2365/// main = "islands data: log10(area) (log10(sq. miles))")
2366/// dotchart(log(islands[order(islands)], 10),
2367/// main = "islands data: log10(area) (log10(sq. miles))")
2368/// ```
2369pub fn islands() -> PolarsResult<DataFrame> {
2370    CsvReader::new(Cursor::new(include_str!("islands.csv"))).finish()
2371}
2372
2373/// # Quarterly Earnings per Johnson & Johnson Share
2374///
2375/// ## Description:
2376///
2377/// Quarterly earnings (dollars) per Johnson & Johnson share 1960-80.
2378///
2379/// ## Usage:
2380///
2381/// JohnsonJohnson
2382///
2383/// ## Format:
2384///
2385/// A quarterly time series
2386///
2387/// ## Source:
2388///
2389/// Shumway, R. H. and Stoffer, D. S. (2000) _Time Series Analysis and
2390/// its Applications_.  Second Edition.  Springer.  Example 1.1.
2391///
2392/// ## Examples:
2393///
2394/// ```r
2395/// require(stats); require(graphics)
2396/// JJ <- log10(JohnsonJohnson)
2397/// plot(JJ)
2398/// ## This example gives a possible-non-convergence warning on some
2399/// ## platforms, but does seem to converge on x86 Linux and Windows.
2400/// (fit <- StructTS(JJ, type = "BSM"))
2401/// tsdiag(fit)
2402/// sm <- tsSmooth(fit)
2403/// plot(cbind(JJ, sm[, 1], sm[, 3]-0.5), plot.type = "single",
2404///  col = c("black", "green", "blue"))
2405/// abline(h = -0.5, col = "grey60")
2406///
2407/// monthplot(fit)
2408/// ```
2409pub fn johnson_johnson() -> PolarsResult<DataFrame> {
2410    CsvReader::new(Cursor::new(include_str!("JohnsonJohnson.csv"))).finish()
2411}
2412
2413/// # Level of Lake Huron 1875-1972
2414///
2415/// ## Description:
2416///
2417/// Annual measurements of the level, in feet, of Lake Huron
2418/// 1875-1972.
2419///
2420/// ## Usage:
2421///
2422/// LakeHuron
2423///
2424/// ## Format:
2425///
2426/// A time series of length 98.
2427///
2428/// ## Source:
2429///
2430/// Brockwell, P. J. and Davis, R. A. (1991).  _Time Series and
2431/// Forecasting Methods_.  Second edition. Springer, New York. Series
2432/// A, page 555.
2433///
2434/// Brockwell, P. J. and Davis, R. A. (1996).  _Introduction to Time
2435/// Series and Forecasting_.  Springer, New York.  Sections 5.1 and
2436/// 7.6.
2437pub fn lake_huron() -> PolarsResult<DataFrame> {
2438    CsvReader::new(Cursor::new(include_str!("LakeHuron.csv"))).finish()
2439}
2440
2441/// # Monthly Deaths from Lung Diseases in the UK
2442///
2443/// ## Description:
2444///
2445/// Three time series giving the monthly deaths from bronchitis,
2446/// emphysema and asthma in the UK, 1974-1979, both sexes (‘ldeaths’),
2447/// males (‘mdeaths’) and females (‘fdeaths’).
2448///
2449/// ## Usage:
2450///
2451/// ldeaths
2452/// fdeaths
2453/// mdeaths
2454///
2455/// ## Source:
2456///
2457/// P. J. Diggle (1990) _Time Series: A Biostatistical Introduction._
2458/// Oxford, table A.3
2459///
2460/// ## Examples:
2461///
2462/// ```r
2463/// require(stats); require(graphics) # for time
2464/// plot(ldeaths)
2465/// plot(mdeaths, fdeaths)
2466/// ## Better labels:
2467/// yr <- floor(tt <- time(mdeaths))
2468/// plot(mdeaths, fdeaths,
2469///  xy.labels = paste(month.abb[12*(tt - yr)], yr-1900, sep = "'"))
2470/// ```
2471pub fn ldeaths() -> PolarsResult<DataFrame> {
2472    CsvReader::new(Cursor::new(include_str!("ldeaths.csv"))).finish()
2473}
2474
2475/// # Luteinizing Hormone in Blood Samples
2476///
2477/// ## Description:
2478///
2479/// A regular time series giving the luteinizing hormone in blood
2480/// samples at 10 mins intervals from a human female, 48 samples.
2481///
2482/// ## Usage:
2483///
2484/// lh
2485///
2486/// ## Source:
2487///
2488/// P.J. Diggle (1990) _Time Series: A Biostatistical Introduction._
2489/// Oxford, table A.1, series 3
2490pub fn lh() -> PolarsResult<DataFrame> {
2491    CsvReader::new(Cursor::new(include_str!("lh.csv"))).finish()
2492}
2493
2494/// # Intercountry Life-Cycle Savings Data
2495///
2496/// ## Description:
2497///
2498/// Data on the savings ratio 1960-1970.
2499///
2500/// ## Usage:
2501///
2502/// LifeCycleSavings
2503///
2504/// ## Format:
2505///
2506/// A data frame with 50 observations on 5 variables.
2507///
2508/// * \[,1\]  sr  numeric  aggregate personal savings
2509/// * \[,2\]  pop15  numeric  % of population under 15
2510/// * \[,3\]  pop75  numeric  % of population over 75
2511/// * \[,4\]  dpi numeric  real per-capita disposable
2512/// income
/// * \[,5\]  ddpi  numeric  % growth rate of dpi
2514///
2515/// ## Details:
2516///
2517/// Under the life-cycle savings hypothesis as developed by Franco
2518/// Modigliani, the savings ratio (aggregate personal saving divided
2519/// by disposable income) is explained by per-capita disposable
2520/// income, the percentage rate of change in per-capita disposable
2521/// income, and two demographic variables: the percentage of
2522/// population less than 15 years old and the percentage of the
2523/// population over 75 years old.  The data are averaged over the
2524/// decade 1960-1970 to remove the business cycle or other short-term
2525/// fluctuations.
2526///
/// ## Source:
2528///
2529/// The data were obtained from Belsley, Kuh and Welsch (1980).  They
2530/// in turn obtained the data from Sterling (1977).
2531///
2532/// ## References:
2533///
2534/// Sterling, Arnie (1977) Unpublished BS Thesis.  Massachusetts
2535/// Institute of Technology.
2536///
/// Belsley, D. A., Kuh, E. and Welsch, R. E. (1980) _Regression
2538/// Diagnostics_.  New York: Wiley.
2539///
2540/// ## Examples:
2541///
2542/// ```r
2543/// require(stats); require(graphics)
2544/// pairs(LifeCycleSavings, panel = panel.smooth,
2545/// main = "LifeCycleSavings data")
2546/// fm1 <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LifeCycleSavings)
2547/// summary(fm1)
2548/// ```
2549pub fn life_cycle_savings() -> PolarsResult<DataFrame> {
2550    CsvReader::new(Cursor::new(include_str!("LifeCycleSavings.csv"))).finish()
2551}
2552
2553/// # Growth of Loblolly pine trees
2554///
2555/// ## Description:
2556///
2557/// The ‘Loblolly’ data frame has 84 rows and 3 columns of records of
2558/// the growth of Loblolly pine trees.
2559///
2560/// ## Usage:
2561///
2562/// Loblolly
2563///
2564/// ## Format:
2565///
2566/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
2567/// "groupedData", "data.frame")’ containing the following columns:
2568///
2569/// * height a numeric vector of tree heights (ft).
2570/// * age a numeric vector of tree ages (yr).
2571/// * Seed an ordered factor indicating the seed source for the tree.
2572/// The ordering is according to increasing maximum height.
2573///
2574/// ## Details:
2575///
2576/// This dataset was originally part of package ‘nlme’, and that has
2577/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
2578/// for its grouped-data classes.
2579///
2580/// ## Source:
2581///
2582/// Kung, F. H. (1986), Fitting logistic growth curve with
2583/// predetermined carrying capacity, in _Proceedings of the
2584/// Statistical Computing Section, American Statistical Association_,
2585/// 340-343.
2586///
2587/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
2588/// and S-PLUS_, Springer.
2589///
2590/// ## Examples:
2591///
2592/// ```r
2593/// require(stats); require(graphics)
2594/// plot(height ~ age, data = Loblolly, subset = Seed == 329,
2595///  xlab = "Tree age (yr)", las = 1,
2596///  ylab = "Tree height (ft)",
2597///  main = "Loblolly data and fitted curve (Seed 329 only)")
2598/// fm1 <- nls(height ~ SSasymp(age, Asym, R0, lrc),
2599///  data = Loblolly, subset = Seed == 329)
2600/// age <- seq(0, 30, length.out = 101)
2601/// lines(age, predict(fm1, list(age = age)))
2602/// ```
2603pub fn loblolly() -> PolarsResult<DataFrame> {
2604    CsvReader::new(Cursor::new(include_str!("Loblolly.csv"))).finish()
2605}
2606
2607/// # Longley's Economic Regression Data
2608///
2609/// ## Description:
2610///
2611/// A macroeconomic data set which provides a well-known example for a
2612/// highly collinear regression.
2613///
2614/// ## Usage:
2615///
2616/// longley
2617///
2618/// ## Format:
2619///
2620/// A data frame with 7 economical variables, observed yearly from
2621/// 1947 to 1962 (n=16).
2622///
2623/// * ‘GNP.deflator’ GNP implicit price deflator (1954=100)
2624/// * ‘GNP’ Gross National Product.
2625/// * ‘Unemployed’ number of unemployed.
2626/// * ‘Armed.Forces’ number of people in the armed forces.
2627/// * ‘Population’ ‘noninstitutionalized’ population >= 14 years of age.
2628/// * ‘Year’ the year (time).
2629/// * ‘Employed’ number of people employed.
///
/// The regression ‘lm(Employed ~ .)’ is known to be highly collinear.
2631///
2632/// ## Source:
2633///
2634/// J. W. Longley (1967) An appraisal of least-squares programs from
2635/// the point of view of the user.  _Journal of the American
2636/// Statistical Association_ *62*, 819-841.
2637///
2638/// ## References:
2639///
2640/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
2641/// Language_.  Wadsworth & Brooks/Cole.
2642///
2643/// ## Examples:
2644///
2645/// ```r
2646/// require(stats); require(graphics)
2647/// ## give the data set in the form it is used in S-PLUS:
2648/// longley.x <- data.matrix(longley[, 1:6])
2649/// longley.y <- longley[, "Employed"]
2650/// pairs(longley, main = "longley data")
2651/// summary(fm1 <- lm(Employed ~ ., data = longley))
2652/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
2653/// mar = c(4.1, 4.1, 2.1, 1.1))
2654/// plot(fm1)
2655/// par(opar)
2656/// ```
2657pub fn longley() -> PolarsResult<DataFrame> {
2658    CsvReader::new(Cursor::new(include_str!("longley.csv"))).finish()
2659}
2660
2661/// # Annual Canadian Lynx trappings 1821-1934
2662///
2663/// ## Description:
2664///
2665/// Annual numbers of lynx trappings for 1821-1934 in Canada. Taken
2666/// from Brockwell & Davis (1991), this appears to be the series
2667/// considered by Campbell & Walker (1977).
2668///
2669/// ## Usage:
2670///
2671/// lynx
2672///
2673/// ## Source:
2674///
2675/// Brockwell, P. J. and Davis, R. A. (1991).  _Time Series and
2676/// Forecasting Methods_.  Second edition.  Springer. Series G (page
2677/// 557).
2678///
2679/// ## References:
2680///
2681/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988).  _The New
2682/// S Language_.  Wadsworth & Brooks/Cole.
2683///
2684/// Campbell, M. J. and Walker, A. M. (1977).  A Survey of statistical
2685/// work on the Mackenzie River series of annual Canadian lynx
2686/// trappings for the years 1821-1934 and a new analysis.  _Journal of
2687/// the Royal Statistical Society Series A_, *140*, 411-431.
2688/// doi:10.2307/2345277 <https://doi.org/10.2307/2345277>.
2689pub fn lynx() -> PolarsResult<DataFrame> {
2690    CsvReader::new(Cursor::new(include_str!("lynx.csv"))).finish()
2691}
2692
2693/// # Michelson Speed of Light Data
2694///
2695/// ## Description:
2696///
/// A classical data set of Michelson (but not the one with Morley) on
2698/// measurements done in 1879 on the speed of light.  The data
2699/// consists of five experiments, each consisting of 20 consecutive
2700/// ‘runs’.  The response is the speed of light measurement, suitably
2701/// coded (km/sec, with ‘299000’ subtracted).
2702///
2703/// ## Usage:
2704///
2705/// morley
2706///
2707/// ## Format:
2708///
2709/// A data frame with 100 observations on the following 3 variables.
2710///
2711/// * ‘Expt’ The experiment number, from 1 to 5.
2712/// * ‘Run’ The run number within each experiment.
2713/// * ‘Speed’ Speed-of-light measurement.
2714///
2715/// ## Details:
2716///
2717/// The data is here viewed as a randomized block experiment with
2718/// ‘experiment’ and ‘run’ as the factors.  ‘run’ may also be
2719/// considered a quantitative variate to account for linear (or
2720/// polynomial) changes in the measurement over the course of a single
2721/// experiment.
2722///
2723/// ## Note:
2724///
2725/// This is the same dataset as ‘michelson’ in package ‘MASS’.
2726///
2727/// ## Source:
2728///
2729/// A. J. Weekes (1986) _A Genstat Primer_.  London: Edward Arnold.
2730///
2731/// S. M. Stigler (1977) Do robust estimators work with real data?
2732/// _Annals of Statistics_ *5*, 1055-1098. (See Table 6.)
2733///
2734/// A. A. Michelson (1882) Experimental determination of the velocity
2735/// of light made at the United States Naval Academy, Annapolis.
2736/// _Astronomic Papers_ *1* 135-8.  U.S. Nautical Almanac Office.
2737/// (See Table 24.)
2738///
2739/// ## Examples:
2740///
2741/// ```r
2742/// require(stats); require(graphics)
2743/// michelson <- transform(morley,
2744///  Expt = factor(Expt), Run = factor(Run))
2745/// xtabs(~ Expt + Run, data = michelson)  # 5 x 20 balanced (two-way)
2746/// plot(Speed ~ Expt, data = michelson,
2747///  main = "Speed of Light Data", xlab = "Experiment No.")
2748/// fm <- aov(Speed ~ Run + Expt, data = michelson)
2749/// summary(fm)
2750/// fm0 <- update(fm, . ~ . - Run)
2751/// anova(fm0, fm)
2752/// ```
2753pub fn morley() -> PolarsResult<DataFrame> {
2754    CsvReader::new(Cursor::new(include_str!("morley.csv"))).finish()
2755}
2756
2757/// # Motor Trend Car Road Tests
2758///
2759/// ## Description:
2760///
2761/// The data was extracted from the 1974 _Motor Trend_ US magazine,
2762/// and comprises fuel consumption and 10 aspects of automobile design
2763/// and performance for 32 automobiles (1973-74 models).
2764///
2765/// ## Usage:
2766///
2767/// mtcars
2768///
2769/// ## Format:
2770///
2771/// A data frame with 32 observations on 11 (numeric) variables.
2772///
/// * \[, 1\]  mpg  Miles/(US) gallon
/// * \[, 2\]  cyl  Number of cylinders
2775/// * \[, 3\]  disp  Displacement (cu.in.)
2776/// * \[, 4\]  hp Gross horsepower
2777/// * \[, 5\]  drat  Rear axle ratio
2778/// * \[, 6\]  wt Weight (1000 lbs)
2779/// * \[, 7\]  qsec  1/4 mile time
2780/// * \[, 8\]  vs Engine (0 = V-shaped, 1 = straight)
2781/// * \[, 9\]  am Transmission (0 = automatic, 1 = manual)
2782/// * \[,10\]  gear  Number of forward gears
2783/// * \[,11\]  carb  Number of carburetors
2784///
2785/// ## Note:
2786///
2787/// Henderson and Velleman (1981) comment in a footnote to Table 1:
/// ‘Hocking \[original transcriber\]'s noncrucial coding of the Mazda's
2789/// rotary engine as a straight six-cylinder engine and the Porsche's
2790/// flat engine as a V engine, as well as the inclusion of the diesel
2791/// Mercedes 240D, have been retained to enable direct comparisons to
2792/// be made with previous analyses.’
2793///
2794/// ## Source:
2795///
2796/// Henderson and Velleman (1981), Building multiple regression models
2797/// interactively.  _Biometrics_, *37*, 391-411.
2798///
2799/// ## Examples:
2800///
2801/// ```r
2802/// require(graphics)
2803/// pairs(mtcars, main = "mtcars data", gap = 1/4)
2804/// coplot(mpg ~ disp | as.factor(cyl), data = mtcars,
2805/// panel = panel.smooth, rows = 1)
2806/// ## possibly more meaningful, e.g., for summary() or bivariate plots:
2807/// mtcars2 <- within(mtcars, {
2808/// vs <- factor(vs, labels = c("V", "S"))
2809/// am <- factor(am, labels = c("automatic", "manual"))
2810/// cyl  <- ordered(cyl)
2811/// gear <- ordered(gear)
2812/// carb <- ordered(carb)
2813/// })
2814/// summary(mtcars2)
2815/// ```
2816pub fn mtcars() -> PolarsResult<DataFrame> {
2817    CsvReader::new(Cursor::new(include_str!("mtcars.csv"))).finish()
2818}
2819
2820/// # Average Yearly Temperatures in New Haven
2821///
2822/// ## Description:
2823///
2824/// The mean annual temperature in degrees Fahrenheit in New Haven,
2825/// Connecticut, from 1912 to 1971.
2826///
2827/// ## Usage:
2828///
2829/// nhtemp
2830///
2831/// ## Format:
2832///
2833/// A time series of 60 observations.
2834///
2835/// ## Source:
2836///
2837/// Vaux, J. E. and Brinker, N. B. (1972) _Cycles_, *1972*, 117-121.
2838///
2839/// ## References:
2840///
2841/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
2842/// Wiley.
2843///
2844/// ## Examples:
2845///
2846/// ```r
2847/// require(stats); require(graphics)
2848/// plot(nhtemp, main = "nhtemp data",
2849///  ylab = "Mean annual temperature in New Haven, CT (deg. F)")
2850/// ```
2851pub fn nhtemp() -> PolarsResult<DataFrame> {
2852    CsvReader::new(Cursor::new(include_str!("nhtemp.csv"))).finish()
2853}
2854
2855/// # Flow of the River Nile
2856///
2857/// ## Description:
2858///
2859/// Measurements of the annual flow of the river Nile at Aswan
2860/// (formerly ‘Assuan’), 1871-1970, in 10^8 m^3, “with apparent
2861/// changepoint near 1898” (Cobb(1978), Table 1, p.249).
2862///
2863/// ## Usage:
2864///
2865/// Nile
2866///
2867/// ## Format:
2868///
2869/// A time series of length 100.
2870///
2871/// ## Source:
2872///
2873/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
2874/// State Space Methods_.  Oxford University Press.
2875///
2876/// ## References:
2877///
2878/// Balke, N. S. (1993).  Detecting level shifts in time series.
2879/// _Journal of Business and Economic Statistics_, *11*, 81-92.
2880/// doi:10.2307/1391308 <https://doi.org/10.2307/1391308>.
2881///
2882/// Cobb, G. W. (1978).  The problem of the Nile: conditional solution
2883/// to a change-point problem.  _Biometrika_ *65*, 243-51.
2884/// doi:10.2307/2335202 <https://doi.org/10.2307/2335202>.
2885///
2886/// ## Examples:
2887///
2888/// ```r
2889/// require(stats); require(graphics)
2890/// par(mfrow = c(2, 2))
2891/// plot(Nile)
2892/// acf(Nile)
2893/// pacf(Nile)
2894/// ar(Nile) # selects order 2
2895/// cpgram(ar(Nile)$resid)
2896/// par(mfrow = c(1, 1))
2897/// arima(Nile, c(2, 0, 0))
2898///
2899/// ## Now consider missing values, following Durbin & Koopman
2900/// NileNA <- Nile
2901/// NileNA[c(21:40, 61:80)] <- NA
2902/// arima(NileNA, c(2, 0, 0))
2903/// plot(NileNA)
2904/// pred <-
2905/// predict(arima(window(NileNA, 1871, 1890), c(2, 0, 0)), n.ahead = 20)
2906/// lines(pred$pred, lty = 3, col = "red")
2907/// lines(pred$pred + 2*pred$se, lty = 2, col = "blue")
2908/// lines(pred$pred - 2*pred$se, lty = 2, col = "blue")
2909/// pred <-
2910/// predict(arima(window(NileNA, 1871, 1930), c(2, 0, 0)), n.ahead = 20)
2911/// lines(pred$pred, lty = 3, col = "red")
2912/// lines(pred$pred + 2*pred$se, lty = 2, col = "blue")
2913/// lines(pred$pred - 2*pred$se, lty = 2, col = "blue")
2914///
2915/// ## Structural time series models
2916/// par(mfrow = c(3, 1))
2917/// plot(Nile)
2918/// ## local level model
2919/// (fit <- StructTS(Nile, type = "level"))
2920/// lines(fitted(fit), lty = 2)  # contemporaneous smoothing
2921/// lines(tsSmooth(fit), lty = 2, col = 4)# fixed-interval smoothing
2922/// plot(residuals(fit)); abline(h = 0, lty = 3)
2923/// ## local trend model
2924/// (fit2 <- StructTS(Nile, type = "trend")) ## constant trend fitted
2925/// pred <- predict(fit, n.ahead = 30)
2926/// ## with 50% confidence interval
2927/// ts.plot(Nile, pred$pred,
2928///  pred$pred + 0.67*pred$se, pred$pred -0.67*pred$se)
2929///
2930/// ## Now consider missing values
2931/// plot(NileNA)
2932/// (fit3 <- StructTS(NileNA, type = "level"))
2933/// lines(fitted(fit3), lty = 2)
2934/// lines(tsSmooth(fit3), lty = 3)
2935/// plot(residuals(fit3)); abline(h = 0, lty = 3)
2936/// ```
2937pub fn nile() -> PolarsResult<DataFrame> {
2938    CsvReader::new(Cursor::new(include_str!("Nile.csv"))).finish()
2939}
2940
2941/// # Average Monthly Temperatures at Nottingham, 1920-1939
2942///
2943/// ## Description:
2944///
2945/// A time series object containing average air temperatures at
2946/// Nottingham Castle in degrees Fahrenheit for 20 years.
2947///
2948/// ## Usage:
2949///
2950/// nottem
2951///
2952/// ## Source:
2953///
2954/// Anderson, O. D. (1976) _Time Series Analysis and Forecasting: The
2955/// Box-Jenkins approach._ Butterworths. Series R.
2956///
2957/// ## Examples:
2958///
2959/// ```r
2960/// require(stats); require(graphics)
2961/// nott <- window(nottem, end = c(1936,12))
2962/// fit <- arima(nott, order = c(1,0,0), list(order = c(2,1,0), period = 12))
2963/// nott.fore <- predict(fit, n.ahead = 36)
2964/// ts.plot(nott, nott.fore$pred, nott.fore$pred+2*nott.fore$se,
2965///  nott.fore$pred-2*nott.fore$se, gpars = list(col = c(1,1,4,4)))
2966/// ```
2967pub fn nottem() -> PolarsResult<DataFrame> {
2968    CsvReader::new(Cursor::new(include_str!("nottem.csv"))).finish()
2969}
2970
2971/// # Classical N, P, K Factorial Experiment
2972///
2973/// ## Description:
2974///
2975/// A classical N, P, K (nitrogen, phosphate, potassium) factorial
2976/// experiment on the growth of peas conducted on 6 blocks. Each half
2977/// of a fractional factorial design confounding the NPK interaction
2978/// was used on 3 of the plots.
2979///
2980/// ## Usage:
2981///
2982/// npk
2983///
2984/// ## Format:
2985///
2986/// The ‘npk’ data frame has 24 rows and 5 columns:
2987///
2988/// * ‘block’ which block (label 1 to 6).
2989/// * ‘N’ indicator (0/1) for the application of nitrogen.
2990/// * ‘P’ indicator (0/1) for the application of phosphate.
2991/// * ‘K’ indicator (0/1) for the application of potassium.
2992/// * ‘yield’ Yield of peas, in pounds/plot (the plots were (1/70)
2993/// acre).
2994///
2995/// ## Source:
2996///
2997/// Imperial College, London, M.Sc. exercise sheet.
2998///
2999/// ## References:
3000///
3001/// Venables, W. N. and Ripley, B. D. (2002) _Modern Applied
3002/// Statistics with S._ Fourth edition.  Springer.
3003///
3004/// ## Examples:
3005///
3006/// ```r
3007/// options(contrasts = c("contr.sum", "contr.poly"))
3008/// npk.aov <- aov(yield ~ block + N*P*K, npk)
3009/// npk.aov
3010/// summary(npk.aov)
3011/// coef(npk.aov)
3012/// options(contrasts = c("contr.treatment", "contr.poly"))
3013/// npk.aov1 <- aov(yield ~ block + N + K, data = npk)
3014/// summary.lm(npk.aov1)
3015/// se.contrast(npk.aov1, list(N=="0", N=="1"), data = npk)
3016/// model.tables(npk.aov1, type = "means", se = TRUE)
3017/// ```
3018pub fn npk() -> PolarsResult<DataFrame> {
3019    CsvReader::new(Cursor::new(include_str!("npk.csv"))).finish()
3020}
3021
3022/// # Occupational Status of Fathers and their Sons
3023///
3024/// ## Description:
3025///
3026/// Cross-classification of a sample of British males according to
3027/// each subject's occupational status and his father's occupational
3028/// status.
3029///
3030/// ## Usage:
3031///
3032/// occupationalStatus
3033///
3034/// ## Format:
3035///
3036/// A ‘table’ of counts, with classifying factors ‘origin’ (father's
3037/// occupational status; levels ‘1:8’) and ‘destination’ (son's
3038/// occupational status; levels ‘1:8’).
3039///
3040/// ## Source:
3041///
3042/// Goodman, L. A. (1979) Simple Models for the Analysis of
3043/// Association in Cross-Classifications having Ordered Categories.
3044/// _J. Am. Stat. Assoc._, *74* (367), 537-552.
3045///
3046/// The data set has been in package ‘gnm’ and been provided by the
3047/// package authors.
3048///
3049/// ## Examples:
3050///
3051/// ```r
3052/// require(stats); require(graphics)
3053///
3054/// plot(occupationalStatus)
3055///
3056/// ##  Fit a uniform association model separating diagonal effects
3057/// Diag <- as.factor(diag(1:8))
3058/// Rscore <- scale(as.numeric(row(occupationalStatus)), scale = FALSE)
3059/// Cscore <- scale(as.numeric(col(occupationalStatus)), scale = FALSE)
3060/// modUnif <- glm(Freq ~ origin + destination + Diag + Rscore:Cscore,
3061/// family = poisson, data = occupationalStatus)
3062///
3063/// summary(modUnif)
3064/// plot(modUnif) # 4 plots, with warning about  h_ii ~= 1
3065/// ```
3066pub fn occupational_status() -> PolarsResult<DataFrame> {
3067    CsvReader::new(Cursor::new(include_str!("occupationalStatus.csv"))).finish()
3068}
3069
3070/// # Growth of Orange Trees
3071///
3072/// ## Description:
3073///
3074/// The ‘Orange’ data frame has 35 rows and 3 columns of records of
3075/// the growth of orange trees.
3076///
3077/// ## Usage:
3078///
3079/// Orange
3080///
3081/// ## Format:
3082///
3083/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
3084/// "groupedData", "data.frame")’ containing the following columns:
3085///
3086/// * Tree an ordered factor indicating the tree on which the
3087/// measurement is made.  The ordering is according to increasing
3088/// maximum diameter.
3089/// * age a numeric vector giving the age of the tree (days since
3090/// 1968/12/31)
3091/// * circumference a numeric vector of trunk circumferences (mm).  This
3092/// is probably “circumference at breast height”, a standard
3093/// measurement in forestry.
3094///
3095/// ## Details:
3096///
3097/// This dataset was originally part of package ‘nlme’, and that has
3098/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
3099/// for its grouped-data classes.
3100///
3101/// ## Source:
3102///
3103/// Draper, N. R. and Smith, H. (1998), _Applied Regression Analysis
3104/// (3rd ed)_, Wiley (exercise 24.N).
3105///
3106/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
3107/// and S-PLUS_, Springer.
3108///
3109/// ## Examples:
3110///
3111/// ```r
3112/// require(stats); require(graphics)
3113/// coplot(circumference ~ age | Tree, data = Orange, show.given = FALSE)
3114/// fm1 <- nls(circumference ~ SSlogis(age, Asym, xmid, scal),
3115///  data = Orange, subset = Tree == 3)
3116/// plot(circumference ~ age, data = Orange, subset = Tree == 3,
3117///  xlab = "Tree age (days since 1968/12/31)",
3118///  ylab = "Tree circumference (mm)", las = 1,
3119///  main = "Orange tree data and fitted model (Tree 3 only)")
3120/// age <- seq(0, 1600, length.out = 101)
3121/// lines(age, predict(fm1, list(age = age)))
3122/// ```
3123pub fn orange() -> PolarsResult<DataFrame> {
3124    CsvReader::new(Cursor::new(include_str!("Orange.csv"))).finish()
3125}
3126
3127/// # Potency of Orchard Sprays
3128///
3129/// ## Description:
3130///
3131/// An experiment was conducted to assess the potency of various
3132/// constituents of orchard sprays in repelling honeybees, using a
3133/// Latin square design.
3134///
3135/// ## Usage:
3136///
3137/// OrchardSprays
3138///
3139/// ## Format:
3140///
3141/// A data frame with 64 observations on 4 variables.
3142///
3143/// * \[,1\]  rowpos  numeric  Row of the design
3144/// * \[,2\]  colpos  numeric  Column of the design
/// * \[,3\]  treatment  factor  Treatment level
/// * \[,4\]  decrease  numeric  Response
3147///
3148/// ## Details:
3149///
3150/// Individual cells of dry comb were filled with measured amounts of
3151/// lime sulphur emulsion in sucrose solution.  Seven different
3152/// concentrations of lime sulphur ranging from a concentration of
3153/// 1/100 to 1/1,562,500 in successive factors of 1/5 were used as
3154/// well as a solution containing no lime sulphur.
3155///
3156/// The responses for the different solutions were obtained by
3157/// releasing 100 bees into the chamber for two hours, and then
3158/// measuring the decrease in volume of the solutions in the various
3159/// cells.
3160///
3161/// An 8 x 8 Latin square design was used and the treatments were
3162/// coded as follows:
3163///
3164/// * A  highest level of lime sulphur
3165/// * B  next highest level of lime sulphur
3166/// * ...
3167/// * G  lowest level of lime sulphur
3168/// * H  no lime sulphur
3169///
3170/// ## Source:
3171///
3172/// Finney, D. J. (1947) _Probit Analysis_.  Cambridge.
3173///
3174/// ## References:
3175///
3176/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
3177/// Wiley.
3178///
3179/// ## Examples:
3180///
3181/// ```r
3182/// require(graphics)
3183/// pairs(OrchardSprays, main = "OrchardSprays data")
3184/// ```
3185pub fn orchard_sprays() -> PolarsResult<DataFrame> {
3186    CsvReader::new(Cursor::new(include_str!("OrchardSprays.csv"))).finish()
3187}
3188
3189/// # Results from an Experiment on Plant Growth
3190///
3191/// ## Description:
3192///
3193/// Results from an experiment to compare yields (as measured by dried
3194/// weight of plants) obtained under a control and two different
3195/// treatment conditions.
3196///
3197/// ## Usage:
3198///
3199/// PlantGrowth
3200///
3201/// ## Format:
3202///
3203/// A data frame of 30 cases on 2 variables.
3204///
3205/// * \[, 1\]  weight  numeric
/// * \[, 2\]  group  factor
3207///
3208/// The levels of ‘group’ are ‘ctrl’, ‘trt1’, and ‘trt2’.
3209///
3210/// ## Source:
3211///
3212/// Dobson, A. J. (1983) _An Introduction to Statistical Modelling_.
3213/// London: Chapman and Hall.
3214///
3215/// ## Examples:
3216///
3217/// ```r
3218/// ## One factor ANOVA example from Dobson's book, cf. Table 7.4:
3219/// require(stats); require(graphics)
3220/// boxplot(weight ~ group, data = PlantGrowth, main = "PlantGrowth data",
3221///  ylab = "Dried weight of plants", col = "lightgray",
3222///  notch = TRUE, varwidth = TRUE)
3223/// anova(lm(weight ~ group, data = PlantGrowth))
3224/// ```
3225pub fn plant_growth() -> PolarsResult<DataFrame> {
3226    CsvReader::new(Cursor::new(include_str!("PlantGrowth.csv"))).finish()
3227}
3228
3229/// # Annual Precipitation in US Cities
3230///
3231/// ## Description:
3232///
3233/// The average amount of precipitation (rainfall) in inches for each
3234/// of 70 United States (and Puerto Rico) cities.
3235///
3236/// ## Usage:
3237///
3238/// precip
3239///
3240/// ## Format:
3241///
3242/// A named vector of length 70.
3243///
3244/// ## Note:
3245///
3246/// The dataset version up to Nov.16, 2016 had a typo in
3247/// ‘"Cincinnati"’'s name.  The examples show how to recreate that
3248/// version.
3249///
3250/// ## Source:
3251///
3252/// Statistical Abstracts of the United States, 1975.
3253///
3254/// ## References:
3255///
3256/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
3257/// Wiley.
3258///
3259/// ## Examples:
3260///
3261/// ```r
3262/// require(graphics)
3263/// dotchart(precip[order(precip)], main = "precip data")
3264/// title(sub = "Average annual precipitation (in.)")
3265///
3266/// ## Old ("wrong") version of dataset (just name change):
3267/// precip.O <- local({
3268/// p <- precip; names(p)[names(p) == "Cincinnati"] <- "Cincinati" ; p })
3269/// stopifnot(all(precip == precip.O),
3270/// match("Cincinnati", names(precip)) == 46,
3271/// identical(names(precip)[-46], names(precip.O)[-46]))
3272/// ```
3273pub fn precip() -> PolarsResult<DataFrame> {
3274    CsvReader::new(Cursor::new(include_str!("precip.csv"))).finish()
3275}
3276
3277/// # Quarterly Approval Ratings of US Presidents
3278///
3279/// ## Description:
3280///
3281/// The (approximately) quarterly approval rating for the President of
3282/// the United States from the first quarter of 1945 to the last
3283/// quarter of 1974.
3284///
3285/// ## Usage:
3286///
3287/// presidents
3288///
3289/// ## Format:
3290///
3291/// A time series of 120 values.
3292///
3293/// ## Details:
3294///
3295/// The data are actually a fudged version of the approval ratings.
3296/// See McNeil's book for details.
3297///
3298/// ## Source:
3299///
3300/// The Gallup Organisation.
3301///
3302/// ## References:
3303///
3304/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
3305/// Wiley.
3306///
3307/// ## Examples:
3308///
3309/// ```r
3310/// require(stats); require(graphics)
3311/// plot(presidents, las = 1, ylab = "Approval rating (%)",
3312///  main = "presidents data")
3313/// ```
3314pub fn presidents() -> PolarsResult<DataFrame> {
3315    CsvReader::new(Cursor::new(include_str!("presidents.csv"))).finish()
3316}
3317
3318/// # Vapor Pressure of Mercury as a Function of Temperature
3319///
3320/// ## Description:
3321///
3322/// Data on the relation between temperature in degrees Celsius and
3323/// vapor pressure of mercury in millimeters (of mercury).
3324///
3325/// ## Usage:
3326///
3327/// pressure
3328///
3329/// ## Format:
3330///
3331/// A data frame with 19 observations on 2 variables.
3332///
3333/// * \[, 1\]  temperature  numeric  temperature (deg C)
3334/// * \[, 2\]  pressure  numeric  pressure (mm)
3335///
3336/// ## Source:
3337///
3338/// Weast, R. C., ed. (1973) _Handbook of Chemistry and Physics_.  CRC
3339/// Press.
3340///
3341/// ## References:
3342///
3343/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
3344/// Wiley.
3345///
3346/// ## Examples:
3347///
3348/// ```r
3349/// require(graphics)
3350/// plot(pressure, xlab = "Temperature (deg C)",
3351///  ylab = "Pressure (mm of Hg)",
3352///  main = "pressure data: Vapor Pressure of Mercury")
3353/// plot(pressure, xlab = "Temperature (deg C)",  log = "y",
3354///  ylab = "Pressure (mm of Hg)",
3355///  main = "pressure data: Vapor Pressure of Mercury")
3356/// ```
3357pub fn pressure() -> PolarsResult<DataFrame> {
3358    CsvReader::new(Cursor::new(include_str!("pressure.csv"))).finish()
3359}
3360
3361/// # Reaction Velocity of an Enzymatic Reaction
3362///
3363/// ## Description:
3364///
3365/// The ‘Puromycin’ data frame has 23 rows and 3 columns of the
3366/// reaction velocity versus substrate concentration in an enzymatic
3367/// reaction involving untreated cells or cells treated with
3368/// Puromycin.
3369///
3370/// ## Usage:
3371///
3372/// Puromycin
3373///
3374/// ## Format:
3375///
3376/// This data frame contains the following columns:
3377///
3378/// * ‘conc’ a numeric vector of substrate concentrations (ppm)
3379/// * ‘rate’ a numeric vector of instantaneous reaction rates
3380/// (counts/min/min)
3381/// * ‘state’ a factor with levels ‘treated’ ‘untreated’
3382///
3383/// ## Details:
3384///
3385/// Data on the velocity of an enzymatic reaction were obtained by
3386/// Treloar (1974).  The number of counts per minute of radioactive
3387/// product from the reaction was measured as a function of substrate
3388/// concentration in parts per million (ppm) and from these counts the
3389/// initial rate (or velocity) of the reaction was calculated
3390/// (counts/min/min).  The experiment was conducted once with the
3391/// enzyme treated with Puromycin, and once with the enzyme untreated.
3392///
3393/// ## Source:
3394///
3395/// Bates, D.M. and Watts, D.G. (1988), _Nonlinear Regression Analysis
3396/// and Its Applications_, Wiley, Appendix A1.3.
3397///
3398/// Treloar, M. A. (1974), _Effects of Puromycin on
3399/// Galactosyltransferase in Golgi Membranes_, M.Sc. Thesis, U. of
3400/// Toronto.
3401///
3402/// ## See Also:
3403///
3404/// ‘SSmicmen’ for other models fitted to this dataset.
3405///
3406/// ## Examples:
3407///
3408/// ```r
3409/// require(stats); require(graphics)
3410///
3411/// plot(rate ~ conc, data = Puromycin, las = 1,
3412///  xlab = "Substrate concentration (ppm)",
3413///  ylab = "Reaction velocity (counts/min/min)",
3414///  pch = as.integer(Puromycin$state),
3415///  col = as.integer(Puromycin$state),
3416///  main = "Puromycin data and fitted Michaelis-Menten curves")
3417/// ## simplest form of fitting the Michaelis-Menten model to these data
3418/// fm1 <- nls(rate ~ Vm * conc/(K + conc), data = Puromycin,
3419///  subset = state == "treated",
3420///  start = c(Vm = 200, K = 0.05))
3421/// fm2 <- nls(rate ~ Vm * conc/(K + conc), data = Puromycin,
3422///  subset = state == "untreated",
3423///  start = c(Vm = 160, K = 0.05))
3424/// summary(fm1)
3425/// summary(fm2)
3426/// ## add fitted lines to the plot
3427/// conc <- seq(0, 1.2, length.out = 101)
3428/// lines(conc, predict(fm1, list(conc = conc)), lty = 1, col = 1)
3429/// lines(conc, predict(fm2, list(conc = conc)), lty = 2, col = 2)
3430/// legend(0.8, 120, levels(Puromycin$state),
3431/// col = 1:2, lty = 1:2, pch = 1:2)
3432///
3433/// ## using partial linearity
3434/// fm3 <- nls(rate ~ conc/(K + conc), data = Puromycin,
3435///  subset = state == "treated", start = c(K = 0.05),
3436///  algorithm = "plinear")
3437/// ```
3438pub fn puromycin() -> PolarsResult<DataFrame> {
3439    CsvReader::new(Cursor::new(include_str!("Puromycin.csv"))).finish()
3440}
3441
3442/// # Locations of Earthquakes off Fiji
3443///
3444/// ## Description:
3445///
/// The data set gives the locations of 1000 seismic events of MB >
/// 4.0.  The events occurred in a cube near Fiji since 1964.
3448///
3449/// ## Usage:
3450///
3451/// quakes
3452///
3453/// ## Format:
3454///
3455/// A data frame with 1000 observations on 5 variables.
3456///
3457/// * \[,1\]  lat numeric  Latitude of event
/// * \[,2\]  long  numeric  Longitude
3459/// * \[,3\]  depth  numeric  Depth (km)
3460/// * \[,4\]  mag numeric  Richter Magnitude
3461/// * \[,5\]  stations  numeric  Number of stations reporting
3462///
3463/// ## Details:
3464///
3465/// There are two clear planes of seismic activity.  One is a major
3466/// plate junction; the other is the Tonga trench off New Zealand.
/// These data constitute a subsample from a larger dataset
/// containing 5000 observations.
3469///
3470/// ## Source:
3471///
3472/// This is one of the Harvard PRIM-H project data sets.  They in turn
3473/// obtained it from Dr. John Woodhouse, Dept. of Geophysics, Harvard
3474/// University.
3475///
3476/// ## Examples:
3477///
3478/// ```r
3479/// require(graphics)
3480/// pairs(quakes, main = "Fiji Earthquakes, N = 1000", cex.main = 1.2, pch = ".")
3481/// ```
3482pub fn quakes() -> PolarsResult<DataFrame> {
3483    CsvReader::new(Cursor::new(include_str!("quakes.csv"))).finish()
3484}
3485
3486/// # Random Numbers from Congruential Generator RANDU
3487///
3488/// ## Description:
3489///
3490/// 400 triples of successive random numbers were taken from the VAX
3491/// FORTRAN function RANDU running under VMS 1.5.
3492///
3493/// ## Usage:
3494///
3495/// randu
3496///
3497/// ## Format:
3498///
3499/// A data frame with 400 observations on 3 variables named ‘x’, ‘y’
3500/// and ‘z’ which give the first, second and third random number in
3501/// the triple.
3502///
3503/// ## Details:
3504///
3505/// In three dimensional displays it is evident that the triples fall
3506/// on 15 parallel planes in 3-space. This can be shown theoretically
3507/// to be true for all triples from the RANDU generator.
3508///
3509/// These particular 400 triples start 5 apart in the sequence, that
3510/// is they are ((U\[5i+1\], U\[5i+2\], U\[5i+3\]), i= 0, ..., 399), and
3511/// they are rounded to 6 decimal places.
3512///
3513/// Under VMS versions 2.0 and higher, this problem has been fixed.
3514///
3515/// ## Source:
3516///
3517/// David Donoho
3518///
3519/// ## Examples:
3520///
3521/// ```r
3522/// ## We could re-generate the dataset by the following R code
3523/// seed <- as.double(1)
3524/// RANDU <- function() {
3525/// seed <<- ((2^16 + 3) * seed) %% (2^31)
3526/// seed/(2^31)
3527/// }
3528/// for(i in 1:400) {
3529/// U <- c(RANDU(), RANDU(), RANDU(), RANDU(), RANDU())
3530/// print(round(U[1:3], 6))
3531/// }
3532/// ```
3533pub fn randu() -> PolarsResult<DataFrame> {
3534    CsvReader::new(Cursor::new(include_str!("randu.csv"))).finish()
3535}
3536
3537/// # Lengths of Major North American Rivers
3538///
3539/// ## Description:
3540///
3541/// This data set gives the lengths (in miles) of 141 “major” rivers
3542/// in North America, as compiled by the US Geological Survey.
3543///
3544/// ## Usage:
3545///
3546/// rivers
3547///
3548/// ## Format:
3549///
3550/// A vector containing 141 observations.
3551///
3552/// ## Source:
3553///
3554/// World Almanac and Book of Facts, 1975, page 406.
3555///
3556/// ## References:
3557///
3558/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
3559/// Wiley.
3560pub fn rivers() -> PolarsResult<DataFrame> {
3561    CsvReader::new(Cursor::new(include_str!("rivers.csv"))).finish()
3562}
3563
3564/// # Measurements on Petroleum Rock Samples
3565///
3566/// ## Description:
3567///
3568/// Measurements on 48 rock samples from a petroleum reservoir.
3569///
3570/// ## Usage:
3571///
3572/// rock
3573///
3574/// ## Format:
3575///
3576/// A data frame with 48 rows and 4 numeric columns.
3577///
/// * \[,1\]  area  area of pores space, in pixels
/// out of 256 by 256
/// * \[,2\]  peri  perimeter in pixels
/// * \[,3\]  shape  perimeter/sqrt(area)
/// * \[,4\]  perm  permeability in milli-Darcies
3583///
3584/// ## Details:
3585///
3586/// Twelve core samples from petroleum reservoirs were sampled by 4
3587/// cross-sections.  Each core sample was measured for permeability,
3588/// and each cross-section has total area of pores, total perimeter of
3589/// pores, and shape.
3590///
3591/// ## Source:
3592///
3593/// Data from BP Research, image analysis by Ronit Katz, U. Oxford.
3594pub fn rock() -> PolarsResult<DataFrame> {
3595    CsvReader::new(Cursor::new(include_str!("rock.csv"))).finish()
3596}
3597
3598/// # Road Casualties in Great Britain 1969-84
3599///
3600/// ## Description:
3601///
3602/// ‘UKDriverDeaths’ is a time series giving the monthly totals of car
3603/// drivers in Great Britain killed or seriously injured Jan 1969 to
3604/// Dec 1984.  Compulsory wearing of seat belts was introduced on 31
3605/// Jan 1983.
3606///
3607/// ‘Seatbelts’ is more information on the same problem.
3608///
3609/// ## Usage:
3610///
3611/// UKDriverDeaths
3612/// Seatbelts
3613///
3614/// ## Format:
3615///
/// ‘Seatbelts’ is a multiple time series, with columns
///
3617/// * ‘DriversKilled’ car drivers killed.
3618/// * ‘drivers’ same as ‘UKDriverDeaths’.
3619/// * ‘front’ front-seat passengers killed or seriously injured.
3620/// * ‘rear’ rear-seat passengers killed or seriously injured.
3621/// * ‘kms’ distance driven.
3622/// * ‘PetrolPrice’ petrol price.
3623/// * ‘VanKilled’ number of van (‘light goods vehicle’) drivers.
3624/// * ‘law’ 0/1: was the law in effect that month?
3625///
3626/// ## Source:
3627///
3628/// Harvey, A.C. (1989).  _Forecasting, Structural Time Series Models
3629/// and the Kalman Filter_.  Cambridge University Press, pp. 519-523.
3630///
3631/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
3632/// State Space Methods_.  Oxford University Press.
3633///
/// ## References:
3635///
3636/// Harvey, A. C. and Durbin, J. (1986).  The effects of seat belt
3637/// legislation on British road casualties: A case study in structural
3638/// time series modelling.  _Journal of the Royal Statistical Society_
3639/// series A, *149*, 187-227.  doi:10.2307/2981553
3640/// <https://doi.org/10.2307/2981553>.
3641///
3642/// ## Examples:
3643///
3644/// ```r
3645/// require(stats); require(graphics)
3646/// ## work with pre-seatbelt period to identify a model, use logs
3647/// work <- window(log10(UKDriverDeaths), end = 1982+11/12)
3648/// par(mfrow = c(3, 1))
3649/// plot(work); acf(work); pacf(work)
3650/// par(mfrow = c(1, 1))
3651/// (fit <- arima(work, c(1, 0, 0), seasonal = list(order = c(1, 0, 0))))
3652/// z <- predict(fit, n.ahead = 24)
3653/// ts.plot(log10(UKDriverDeaths), z$pred, z$pred+2*z$se, z$pred-2*z$se,
3654///  lty = c(1, 3, 2, 2), col = c("black", "red", "blue", "blue"))
3655///
3656/// ## now see the effect of the explanatory variables
3657/// X <- Seatbelts[, c("kms", "PetrolPrice", "law")]
3658/// X[, 1] <- log10(X[, 1]) - 4
3659/// arima(log10(Seatbelts[, "drivers"]), c(1, 0, 0),
3660/// seasonal = list(order = c(1, 0, 0)), xreg = X)
3661/// ```
3662pub fn seatbelts() -> PolarsResult<DataFrame> {
3663    CsvReader::new(Cursor::new(include_str!("Seatbelts.csv"))).finish()
3664}
3665
3666/// # Student's Sleep Data
3667///
3668/// ## Description:
3669///
3670/// Data which show the effect of two soporific drugs (increase in
3671/// hours of sleep compared to control) on 10 patients.
3672///
3673/// ## Usage:
3674///
3675/// sleep
3676///
3677/// ## Format:
3678///
3679/// A data frame with 20 observations on 3 variables.
3680///
3681/// * \[, 1\]  extra  numeric  increase in hours of sleep
/// * \[, 2\]  group  factor  drug given
/// * \[, 3\]  ID  factor  patient ID
3684///
3685/// ## Details:
3686///
3687/// The ‘group’ variable name may be misleading about the data: They
3688/// represent measurements on 10 persons, not in groups.
3689///
3690/// ## Source:
3691///
3692/// Cushny, A. R. and Peebles, A. R. (1905) The action of optical
3693/// isomers: II hyoscines.  _The Journal of Physiology_ *32*, 501-510.
3694///
3695/// Student (1908) The probable error of the mean.  _Biometrika_, *6*,
3696/// 20.
3697///
3698/// ## References:
3699///
3700/// Scheffé, Henry (1959) _The Analysis of Variance_.  New York, NY:
3701/// Wiley.
3702///
3703/// ## Examples:
3704///
3705/// ```r
3706/// require(stats)
3707/// ## Student's paired t-test
3708/// with(sleep,
3709///  t.test(extra[group == 1],
3710/// extra[group == 2], paired = TRUE))
3711///
3712/// ## The sleep *prolongations*
3713/// sleep1 <- with(sleep, extra[group == 2] - extra[group == 1])
3714/// summary(sleep1)
3715/// stripchart(sleep1, method = "stack", xlab = "hours",
3716///  main = "Sleep prolongation (n = 10)")
3717/// boxplot(sleep1, horizontal = TRUE, add = TRUE,
3718///  at = .6, pars = list(boxwex = 0.5, staplewex = 0.25))
3719/// ```
3720pub fn sleep() -> PolarsResult<DataFrame> {
3721    CsvReader::new(Cursor::new(include_str!("sleep.csv"))).finish()
3722}
3723
3724/// # Brownlee's Stack Loss Plant Data
3725///
3726/// ## Description:
3727///
3728/// Operational data of a plant for the oxidation of ammonia to nitric
3729/// acid.
3730///
3731/// ## Usage:
3732///
3733/// stackloss
3734///
3735/// stack.x
3736/// stack.loss
3737///
3738/// ## Format:
3739///
3740/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
3741///
3742/// * \[,1\]  ‘Air Flow’ Flow of cooling air
3743/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
3744/// Temperature
3745/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
3746/// 1000, minus 500\]
3747/// * \[,4\]  ‘stack.loss’  Stack loss
3748///
3749/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
3750/// with the first three (independent) variables of the data frame,
3751/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
3752/// variable, are provided as well.
3753///
3754/// ## Details:
3755///
3756/// “Obtained from 21 days of operation of a plant for the oxidation
3757/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
3758/// produced are absorbed in a countercurrent absorption tower”.
3759/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
3760///
3761/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
3762/// Temp’ is the temperature of cooling water circulated through coils
3763/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
3764/// acid circulating, minus 50, times 10: that is, 89 corresponds to
3765/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
3766/// times the percentage of the ingoing ammonia to the plant that
3767/// escapes from the absorption column unabsorbed; that is, an
3768/// (inverse) measure of the over-all efficiency of the plant.
3769///
3770/// ## Source:
3771///
3772/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
3773/// Methodology in Science and Engineering_.  New York: Wiley. pp.
3774/// 491-500.
3775///
3776/// ## References:
3777///
3778/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
3779/// Language_.  Wadsworth & Brooks/Cole.
3780///
3781/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
3782/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
3783/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
3784/// Statistics_ *109*, Springer-Verlag, New York.
3785///
3786/// ## Examples:
3787///
3788/// ```r
3789/// require(stats)
3790/// summary(lm.stack <- lm(stack.loss ~ stack.x))
3791/// ```
3792pub fn stack_loss() -> PolarsResult<DataFrame> {
3793    CsvReader::new(Cursor::new(include_str!("stack.loss.csv"))).finish()
3794}
3795
3796/// # Brownlee's Stack Loss Plant Data
3797///
3798/// ## Description:
3799///
3800/// Operational data of a plant for the oxidation of ammonia to nitric
3801/// acid.
3802///
3803/// ## Usage:
3804///
3805/// stackloss
3806///
3807/// stack.x
3808/// stack.loss
3809///
3810/// ## Format:
3811///
3812/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
3813///
3814/// * \[,1\]  ‘Air Flow’ Flow of cooling air
3815/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
3816/// Temperature
3817/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
3818/// 1000, minus 500\]
3819/// * \[,4\]  ‘stack.loss’  Stack loss
3820///
3821/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
3822/// with the first three (independent) variables of the data frame,
3823/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
3824/// variable, are provided as well.
3825///
3826/// ## Details:
3827///
3828/// “Obtained from 21 days of operation of a plant for the oxidation
3829/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
3830/// produced are absorbed in a countercurrent absorption tower”.
3831/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
3832///
3833/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
3834/// Temp’ is the temperature of cooling water circulated through coils
3835/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
3836/// acid circulating, minus 50, times 10: that is, 89 corresponds to
3837/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
3838/// times the percentage of the ingoing ammonia to the plant that
3839/// escapes from the absorption column unabsorbed; that is, an
3840/// (inverse) measure of the over-all efficiency of the plant.
3841///
3842/// ## Source:
3843///
3844/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
3845/// Methodology in Science and Engineering_.  New York: Wiley. pp.
3846/// 491-500.
3847///
3848/// ## References:
3849///
3850/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
3851/// Language_.  Wadsworth & Brooks/Cole.
3852///
3853/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
3854/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
3855/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
3856/// Statistics_ *109*, Springer-Verlag, New York.
3857///
3858/// ## Examples:
3859///
3860/// ```r
3861/// require(stats)
3862/// summary(lm.stack <- lm(stack.loss ~ stack.x))
3863/// ```
3864pub fn stack_x() -> PolarsResult<DataFrame> {
3865    CsvReader::new(Cursor::new(include_str!("stack.x.csv"))).finish()
3866}
3867
3868/// # Brownlee's Stack Loss Plant Data
3869///
3870/// ## Description:
3871///
3872/// Operational data of a plant for the oxidation of ammonia to nitric
3873/// acid.
3874///
3875/// ## Usage:
3876///
3877/// stackloss
3878///
3879/// stack.x
3880/// stack.loss
3881///
3882/// ## Format:
3883///
3884/// ‘stackloss’ is a data frame with 21 observations on 4 variables.
3885///
3886/// * \[,1\]  ‘Air Flow’ Flow of cooling air
3887/// * \[,2\]  ‘Water Temp’  Cooling Water Inlet
3888/// Temperature
3889/// * \[,3\]  ‘Acid Conc.’  Concentration of acid \[per
3890/// 1000, minus 500\]
3891/// * \[,4\]  ‘stack.loss’  Stack loss
3892///
3893/// For compatibility with S-PLUS, the data sets ‘stack.x’, a matrix
3894/// with the first three (independent) variables of the data frame,
3895/// and ‘stack.loss’, the numeric vector giving the fourth (dependent)
3896/// variable, are provided as well.
3897///
3898/// ## Details:
3899///
3900/// “Obtained from 21 days of operation of a plant for the oxidation
3901/// of ammonia (NH3) to nitric acid (HNO3).  The nitric oxides
3902/// produced are absorbed in a countercurrent absorption tower”.
3903/// (Brownlee, cited by Dodge, slightly reformatted by MM.)
3904///
3905/// ‘Air Flow’ represents the rate of operation of the plant.  ‘Water
3906/// Temp’ is the temperature of cooling water circulated through coils
3907/// in the absorption tower.  ‘Acid Conc.’ is the concentration of the
3908/// acid circulating, minus 50, times 10: that is, 89 corresponds to
3909/// 58.9 per cent acid.  ‘stack.loss’ (the dependent variable) is 10
3910/// times the percentage of the ingoing ammonia to the plant that
3911/// escapes from the absorption column unabsorbed; that is, an
3912/// (inverse) measure of the over-all efficiency of the plant.
3913///
3914/// ## Source:
3915///
3916/// Brownlee, K. A. (1960, 2nd ed. 1965) _Statistical Theory and
3917/// Methodology in Science and Engineering_.  New York: Wiley. pp.
3918/// 491-500.
3919///
3920/// ## References:
3921///
3922/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
3923/// Language_.  Wadsworth & Brooks/Cole.
3924///
3925/// Dodge, Y. (1996) The guinea pig of multiple regression. In:
3926/// _Robust Statistics, Data Analysis, and Computer Intensive Methods;
3927/// In Honor of Peter Huber's 60th Birthday_, 1996, _Lecture Notes in
3928/// Statistics_ *109*, Springer-Verlag, New York.
3929///
3930/// ## Examples:
3931///
3932/// ```r
3933/// require(stats)
3934/// summary(lm.stack <- lm(stack.loss ~ stack.x))
3935/// ```
3936pub fn stackloss() -> PolarsResult<DataFrame> {
3937    CsvReader::new(Cursor::new(include_str!("stackloss.csv"))).finish()
3938}
3939
3940/// # US State Facts and Figures
3941///
3942/// ## Description:
3943///
3944/// Data sets related to the 50 states of the United States of
3945/// America.
3946///
3947/// ## Usage:
3948///
3949/// state.abb
3950/// state.area
3951/// state.center
3952/// state.division
3953/// state.name
3954/// state.region
3955/// state.x77
3956///
3957/// ## Details:
3958///
3959/// R currently contains the following “state” data sets.  Note that
3960/// all data are arranged according to alphabetical order of the state
3961/// names.
3962///
3963/// * ‘state.abb’: character vector of 2-letter abbreviations for the
3964/// state names.
3965/// * ‘state.area’: numeric vector of state areas (in square miles).
3966/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
3967/// approximate geographic center of each state in negative
3968/// longitude and latitude.  Alaska and Hawaii are placed just
3969/// off the West Coast.  See ‘Examples’ on how to “correct”.
3970/// * ‘state.division’: ‘factor’ giving state divisions (New England,
3971/// Middle Atlantic, South Atlantic, East South Central, West
3972/// South Central, East North Central, West North Central,
3973/// Mountain, and Pacific).
3974/// * ‘state.name’: character vector giving the full state names.
3975/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
3976/// North Central, West) that each state belongs to.
3977/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
3978/// following statistics in the respective columns.
3979/// * ‘Population’: population estimate as of July 1, 1975
3980/// * ‘Income’: per capita income (1974)
3981/// * ‘Illiteracy’: illiteracy (1970, percent of population)
3982/// * ‘Life Exp’: life expectancy in years (1969-71)
3983/// * ‘Murder’: murder and non-negligent manslaughter rate per
3984/// 100,000 population (1976)
3985/// * ‘HS Grad’: percent high-school graduates (1970)
3986/// * ‘Frost’: mean number of days with minimum temperature below
3987/// freezing (1931-1960) in capital or large city
3988/// * ‘Area’: land area in square miles
3989///
3990/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
3991/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
3992///
3993/// ## Source:
3994///
3995/// U.S. Department of Commerce, Bureau of the Census (1977)
3996/// _Statistical Abstract of the United States_.
3997///
3998/// U.S. Department of Commerce, Bureau of the Census (1977) _County
3999/// and City Data Book_.
4000///
4001/// ## References:
4002///
4003/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4004/// Language_.  Wadsworth & Brooks/Cole.
4005///
4006/// ## Examples:
4007///
4008/// ```r
4009/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4010/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4011/// dst[c("AK", "HI"),]
4012/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4013/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4016/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4017/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4018/// state.center2 <- as.list(dxy)
4019///
4020/// plot(dxy, asp=1.2, pch=3, col=2)
4021/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4022/// i <- c("AK","HI")
4023/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4024/// col=adjustcolor(4, .7), length=1/8))
4025/// points(dst[i,], col=2)
4026/// if(FALSE) { # if(require("maps")) {
4027/// map("state", interior = FALSE, add = TRUE)
4028/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4029/// }
4030/// ```
4031pub fn state_abb() -> PolarsResult<DataFrame> {
4032    CsvReader::new(Cursor::new(include_str!("state.abb.csv"))).finish()
4033}
4034
4035/// # US State Facts and Figures
4036///
4037/// ## Description:
4038///
4039/// Data sets related to the 50 states of the United States of
4040/// America.
4041///
4042/// ## Usage:
4043///
4044/// state.abb
4045/// state.area
4046/// state.center
4047/// state.division
4048/// state.name
4049/// state.region
4050/// state.x77
4051///
4052/// ## Details:
4053///
4054/// R currently contains the following “state” data sets.  Note that
4055/// all data are arranged according to alphabetical order of the state
4056/// names.
4057///
4058/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4059/// state names.
4060/// * ‘state.area’: numeric vector of state areas (in square miles).
4061/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4062/// approximate geographic center of each state in negative
4063/// longitude and latitude.  Alaska and Hawaii are placed just
4064/// off the West Coast.  See ‘Examples’ on how to “correct”.
4065/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4066/// Middle Atlantic, South Atlantic, East South Central, West
4067/// South Central, East North Central, West North Central,
4068/// Mountain, and Pacific).
4069/// * ‘state.name’: character vector giving the full state names.
4070/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4071/// North Central, West) that each state belongs to.
4072/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4073/// following statistics in the respective columns.
4074/// * ‘Population’: population estimate as of July 1, 1975
4075/// * ‘Income’: per capita income (1974)
4076/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4077/// * ‘Life Exp’: life expectancy in years (1969-71)
4078/// * ‘Murder’: murder and non-negligent manslaughter rate per
4079/// 100,000 population (1976)
4080/// * ‘HS Grad’: percent high-school graduates (1970)
4081/// * ‘Frost’: mean number of days with minimum temperature below
4082/// freezing (1931-1960) in capital or large city
4083/// * ‘Area’: land area in square miles
4084///
4085/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4086/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4087///
4088/// ## Source:
4089///
4090/// U.S. Department of Commerce, Bureau of the Census (1977)
4091/// _Statistical Abstract of the United States_.
4092///
4093/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4094/// and City Data Book_.
4095///
4096/// ## References:
4097///
4098/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4099/// Language_.  Wadsworth & Brooks/Cole.
4100///
4101/// ## Examples:
4102///
4103/// ```r
4104/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4105/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4106/// dst[c("AK", "HI"),]
4107/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4108/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4111/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4112/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4113/// state.center2 <- as.list(dxy)
4114///
4115/// plot(dxy, asp=1.2, pch=3, col=2)
4116/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4117/// i <- c("AK","HI")
4118/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4119/// col=adjustcolor(4, .7), length=1/8))
4120/// points(dst[i,], col=2)
4121/// if(FALSE) { # if(require("maps")) {
4122/// map("state", interior = FALSE, add = TRUE)
4123/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4124/// }
4125/// ```
4126pub fn state_area() -> PolarsResult<DataFrame> {
4127    CsvReader::new(Cursor::new(include_str!("state.area.csv"))).finish()
4128}
4129
4130/// # US State Facts and Figures
4131///
4132/// ## Description:
4133///
4134/// Data sets related to the 50 states of the United States of
4135/// America.
4136///
4137/// ## Usage:
4138///
4139/// state.abb
4140/// state.area
4141/// state.center
4142/// state.division
4143/// state.name
4144/// state.region
4145/// state.x77
4146///
4147/// ## Details:
4148///
4149/// R currently contains the following “state” data sets.  Note that
4150/// all data are arranged according to alphabetical order of the state
4151/// names.
4152///
4153/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4154/// state names.
4155/// * ‘state.area’: numeric vector of state areas (in square miles).
4156/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4157/// approximate geographic center of each state in negative
4158/// longitude and latitude.  Alaska and Hawaii are placed just
4159/// off the West Coast.  See ‘Examples’ on how to “correct”.
4160/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4161/// Middle Atlantic, South Atlantic, East South Central, West
4162/// South Central, East North Central, West North Central,
4163/// Mountain, and Pacific).
4164/// * ‘state.name’: character vector giving the full state names.
4165/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4166/// North Central, West) that each state belongs to.
4167/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4168/// following statistics in the respective columns.
4169/// * ‘Population’: population estimate as of July 1, 1975
4170/// * ‘Income’: per capita income (1974)
4171/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4172/// * ‘Life Exp’: life expectancy in years (1969-71)
4173/// * ‘Murder’: murder and non-negligent manslaughter rate per
4174/// 100,000 population (1976)
4175/// * ‘HS Grad’: percent high-school graduates (1970)
4176/// * ‘Frost’: mean number of days with minimum temperature below
4177/// freezing (1931-1960) in capital or large city
4178/// * ‘Area’: land area in square miles
4179///
4180/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4181/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4182///
4183/// ## Source:
4184///
4185/// U.S. Department of Commerce, Bureau of the Census (1977)
4186/// _Statistical Abstract of the United States_.
4187///
4188/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4189/// and City Data Book_.
4190///
4191/// ## References:
4192///
4193/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4194/// Language_.  Wadsworth & Brooks/Cole.
4195///
4196/// ## Examples:
4197///
4198/// ```r
4199/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4200/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4201/// dst[c("AK", "HI"),]
4202/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4203/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4206/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4207/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4208/// state.center2 <- as.list(dxy)
4209///
4210/// plot(dxy, asp=1.2, pch=3, col=2)
4211/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4212/// i <- c("AK","HI")
4213/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4214/// col=adjustcolor(4, .7), length=1/8))
4215/// points(dst[i,], col=2)
4216/// if(FALSE) { # if(require("maps")) {
4217/// map("state", interior = FALSE, add = TRUE)
4218/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4219/// }
4220/// ```
4221pub fn state_center() -> PolarsResult<DataFrame> {
4222    CsvReader::new(Cursor::new(include_str!("state.center.csv"))).finish()
4223}
4224
4225/// # US State Facts and Figures
4226///
4227/// ## Description:
4228///
4229/// Data sets related to the 50 states of the United States of
4230/// America.
4231///
4232/// ## Usage:
4233///
4234/// state.abb
4235/// state.area
4236/// state.center
4237/// state.division
4238/// state.name
4239/// state.region
4240/// state.x77
4241///
4242/// ## Details:
4243///
4244/// R currently contains the following “state” data sets.  Note that
4245/// all data are arranged according to alphabetical order of the state
4246/// names.
4247///
4248/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4249/// state names.
4250/// * ‘state.area’: numeric vector of state areas (in square miles).
4251/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4252/// approximate geographic center of each state in negative
4253/// longitude and latitude.  Alaska and Hawaii are placed just
4254/// off the West Coast.  See ‘Examples’ on how to “correct”.
4255/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4256/// Middle Atlantic, South Atlantic, East South Central, West
4257/// South Central, East North Central, West North Central,
4258/// Mountain, and Pacific).
4259/// * ‘state.name’: character vector giving the full state names.
4260/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4261/// North Central, West) that each state belongs to.
4262/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4263/// following statistics in the respective columns.
4264/// * ‘Population’: population estimate as of July 1, 1975
4265/// * ‘Income’: per capita income (1974)
4266/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4267/// * ‘Life Exp’: life expectancy in years (1969-71)
4268/// * ‘Murder’: murder and non-negligent manslaughter rate per
4269/// 100,000 population (1976)
4270/// * ‘HS Grad’: percent high-school graduates (1970)
4271/// * ‘Frost’: mean number of days with minimum temperature below
4272/// freezing (1931-1960) in capital or large city
4273/// * ‘Area’: land area in square miles
4274///
4275/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4276/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4277///
4278/// ## Source:
4279///
4280/// U.S. Department of Commerce, Bureau of the Census (1977)
4281/// _Statistical Abstract of the United States_.
4282///
4283/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4284/// and City Data Book_.
4285///
4286/// ## References:
4287///
4288/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4289/// Language_.  Wadsworth & Brooks/Cole.
4290///
4291/// ## Examples:
4292///
4293/// ```r
4294/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4295/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4296/// dst[c("AK", "HI"),]
4297/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4298/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4301/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4302/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4303/// state.center2 <- as.list(dxy)
4304///
4305/// plot(dxy, asp=1.2, pch=3, col=2)
4306/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4307/// i <- c("AK","HI")
4308/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4309/// col=adjustcolor(4, .7), length=1/8))
4310/// points(dst[i,], col=2)
4311/// if(FALSE) { # if(require("maps")) {
4312/// map("state", interior = FALSE, add = TRUE)
4313/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4314/// }
4315/// ```
4316pub fn state_division() -> PolarsResult<DataFrame> {
4317    CsvReader::new(Cursor::new(include_str!("state.division.csv"))).finish()
4318}
4319
4320/// # US State Facts and Figures
4321///
4322/// ## Description:
4323///
4324/// Data sets related to the 50 states of the United States of
4325/// America.
4326///
4327/// ## Usage:
4328///
4329/// state.abb
4330/// state.area
4331/// state.center
4332/// state.division
4333/// state.name
4334/// state.region
4335/// state.x77
4336///
4337/// ## Details:
4338///
4339/// R currently contains the following “state” data sets.  Note that
4340/// all data are arranged according to alphabetical order of the state
4341/// names.
4342///
4343/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4344/// state names.
4345/// * ‘state.area’: numeric vector of state areas (in square miles).
4346/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4347/// approximate geographic center of each state in negative
4348/// longitude and latitude.  Alaska and Hawaii are placed just
4349/// off the West Coast.  See ‘Examples’ on how to “correct”.
4350/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4351/// Middle Atlantic, South Atlantic, East South Central, West
4352/// South Central, East North Central, West North Central,
4353/// Mountain, and Pacific).
4354/// * ‘state.name’: character vector giving the full state names.
4355/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4356/// North Central, West) that each state belongs to.
4357/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4358/// following statistics in the respective columns.
4359/// * ‘Population’: population estimate as of July 1, 1975
4360/// * ‘Income’: per capita income (1974)
4361/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4362/// * ‘Life Exp’: life expectancy in years (1969-71)
4363/// * ‘Murder’: murder and non-negligent manslaughter rate per
4364/// 100,000 population (1976)
4365/// * ‘HS Grad’: percent high-school graduates (1970)
4366/// * ‘Frost’: mean number of days with minimum temperature below
4367/// freezing (1931-1960) in capital or large city
4368/// * ‘Area’: land area in square miles
4369///
4370/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4371/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4372///
4373/// ## Source:
4374///
4375/// U.S. Department of Commerce, Bureau of the Census (1977)
4376/// _Statistical Abstract of the United States_.
4377///
4378/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4379/// and City Data Book_.
4380///
4381/// ## References:
4382///
4383/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4384/// Language_.  Wadsworth & Brooks/Cole.
4385///
4386/// ## Examples:
4387///
4388/// ```r
4389/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4390/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4391/// dst[c("AK", "HI"),]
4392/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4393/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4396/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4397/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4398/// state.center2 <- as.list(dxy)
4399///
4400/// plot(dxy, asp=1.2, pch=3, col=2)
4401/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4402/// i <- c("AK","HI")
4403/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4404/// col=adjustcolor(4, .7), length=1/8))
4405/// points(dst[i,], col=2)
4406/// if(FALSE) { # if(require("maps")) {
4407/// map("state", interior = FALSE, add = TRUE)
4408/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4409/// }
4410/// ```
4411pub fn state_name() -> PolarsResult<DataFrame> {
4412    CsvReader::new(Cursor::new(include_str!("state.name.csv"))).finish()
4413}
4414
4415/// # US State Facts and Figures
4416///
4417/// ## Description:
4418///
4419/// Data sets related to the 50 states of the United States of
4420/// America.
4421///
4422/// ## Usage:
4423///
4424/// state.abb
4425/// state.area
4426/// state.center
4427/// state.division
4428/// state.name
4429/// state.region
4430/// state.x77
4431///
4432/// ## Details:
4433///
4434/// R currently contains the following “state” data sets.  Note that
4435/// all data are arranged according to alphabetical order of the state
4436/// names.
4437///
4438/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4439/// state names.
4440/// * ‘state.area’: numeric vector of state areas (in square miles).
4441/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4442/// approximate geographic center of each state in negative
4443/// longitude and latitude.  Alaska and Hawaii are placed just
4444/// off the West Coast.  See ‘Examples’ on how to “correct”.
4445/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4446/// Middle Atlantic, South Atlantic, East South Central, West
4447/// South Central, East North Central, West North Central,
4448/// Mountain, and Pacific).
4449/// * ‘state.name’: character vector giving the full state names.
4450/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4451/// North Central, West) that each state belongs to.
4452/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4453/// following statistics in the respective columns.
4454/// * ‘Population’: population estimate as of July 1, 1975
4455/// * ‘Income’: per capita income (1974)
4456/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4457/// * ‘Life Exp’: life expectancy in years (1969-71)
4458/// * ‘Murder’: murder and non-negligent manslaughter rate per
4459/// 100,000 population (1976)
4460/// * ‘HS Grad’: percent high-school graduates (1970)
4461/// * ‘Frost’: mean number of days with minimum temperature below
4462/// freezing (1931-1960) in capital or large city
4463/// * ‘Area’: land area in square miles
4464///
4465/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4466/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4467///
4468/// ## Source:
4469///
4470/// U.S. Department of Commerce, Bureau of the Census (1977)
4471/// _Statistical Abstract of the United States_.
4472///
4473/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4474/// and City Data Book_.
4475///
4476/// ## References:
4477///
4478/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4479/// Language_.  Wadsworth & Brooks/Cole.
4480///
4481/// ## Examples:
4482///
4483/// ```r
4484/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4485/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4486/// dst[c("AK", "HI"),]
4487/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4488/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4491/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4492/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4493/// state.center2 <- as.list(dxy)
4494///
4495/// plot(dxy, asp=1.2, pch=3, col=2)
4496/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4497/// i <- c("AK","HI")
4498/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4499/// col=adjustcolor(4, .7), length=1/8))
4500/// points(dst[i,], col=2)
4501/// if(FALSE) { # if(require("maps")) {
4502/// map("state", interior = FALSE, add = TRUE)
4503/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4504/// }
4505/// ```
4506pub fn state_region() -> PolarsResult<DataFrame> {
4507    CsvReader::new(Cursor::new(include_str!("state.region.csv"))).finish()
4508}
4509
4510/// # US State Facts and Figures
4511///
4512/// ## Description:
4513///
4514/// Data sets related to the 50 states of the United States of
4515/// America.
4516///
4517/// ## Usage:
4518///
4519/// state.abb
4520/// state.area
4521/// state.center
4522/// state.division
4523/// state.name
4524/// state.region
4525/// state.x77
4526///
4527/// ## Details:
4528///
4529/// R currently contains the following “state” data sets.  Note that
4530/// all data are arranged according to alphabetical order of the state
4531/// names.
4532///
4533/// * ‘state.abb’: character vector of 2-letter abbreviations for the
4534/// state names.
4535/// * ‘state.area’: numeric vector of state areas (in square miles).
4536/// * ‘state.center’: list with components named ‘x’ and ‘y’ giving the
4537/// approximate geographic center of each state in negative
4538/// longitude and latitude.  Alaska and Hawaii are placed just
4539/// off the West Coast.  See ‘Examples’ on how to “correct”.
4540/// * ‘state.division’: ‘factor’ giving state divisions (New England,
4541/// Middle Atlantic, South Atlantic, East South Central, West
4542/// South Central, East North Central, West North Central,
4543/// Mountain, and Pacific).
4544/// * ‘state.name’: character vector giving the full state names.
4545/// * ‘state.region’: ‘factor’ giving the region (Northeast, South,
4546/// North Central, West) that each state belongs to.
4547/// * ‘state.x77’: matrix with 50 rows and 8 columns giving the
4548/// following statistics in the respective columns.
4549/// * ‘Population’: population estimate as of July 1, 1975
4550/// * ‘Income’: per capita income (1974)
4551/// * ‘Illiteracy’: illiteracy (1970, percent of population)
4552/// * ‘Life Exp’: life expectancy in years (1969-71)
4553/// * ‘Murder’: murder and non-negligent manslaughter rate per
4554/// 100,000 population (1976)
4555/// * ‘HS Grad’: percent high-school graduates (1970)
4556/// * ‘Frost’: mean number of days with minimum temperature below
4557/// freezing (1931-1960) in capital or large city
4558/// * ‘Area’: land area in square miles
4559///
4560/// Note that a square mile is by definition exactly ‘(cm(1760 * 3 *
4561/// 12) / 100 / 1000)^2’ km^2, i.e., 2.589988110336 km^2.
4562///
4563/// ## Source:
4564///
4565/// U.S. Department of Commerce, Bureau of the Census (1977)
4566/// _Statistical Abstract of the United States_.
4567///
4568/// U.S. Department of Commerce, Bureau of the Census (1977) _County
4569/// and City Data Book_.
4570///
4571/// ## References:
4572///
4573/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4574/// Language_.  Wadsworth & Brooks/Cole.
4575///
4576/// ## Examples:
4577///
4578/// ```r
4579/// (dst <- dxy <- data.frame(state.center, row.names=state.abb))
4580/// ## Alaska and Hawaii are placed just off the West Coast (for compact map drawing):
4581/// dst[c("AK", "HI"),]
4582/// ## state.center2 := version of state.center with "correct" coordinates for AK & HI:
4583/// ## From https://pubs.usgs.gov/gip/Elevations-Distances/elvadist.html#Geographic%20Centers
/// ##   Alaska   63°50' N., 152°00' W., 60 miles northwest of Mount McKinley
/// ##   Hawaii   20°15' N., 156°20' W., off Maui Island
4586/// dxy["AK",] <- c(-152.  , 63.83) # or  c(-152.11, 65.17)
4587/// dxy["HI",] <- c(-156.33, 20.25) # or  c(-156.69, 20.89)
4588/// state.center2 <- as.list(dxy)
4589///
4590/// plot(dxy, asp=1.2, pch=3, col=2)
4591/// text(state.center2, state.abb, cex=1/2, pos=4, offset=1/4)
4592/// i <- c("AK","HI")
4593/// do.call(arrows, c(setNames(c(dst[i,], dxy[i,]), c("x0","y0", "x1","y1")),
4594/// col=adjustcolor(4, .7), length=1/8))
4595/// points(dst[i,], col=2)
4596/// if(FALSE) { # if(require("maps")) {
4597/// map("state", interior = FALSE, add = TRUE)
4598/// map("state", boundary = FALSE, lty = 2, add = TRUE)
4599/// }
4600/// ```
4601pub fn state_x77() -> PolarsResult<DataFrame> {
4602    CsvReader::new(Cursor::new(include_str!("state.x77.csv"))).finish()
4603}
4604
4605/// # Monthly Sunspot Data, from 1749 to "Present"
4606///
4607/// ## Description:
4608///
4609/// Monthly numbers of sunspots, as from the World Data Center, aka
4610/// SIDC.  This is the version of the data that will occasionally be
4611/// updated when new counts become available.
4612///
4613/// ## Usage:
4614///
4615/// sunspot.month
4616///
4617/// ## Format:
4618///
4619/// The univariate time series ‘sunspot.year’ and ‘sunspot.month’
4620/// contain 289 and 2988 observations, respectively.  The objects are
4621/// of class ‘"ts"’.
4622///
4623/// ## Author(s):
4624///
4625/// R
4626///
4627/// ## Source:
4628///
4629/// WDC-SILSO, Solar Influences Data Analysis Center (SIDC), Royal
4630/// Observatory of Belgium, Av. Circulaire, 3, B-1180 BRUSSELS
4631/// Currently at <http://www.sidc.be/silso/datafiles>
4632///
4633/// ## See Also:
4634///
4635/// ‘sunspot.month’ is a longer version of ‘sunspots’; the latter runs
4636/// until 1983 and is kept fixed (for reproducibility as example
4637/// dataset).
4638///
4639/// ## Examples:
4640///
4641/// ```r
4642/// require(stats); require(graphics)
4643/// ## Compare the monthly series
4644/// plot (sunspot.month,
4645/// main="sunspot.month & sunspots [package'datasets']", col=2)
4646/// lines(sunspots) # -> faint differences where they overlap
4647///
4648/// ## Now look at the difference :
4649/// all(tsp(sunspots)  [c(1,3)] ==
4650/// tsp(sunspot.month)[c(1,3)]) ## Start & Periodicity are the same
4651/// n1 <- length(sunspots)
4652/// table(eq <- sunspots == sunspot.month[1:n1]) #>  132  are different !
4653/// i <- which(!eq)
4654/// rug(time(eq)[i])
4655/// s1 <- sunspots[i] ; s2 <- sunspot.month[i]
4656/// cbind(i = i, time = time(sunspots)[i], sunspots = s1, ss.month = s2,
4657/// perc.diff = round(100*2*abs(s1-s2)/(s1+s2), 1))
4658///
4659/// ## How to recreate the "old" sunspot.month (R <= 3.0.3):
4660/// .sunspot.diff <- cbind(
4661/// i = c(1202L, 1256L, 1258L, 1301L, 1407L, 1429L, 1452L, 1455L,
4662/// 1663L, 2151L, 2329L, 2498L, 2594L, 2694L, 2819L),
4663/// res10 = c(1L, 1L, 1L, -1L, -1L, -1L, 1L, -1L,
4664/// 1L, 1L, 1L, 1L, 1L, 20L, 1L))
4665/// ssm0 <- sunspot.month[1:2988]
4666/// with(as.data.frame(.sunspot.diff), ssm0[i] <<- ssm0[i] - res10/10)
4667/// sunspot.month.0 <- ts(ssm0, start = 1749, frequency = 12)
4668/// ```
4669pub fn sunspot_month() -> PolarsResult<DataFrame> {
4670    CsvReader::new(Cursor::new(include_str!("sunspot.month.csv"))).finish()
4671}
4672
4673/// # Yearly Sunspot Data, 1700-1988
4674///
4675/// ## Description:
4676///
4677/// Yearly numbers of sunspots from 1700 to 1988 (rounded to one
4678/// digit).
4679///
4680/// Note that monthly numbers are available as ‘sunspot.month’, though
4681/// starting slightly later.
4682///
4683/// ## Usage:
4684///
4685/// sunspot.year
4686///
/// ## Format:
4688///
4689/// The univariate time series ‘sunspot.year’ contains 289
4690/// observations, and is of class ‘"ts"’.
4691///
4692/// ## Source:
4693///
4694/// H. Tong (1996) _Non-Linear Time Series_. Clarendon Press, Oxford,
4695/// p. 471.
4696///
4697/// ## See Also:
4698///
4699/// For _monthly_ sunspot numbers, see ‘sunspot.month’ and ‘sunspots’.
4700///
4701/// Regularly updated yearly sunspot numbers are available from
4702/// WDC-SILSO, Royal Observatory of Belgium, at
4703/// <http://www.sidc.be/silso/datafiles>
4704///
4705/// ## Examples:
4706///
4707/// ```r
4708/// utils::str(sm <- sunspots)# the monthly version we keep unchanged
4709/// utils::str(sy <- sunspot.year)
4710/// ## The common time interval
4711/// (t1 <- c(max(start(sm), start(sy)),  1)) # Jan 1749
4712/// (t2 <- c(min(  end(sm)[1],end(sy)[1]), 12)) # Dec 1983
4713/// s.m <- window(sm, start=t1, end=t2)
4714/// s.y <- window(sy, start=t1, end=t2[1]) # {irrelevant warning}
4715/// stopifnot(length(s.y) * 12 == length(s.m),
4716/// ## The yearly series *is* close to the averages of the monthly one:
4717/// all.equal(s.y, aggregate(s.m, FUN = mean), tolerance = 0.0020))
4718/// ## NOTE: Strangely, correctly weighting the number of days per month
4719/// ## (using 28.25 for February) is *not* closer than the simple mean:
4720/// ndays <- c(31, 28.25, rep(c(31,30, 31,30, 31), 2))
4721/// all.equal(s.y, aggregate(s.m, FUN = mean))# 0.0013
4722/// all.equal(s.y, aggregate(s.m, FUN = weighted.mean, w = ndays)) # 0.0017
4723/// ```
4724pub fn sunspot_year() -> PolarsResult<DataFrame> {
4725    CsvReader::new(Cursor::new(include_str!("sunspot.year.csv"))).finish()
4726}
4727
4728/// # Monthly Sunspot Numbers, 1749-1983
4729///
4730/// ## Description:
4731///
4732/// Monthly mean relative sunspot numbers from 1749 to 1983.
4733/// Collected at Swiss Federal Observatory, Zurich until 1960, then
4734/// Tokyo Astronomical Observatory.
4735///
4736/// ## Usage:
4737///
4738/// sunspots
4739///
4740/// ## Format:
4741///
4742/// A time series of monthly data from 1749 to 1983.
4743///
4744/// ## Source:
4745///
4746/// Andrews, D. F. and Herzberg, A. M. (1985) _Data: A Collection of
4747/// Problems from Many Fields for the Student and Research Worker_.
4748/// New York: Springer-Verlag.
4749///
4750/// ## See Also:
4751///
4752/// ‘sunspot.month’ has a longer (and a bit different) series,
4753/// ‘sunspot.year’ is a much shorter one.  See there for getting more
4754/// current sunspot numbers.
4755///
4756/// ## Examples:
4757///
4758/// ```r
4759/// require(graphics)
4760/// plot(sunspots, main = "sunspots data", xlab = "Year",
4761///  ylab = "Monthly sunspot numbers")
4762/// ```
4763pub fn sunspots() -> PolarsResult<DataFrame> {
4764    CsvReader::new(Cursor::new(include_str!("sunspots.csv"))).finish()
4765}
4766
4767/// # Swiss Fertility and Socioeconomic Indicators (1888) Data
4768///
4769/// ## Description:
4770///
4771/// Standardized fertility measure and socio-economic indicators for
4772/// each of 47 French-speaking provinces of Switzerland at about 1888.
4773///
4774/// ## Usage:
4775///
4776/// swiss
4777///
4778/// ## Format:
4779///
4780/// A data frame with 47 observations on 6 variables, _each_ of which
/// is in percent, i.e., in \[0, 100\].
4782///
/// * \[,1\]  Fertility  Ig, ‘common standardized fertility measure’
/// * \[,2\]  Agriculture  % of males involved in agriculture
/// as occupation
/// * \[,3\]  Examination  % draftees receiving highest mark
/// on army examination
/// * \[,4\]  Education  % education beyond primary school for draftees.
/// * \[,5\]  Catholic  % ‘catholic’ (as opposed to ‘protestant’).
/// * \[,6\]  Infant.Mortality  live births who live less than 1 year.
4791///
4792/// All variables but ‘Fertility’ give proportions of the population.
4793///
4794/// ## Details:
4795///
4796/// (paraphrasing Mosteller and Tukey):
4797///
4798/// Switzerland, in 1888, was entering a period known as the
4799/// _demographic transition_; i.e., its fertility was beginning to
4800/// fall from the high level typical of underdeveloped countries.
4801///
4802/// The data collected are for 47 French-speaking “provinces” at about
4803/// 1888.
4804///
4805/// Here, all variables are scaled to \[0, 100\], where in the original,
4806/// all but ‘"Catholic"’ were scaled to \[0, 1\].
4807///
4808/// ## Note:
4809///
4810/// Files for all 182 districts in 1888 and other years have been
4811/// available at <https://opr.princeton.edu/archive/pefp/switz.aspx>.
4812///
4813/// They state that variables ‘Examination’ and ‘Education’ are
4814/// averages for 1887, 1888 and 1889.
4815///
4816/// ## Source:
4817///
4818/// Project “16P5”, pages 549-551 in
4819///
4820/// Mosteller, F. and Tukey, J. W. (1977) _Data Analysis and
4821/// Regression: A Second Course in Statistics_.  Addison-Wesley,
4822/// Reading Mass.
4823///
4824/// indicating their source as “Data used by permission of Franice van
4825/// de Walle. Office of Population Research, Princeton University,
4826/// 1976.  Unpublished data assembled under NICHD contract number No
4827/// 1-HD-O-2077.”
4828///
4829/// ## References:
4830///
4831/// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
4832/// Language_.  Wadsworth & Brooks/Cole.
4833///
4834/// ## Examples:
4835///
4836/// ```r
4837/// require(stats); require(graphics)
4838/// pairs(swiss, panel = panel.smooth, main = "swiss data",
4839/// col = 3 + (swiss$Catholic > 50))
4840/// summary(lm(Fertility ~ . , data = swiss))
4841/// ```
4842pub fn swiss() -> PolarsResult<DataFrame> {
4843    CsvReader::new(Cursor::new(include_str!("swiss.csv"))).finish()
4844}
4845
4846/// # Pharmacokinetics of Theophylline
4847///
4848/// ## Description:
4849///
4850/// The ‘Theoph’ data frame has 132 rows and 5 columns of data from an
4851/// experiment on the pharmacokinetics of theophylline.
4852///
4853/// ## Usage:
4854///
4855/// Theoph
4856///
4857/// ## Format:
4858///
4859/// An object of class ‘c("nfnGroupedData", "nfGroupedData",
4860/// "groupedData", "data.frame")’ containing the following columns:
4861///
/// * Subject an ordered factor with levels ‘1’, ..., ‘12’ identifying
/// the subject on whom the observation was made.  The ordering
/// is by increasing maximum concentration of theophylline
/// observed.
4866///
4867/// * Wt weight of the subject (kg).
4868/// * Dose dose of theophylline administered orally to the subject
4869/// (mg/kg).
4870/// * Time time since drug administration when the sample was drawn
4871/// (hr).
4872/// * conc theophylline concentration in the sample (mg/L).
4873///
4874/// ## Details:
4875///
4876/// Boeckmann, Sheiner and Beal (1994) report data from a study by Dr.
4877/// Robert Upton of the kinetics of the anti-asthmatic drug
4878/// theophylline.  Twelve subjects were given oral doses of
4879/// theophylline then serum concentrations were measured at 11 time
4880/// points over the next 25 hours.
4881///
4882/// These data are analyzed in Davidian and Giltinan (1995) and
4883/// Pinheiro and Bates (2000) using a two-compartment open
4884/// pharmacokinetic model, for which a self-starting model function,
4885/// ‘SSfol’, is available.
4886///
4887/// This dataset was originally part of package ‘nlme’, and that has
4888/// methods (including for ‘[’, ‘as.data.frame’, ‘plot’ and ‘print’)
4889/// for its grouped-data classes.
4890///
4891/// ## Source:
4892///
4893/// Boeckmann, A. J., Sheiner, L. B. and Beal, S. L. (1994), _NONMEM
4894/// Users Guide: Part V_, NONMEM Project Group, University of
4895/// California, San Francisco.
4896///
4897/// Davidian, M. and Giltinan, D. M. (1995) _Nonlinear Models for
4898/// Repeated Measurement Data_, Chapman & Hall (section 5.5, p. 145
4899/// and section 6.6, p. 176)
4900///
4901/// Pinheiro, J. C. and Bates, D. M. (2000) _Mixed-effects Models in S
4902/// and S-PLUS_, Springer (Appendix A.29)
4903///
4904/// ## See Also:
4905///
4906/// ‘SSfol’
4907///
4908/// ## Examples:
4909///
4910/// ```r
4911/// require(stats); require(graphics)
4912///
4913/// coplot(conc ~ Time | Subject, data = Theoph, show.given = FALSE)
4914/// Theoph.4 <- subset(Theoph, Subject == 4)
4915/// fm1 <- nls(conc ~ SSfol(Dose, Time, lKe, lKa, lCl),
4916///  data = Theoph.4)
4917/// summary(fm1)
4918/// plot(conc ~ Time, data = Theoph.4,
4919///  xlab = "Time since drug administration (hr)",
4920///  ylab = "Theophylline concentration (mg/L)",
4921///  main = "Observed concentrations and fitted model",
4922///  sub  = "Theophylline data - Subject 4 only",
4923///  las = 1, col = 4)
4924/// xvals <- seq(0, par("usr")[2], length.out = 55)
4925/// lines(xvals, predict(fm1, newdata = list(Time = xvals)),
4926/// col = 4)
4927/// ```
4928pub fn theoph() -> PolarsResult<DataFrame> {
4929    CsvReader::new(Cursor::new(include_str!("Theoph.csv"))).finish()
4930}
4931
4932/// # Survival of passengers on the Titanic
4933///
4934/// ## Description:
4935///
4936/// This data set provides information on the fate of passengers on
4937/// the fatal maiden voyage of the ocean liner ‘Titanic’, summarized
4938/// according to economic status (class), sex, age and survival.
4939///
4940/// ## Usage:
4941///
4942/// Titanic
4943///
4944/// ## Format:
4945///
4946/// A 4-dimensional array resulting from cross-tabulating 2201
4947/// observations on 4 variables.  The variables and their levels are
4948/// as follows:
4949///
4950/// | No | Name     | Levels              |
4951/// |----|----------|---------------------|
4952/// | 1  | Class    | 1st, 2nd, 3rd, Crew |
4953/// | 2  | Sex      | Male, Female        |
4954/// | 3  | Age      | Child, Adult        |
4955/// | 4  | Survived | No, Yes             |
4956///
4957/// ## Details:
4958///
4959/// The sinking of the Titanic is a famous event, and new books are
/// still being published about it.  Many well-known facts—from the
/// proportions of first-class passengers to the ‘women and children
/// first’ policy, and the fact that that policy was not entirely
/// successful in saving the women and children in the third class—are
4964/// reflected in the survival rates for various classes of passenger.
4965///
4966/// These data were originally collected by the British Board of Trade
4967/// in their investigation of the sinking.  Note that there is not
4968/// complete agreement among primary sources as to the exact numbers
4969/// on board, rescued, or lost.
4970///
4971/// Due in particular to the very successful film ‘Titanic’, the last
4972/// years saw a rise in public interest in the Titanic.  Very detailed
4973/// data about the passengers is now available on the Internet, at
4974/// sites such as _Encyclopedia Titanica_
4975/// (<https://www.encyclopedia-titanica.org/>).
4976///
4977/// ## Source:
4978///
4979/// Dawson, Robert J. MacG. (1995), The ‘Unusual Episode’ Data
4980/// Revisited.  _Journal of Statistics Education_, *3*.
4981/// doi:10.1080/10691898.1995.11910499
4982/// <https://doi.org/10.1080/10691898.1995.11910499>.
4983///
4984/// The source provides a data set recording class, sex, age, and
4985/// survival status for each person on board of the Titanic, and is
4986/// based on data originally collected by the British Board of Trade
4987/// and reprinted in:
4988///
4989/// British Board of Trade (1990), _Report on the Loss of the
4990/// ‘Titanic’ (S.S.)_.  British Board of Trade Inquiry Report
4991/// (reprint).  Gloucester, UK: Allan Sutton Publishing.
4992///
4993/// ## Examples:
4994///
4995/// ```r
4996/// require(graphics)
4997/// mosaicplot(Titanic, main = "Survival on the Titanic")
4998/// ## Higher survival rates in children?
4999/// apply(Titanic, c(3, 4), sum)
5000/// ## Higher survival rates in females?
5001/// apply(Titanic, c(2, 4), sum)
5002/// ## Use loglm() in package 'MASS' for further analysis ...
5003/// ```
5004pub fn titanic() -> PolarsResult<DataFrame> {
5005    CsvReader::new(Cursor::new(include_str!("Titanic.csv"))).finish()
5006}
5007
5008/// # The Effect of Vitamin C on Tooth Growth in Guinea Pigs
5009///
5010/// ## Description:
5011///
5012/// The response is the length of odontoblasts (cells responsible for
5013/// tooth growth) in 60 guinea pigs.  Each animal received one of
5014/// three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of
5015/// two delivery methods, orange juice or ascorbic acid (a form of
5016/// vitamin C and coded as ‘VC’).
5017///
5018/// ## Usage:
5019///
5020/// ToothGrowth
5021///
5022/// ## Format:
5023///
5024/// A data frame with 60 observations on 3 variables.
5025///
/// * \[,1\]  len  numeric  Tooth length
/// * \[,2\]  supp  factor  Supplement type (VC or OJ).
5028/// * \[,3\]  dose  numeric  Dose in milligrams/day
5029///
5030/// ## Source:
5031///
5032/// C. I. Bliss (1952).  _The Statistics of Bioassay_.  Academic
5033/// Press.
5034///
5035/// ## References:
5036///
5037/// McNeil, D. R. (1977).  _Interactive Data Analysis_.  New York:
5038/// Wiley.
5039///
5040/// Crampton, E. W. (1947).  The growth of the odontoblast of the
5041/// incisor teeth as a criterion of vitamin C intake of the guinea
5042/// pig.  _The Journal of Nutrition_, *33*(5), 491-504.
5043/// doi:10.1093/jn/33.5.491 <https://doi.org/10.1093/jn/33.5.491>.
5044///
5045/// ## Examples:
5046///
5047/// ```r
5048/// require(graphics)
5049/// coplot(len ~ dose | supp, data = ToothGrowth, panel = panel.smooth,
5050/// xlab = "ToothGrowth data: length vs dose, given type of supplement")
5051/// ```
5052pub fn tooth_growth() -> PolarsResult<DataFrame> {
5053    CsvReader::new(Cursor::new(include_str!("ToothGrowth.csv"))).finish()
5054}
5055
5056/// # Yearly Treering Data, -6000-1979
5057///
5058/// ## Description:
5059///
5060/// Contains normalized tree-ring widths in dimensionless units.
5061///
5062/// ## Usage:
5063///
5064/// treering
5065///
5066/// ## Format:
5067///
5068/// A univariate time series with 7981 observations. The object is of
5069/// class ‘"ts"’.
5070///
5071/// Each tree ring corresponds to one year.
5072///
5073/// ## Details:
5074///
5075/// The data were recorded by Donald A. Graybill, 1980, from Gt Basin
5076/// Bristlecone Pine 2805M, 3726-11810 in Methuselah Walk, California.
5077///
5078/// ## Source:
5079///
5080/// Time Series Data Library: <https://robjhyndman.com/TSDL/>, series
5081/// ‘CA535.DAT’
5082///
5083/// ## References:
5084///
5085/// For some photos of Methuselah Walk see
5086/// <https://web.archive.org/web/20110523225828/http://www.ltrr.arizona.edu/~hallman/sitephotos/meth.html>
5087pub fn tree_ring() -> PolarsResult<DataFrame> {
5088    CsvReader::new(Cursor::new(include_str!("treering.csv"))).finish()
5089}
5090
5091/// # Diameter, Height and Volume for Black Cherry Trees
5092///
5093/// ## Description:
5094///
5095/// This data set provides measurements of the diameter, height and
5096/// volume of timber in 31 felled black cherry trees.  Note that the
5097/// diameter (in inches) is erroneously labelled Girth in the data. It
5098/// is measured at 4 ft 6 in above the ground.
5099///
5100/// ## Usage:
5101///
5102/// trees
5103///
5104/// ## Format:
5105///
5106/// A data frame with 31 observations on 3 variables.
5107///
/// * ‘\[,1\]’  ‘Girth’  numeric  Tree diameter (rather than girth, actually) in inches
5109/// * ‘\[,2\]’  ‘Height’  numeric  Height in ft
5110/// * ‘\[,3\]’  ‘Volume’  numeric  Volume of timber in cubic ft
5111///
/// ## Source:
5113///
5114/// Ryan, T. A., Joiner, B. L. and Ryan, B. F. (1976) _The Minitab
5115/// Student Handbook_.  Duxbury Press.
5116///
5117/// ## References:
5118///
5119/// Atkinson, A. C. (1985) _Plots, Transformations and Regression_.
5120/// Oxford University Press.
5121///
5122/// ## Examples:
5123///
5124/// ```r
5125/// require(stats); require(graphics)
5126/// pairs(trees, panel = panel.smooth, main = "trees data")
5127/// plot(Volume ~ Girth, data = trees, log = "xy")
5128/// coplot(log(Volume) ~ log(Girth) | Height, data = trees,
5129/// panel = panel.smooth)
5130/// summary(fm1 <- lm(log(Volume) ~ log(Girth), data = trees))
5131/// summary(fm2 <- update(fm1, ~ . + log(Height), data = trees))
5132/// step(fm2)
5133/// ## i.e., Volume ~= c * Height * Girth^2  seems reasonable
5134/// ```
5135pub fn trees() -> PolarsResult<DataFrame> {
5136    CsvReader::new(Cursor::new(include_str!("trees.csv"))).finish()
5137}
5138
5139/// # Student Admissions at UC Berkeley
5140///
5141/// ## Description:
5142///
5143/// Aggregate data on applicants to graduate school at Berkeley for
5144/// the six largest departments in 1973 classified by admission and
5145/// sex.
5146///
5147/// ## Usage:
5148///
5149/// UCBAdmissions
5150///
5151/// ## Format:
5152///
5153/// A 3-dimensional array resulting from cross-tabulating 4526
5154/// observations on 3 variables.  The variables and their levels are
5155/// as follows:
5156///
5157/// | No | Name   | Levels             |
5158/// |----|--------|--------------------|
5159/// | 1  | Admit  | Admitted, Rejected |
5160/// | 2  | Gender | Male, Female       |
5161/// | 3  | Dept   | A, B, C, D, E, F   |
5162///
5163/// ## Details:
5164///
5165/// This data set is frequently used for illustrating Simpson's
5166/// paradox, see Bickel _et al_ (1975).  At issue is whether the data
5167/// show evidence of sex bias in admission practices.  There were 2691
5168/// male applicants, of whom 1198 (44.5%) were admitted, compared with
5169/// 1835 female applicants of whom 557 (30.4%) were admitted.  This
5170/// gives a sample odds ratio of 1.83, indicating that males were
5171/// almost twice as likely to be admitted.  In fact, graphical methods
5172/// (as in the example below) or log-linear modelling show that the
5173/// apparent association between admission and sex stems from
5174/// differences in the tendency of males and females to apply to the
5175/// individual departments (females used to apply _more_ to
5176/// departments with higher rejection rates).
5177///
5178/// This data set can also be used for illustrating methods for
5179/// graphical display of categorical data, such as the general-purpose
5180/// mosaicplot or the fourfoldplot for 2-by-2-by-k tables.
5181///
5182/// ## References:
5183///
5184/// Bickel, P. J., Hammel, E. A., and O'Connell, J. W. (1975).  Sex
5185/// bias in graduate admissions: Data from Berkeley.  _Science_,
5186/// *187*, 398-403.  doi:10.1126/science.187.4175.398
5187/// <https://doi.org/10.1126/science.187.4175.398>.
5188///
5189/// ## Examples:
5190///
5191/// ```r
5192/// require(graphics)
5193/// ## Data aggregated over departments
5194/// apply(UCBAdmissions, c(1, 2), sum)
5195/// mosaicplot(apply(UCBAdmissions, c(1, 2), sum),
5196///  main = "Student admissions at UC Berkeley")
5197/// ## Data for individual departments
5198/// opar <- par(mfrow = c(2, 3), oma = c(0, 0, 2, 0))
5199/// for(i in 1:6)
5200///  mosaicplot(UCBAdmissions[,,i],
5201/// xlab = "Admit", ylab = "Sex",
5202/// main = paste("Department", LETTERS[i]))
5203/// mtext(expression(bold("Student admissions at UC Berkeley")),
5204/// outer = TRUE, cex = 1.5)
5205/// par(opar)
5206/// ```
5207pub fn ucb_admissions() -> PolarsResult<DataFrame> {
5208    CsvReader::new(Cursor::new(include_str!("UCBAdmissions.csv"))).finish()
5209}
5210
5211/// # Road Casualties in Great Britain 1969-84
5212///
5213/// ## Description:
5214///
5215/// ‘UKDriverDeaths’ is a time series giving the monthly totals of car
5216/// drivers in Great Britain killed or seriously injured Jan 1969 to
5217/// Dec 1984.  Compulsory wearing of seat belts was introduced on 31
5218/// Jan 1983.
5219///
5220/// ‘Seatbelts’ is more information on the same problem.
5221///
5222/// ## Usage:
5223///
5224/// UKDriverDeaths
5225/// Seatbelts
5226///
5227/// ## Format:
5228///
5229/// * ‘Seatbelts’ is a multiple time series, with columns
5230/// * ‘DriversKilled’ car drivers killed.
5231/// * ‘drivers’ same as ‘UKDriverDeaths’.
5232/// * ‘front’ front-seat passengers killed or seriously injured.
5233/// * ‘rear’ rear-seat passengers killed or seriously injured.
5234/// * ‘kms’ distance driven.
5235/// * ‘PetrolPrice’ petrol price.
5236/// * ‘VanKilled’ number of van (‘light goods vehicle’) drivers.
5237/// * ‘law’ 0/1: was the law in effect that month?
5238///
/// ## Source:
5240///
5241/// Harvey, A.C. (1989).  _Forecasting, Structural Time Series Models
5242/// and the Kalman Filter_.  Cambridge University Press, pp. 519-523.
5243///
5244/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
5245/// State Space Methods_.  Oxford University Press.
5246///
5247/// ## References:
5248///
5249/// Harvey, A. C. and Durbin, J. (1986).  The effects of seat belt
5250/// legislation on British road casualties: A case study in structural
5251/// time series modelling.  _Journal of the Royal Statistical Society_
5252/// series A, *149*, 187-227.  doi:10.2307/2981553
5253/// <https://doi.org/10.2307/2981553>.
5254///
5255/// ## Examples:
5256///
5257/// ```r
5258/// require(stats); require(graphics)
5259/// ## work with pre-seatbelt period to identify a model, use logs
5260/// work <- window(log10(UKDriverDeaths), end = 1982+11/12)
5261/// par(mfrow = c(3, 1))
5262/// plot(work); acf(work); pacf(work)
5263/// par(mfrow = c(1, 1))
5264/// (fit <- arima(work, c(1, 0, 0), seasonal = list(order = c(1, 0, 0))))
5265/// z <- predict(fit, n.ahead = 24)
5266/// ts.plot(log10(UKDriverDeaths), z$pred, z$pred+2*z$se, z$pred-2*z$se,
5267///  lty = c(1, 3, 2, 2), col = c("black", "red", "blue", "blue"))
5268///
5269/// ## now see the effect of the explanatory variables
5270/// X <- Seatbelts[, c("kms", "PetrolPrice", "law")]
5271/// X[, 1] <- log10(X[, 1]) - 4
5272/// arima(log10(Seatbelts[, "drivers"]), c(1, 0, 0),
5273/// seasonal = list(order = c(1, 0, 0)), xreg = X)
5274/// ```
5275pub fn uk_driver_deaths() -> PolarsResult<DataFrame> {
5276    CsvReader::new(Cursor::new(include_str!("UKDriverDeaths.csv"))).finish()
5277}
5278
5279/// # UK Quarterly Gas Consumption
5280///
5281/// ## Description:
5282///
5283/// Quarterly UK gas consumption from 1960Q1 to 1986Q4, in millions of
5284/// therms.
5285///
5286/// ## Usage:
5287///
5288/// UKgas
5289///
5290/// ## Format:
5291///
5292/// A quarterly time series of length 108.
5293///
5294/// ## Source:
5295///
5296/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
5297/// State Space Methods_.  Oxford University Press.
5298///
5299/// ## Examples:
5300///
5301/// ```r
5302/// ## maybe str(UKgas) ; plot(UKgas) ...
5303/// ```
5304pub fn uk_gas() -> PolarsResult<DataFrame> {
5305    CsvReader::new(Cursor::new(include_str!("UKgas.csv"))).finish()
5306}
5307
5308/// # Accidental Deaths in the US 1973-1978
5309///
5310/// ## Description:
5311///
5312/// A time series giving the monthly totals of accidental deaths in
5313/// the USA.  The values for the first six months of 1979 are 7798
5314/// 7406 8363 8460 9217 9316.
5315///
5316/// ## Usage:
5317///
5318/// USAccDeaths
5319///
5320/// ## Source:
5321///
5322/// P. J. Brockwell and R. A. Davis (1991) _Time Series: Theory and
5323/// Methods._ Springer, New York.
5324pub fn us_acc_deaths() -> PolarsResult<DataFrame> {
5325    CsvReader::new(Cursor::new(include_str!("USAccDeaths.csv"))).finish()
5326}
5327
5328/// # Violent Crime Rates by US State
5329///
5330/// ## Description:
5331///
5332/// This data set contains statistics, in arrests per 100,000
5333/// residents for assault, murder, and rape in each of the 50 US
5334/// states in 1973.  Also given is the percent of the population
5335/// living in urban areas.
5336///
5337/// ## Usage:
5338///
5339/// USArrests
5340///
5341/// ## Format:
5342///
5343/// A data frame with 50 observations on 4 variables.
5344///
/// * \[,1\]  Murder  numeric  Murder arrests (per 100,000)
/// * \[,2\]  Assault  numeric  Assault arrests (per 100,000)
/// * \[,3\]  UrbanPop  numeric  Percent urban population
/// * \[,4\]  Rape  numeric  Rape arrests (per 100,000)
5349///
5350/// ## Note:
5351///
5352/// ‘USArrests’ contains the data as in McNeil's monograph.  For the
5353/// ‘UrbanPop’ percentages, a review of the table (No. 21) in the
5354/// Statistical Abstracts 1975 reveals a transcription error for
5355/// Maryland (and that McNeil used the same “round to even” rule that
5356/// R's ‘round()’ uses), as found by Daniel S Coven (Arizona).
5357///
5358/// See the example below on how to correct the error and improve
/// accuracy for the ‘`<n>.5`’ percentages.
5360///
5361/// ## Source:
5362///
5363/// World Almanac and Book of facts 1975.  (Crime rates).
5364///
5365/// Statistical Abstracts of the United States 1975, p.20, (Urban
5366/// rates), possibly available as
5367/// <https://books.google.ch/books?id=zl9qAAAAMAAJ&pg=PA20>.
5368///
5369/// ## References:
5370///
5371/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
5372/// Wiley.
5373///
5374/// ## See Also:
5375///
5376/// The ‘state’ data sets.
5377///
5378/// ## Examples:
5379///
5380/// ```r
5381/// summary(USArrests)
5382///
5383/// require(graphics)
5384/// pairs(USArrests, panel = panel.smooth, main = "USArrests data")
5385///
5386/// ## Difference between 'USArrests' and its correction
5387/// USArrests["Maryland", "UrbanPop"] # 67 -- the transcription error
5388/// UA.C <- USArrests
5389/// UA.C["Maryland", "UrbanPop"] <- 76.6
5390///
5391/// ## also +/- 0.5 to restore the original  <n>.5  percentages
5392/// s5u <- c("Colorado", "Florida", "Mississippi", "Wyoming")
5393/// s5d <- c("Nebraska", "Pennsylvania")
5394/// UA.C[s5u, "UrbanPop"] <- UA.C[s5u, "UrbanPop"] + 0.5
5395/// UA.C[s5d, "UrbanPop"] <- UA.C[s5d, "UrbanPop"] - 0.5
5396///
5397/// ## ==> UA.C  is now a *C*orrected version of  USArrests
5398/// ```
5399pub fn us_arrests() -> PolarsResult<DataFrame> {
5400    CsvReader::new(Cursor::new(include_str!("USArrests.csv"))).finish()
5401}
5402
5403/// # Distances Between European Cities and Between US Cities
5404///
5405/// ## Description:
5406///
5407/// The ‘eurodist’ gives the road distances (in km) between 21 cities
5408/// in Europe.  The data are taken from a table in _The Cambridge
5409/// Encyclopaedia_.
5410///
5411/// ‘UScitiesD’ gives “straight line” distances between 10 cities in
5412/// the US.
5413///
5414/// ## Usage:
5415///
5416/// eurodist
5417/// UScitiesD
5418///
5419/// ## Format:
5420///
5421/// ‘dist’ objects based on 21 and 10 objects, respectively.  (You
5422/// must have the ‘stats’ package loaded to have the methods for this
5423/// kind of object available).
5424///
5425/// ## Source:
5426///
5427/// Crystal, D. Ed. (1990) _The Cambridge Encyclopaedia_.  Cambridge:
5428/// Cambridge University Press,
5429///
5430/// The US cities distances were provided by Pierre Legendre.
5431pub fn us_cities_d() -> PolarsResult<DataFrame> {
5432    CsvReader::new(Cursor::new(include_str!("UScitiesD.csv"))).finish()
5433}
5434
5435/// # Lawyers' Ratings of State Judges in the US Superior Court
5436///
5437/// ## Description:
5438///
5439/// Lawyers' ratings of state judges in the US Superior Court.
5440///
5441/// ## Usage:
5442///
5443/// USJudgeRatings
5444///
5445/// ## Format:
5446///
5447/// A data frame containing 43 observations on 12 numeric variables.
5448///
5449/// * \[,1\]  CONT  Number of contacts of lawyer with judge.
5450/// * \[,2\]  INTG  Judicial integrity.
5451/// * \[,3\]  DMNR  Demeanor.
5452/// * \[,4\]  DILG  Diligence.
5453/// * \[,5\]  CFMG  Case flow managing.
5454/// * \[,6\]  DECI  Prompt decisions.
5455/// * \[,7\]  PREP  Preparation for trial.
5456/// * \[,8\]  FAMI  Familiarity with law.
5457/// * \[,9\]  ORAL  Sound oral rulings.
5458/// * \[,10\]  WRIT  Sound written rulings.
5459/// * \[,11\]  PHYS  Physical ability.
5460/// * \[,12\]  RTEN  Worthy of retention.
5461///
5462/// ## Source:
5463///
5464/// New Haven Register, 14 January, 1977 (from John Hartigan).
5465///
5466/// ## Examples:
5467///
5468/// ```r
5469/// require(graphics)
5470/// pairs(USJudgeRatings, main = "USJudgeRatings data")
5471/// ```
5472pub fn us_judge_ratings() -> PolarsResult<DataFrame> {
5473    CsvReader::new(Cursor::new(include_str!("USJudgeRatings.csv"))).finish()
5474}
5475
5476/// # Personal Expenditure Data
5477///
5478/// ## Description:
5479///
5480/// This data set consists of United States personal expenditures (in
/// billions of dollars) in the categories: food and tobacco,
5482/// household operation, medical and health, personal care, and
5483/// private education for the years 1940, 1945, 1950, 1955 and 1960.
5484///
5485/// ## Usage:
5486///
5487/// USPersonalExpenditure
5488///
5489/// ## Format:
5490///
5491/// A matrix with 5 rows and 5 columns.
5492///
5493/// ## Source:
5494///
5495/// The World Almanac and Book of Facts, 1962, page 756.
5496///
5497/// ## References:
5498///
5499/// Tukey, J. W. (1977) _Exploratory Data Analysis_.  Addison-Wesley.
5500///
5501/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
5502///
5503/// ## Examples:
5504///
5505/// ```r
5506/// require(stats) # for medpolish
5507/// USPersonalExpenditure
5508/// medpolish(log10(USPersonalExpenditure))
5509/// ```
5510pub fn us_personal_expenditure() -> PolarsResult<DataFrame> {
5511    CsvReader::new(Cursor::new(include_str!("USPersonalExpenditure.csv"))).finish()
5512}
5513
5514/// # Populations Recorded by the US Census
5515///
5516/// ## Description:
5517///
5518/// This data set gives the population of the United States (in
5519/// millions) as recorded by the decennial census for the period
5520/// 1790-1970.
5521///
5522/// ## Usage:
5523///
5524/// uspop
5525///
5526/// ## Format:
5527///
5528/// A time series of 19 values.
5529///
5530/// ## Source:
5531///
5532/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
5533/// Wiley.
5534///
5535/// ## Examples:
5536///
5537/// ```r
5538/// require(graphics)
5539/// plot(uspop, log = "y", main = "uspop data", xlab = "Year",
5540///  ylab = "U.S. Population (millions)")
5541/// ```
5542pub fn us_pop() -> PolarsResult<DataFrame> {
5543    CsvReader::new(Cursor::new(include_str!("uspop.csv"))).finish()
5544}
5545
5546/// # Death Rates in Virginia (1940)
5547///
5548/// ## Description:
5549///
5550/// Death rates per 1000 in Virginia in 1940.
5551///
5552/// ## Usage:
5553///
5554/// VADeaths
5555///
5556/// ## Format:
5557///
5558/// A matrix with 5 rows and 4 columns.
5559///
5560/// ## Details:
5561///
5562/// The death rates are measured per 1000 population per year.  They
5563/// are cross-classified by age group (rows) and population group
5564/// (columns).  The age groups are: 50-54, 55-59, 60-64, 65-69, 70-74
5565/// and the population groups are Rural/Male, Rural/Female, Urban/Male
5566/// and Urban/Female.
5567///
5568/// This provides a rather nice 3-way analysis of variance example.
5569///
5570/// ## Source:
5571///
/// Molyneaux, L., Gilliam, S. K., and Florant, L. C. (1947)
5573/// Differences in Virginia death rates by color, sex, age, and rural
5574/// or urban residence.  _American Sociological Review_, *12*,
5575/// 525-535.
5576///
5577/// ## References:
5578///
5579/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
5580///
5581/// ## Examples:
5582///
5583/// ```r
5584/// require(stats); require(graphics)
5585/// n <- length(dr <- c(VADeaths))
5586/// nam <- names(VADeaths)
5587/// d.VAD <- data.frame(
5588/// Drate = dr,
5589/// age = rep(ordered(rownames(VADeaths)), length.out = n),
5590/// gender = gl(2, 5, n, labels = c("M", "F")),
5591/// site =  gl(2, 10, labels = c("rural", "urban")))
5592/// coplot(Drate ~ as.numeric(age) | gender * site, data = d.VAD,
5593/// panel = panel.smooth, xlab = "VADeaths data - Given: gender")
5594/// summary(aov.VAD <- aov(Drate ~ .^2, data = d.VAD))
5595/// opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0))
5596/// plot(aov.VAD)
5597/// par(opar)
5598/// ```
5599pub fn va_deaths() -> PolarsResult<DataFrame> {
5600    CsvReader::new(Cursor::new(include_str!("VADeaths.csv"))).finish()
5601}
5602
5603/// # Topographic Information on Auckland's Maunga Whau Volcano
5604///
5605/// ## Description:
5606///
5607/// Maunga Whau (Mt Eden) is one of about 50 volcanos in the Auckland
5608/// volcanic field.  This data set gives topographic information for
5609/// Maunga Whau on a 10m by 10m grid.
5610///
5611/// ## Usage:
5612///
5613/// volcano
5614///
5615/// ## Format:
5616///
5617/// A matrix with 87 rows and 61 columns, rows corresponding to grid
5618/// lines running east to west and columns to grid lines running south
5619/// to north.
5620///
5621/// ## Source:
5622///
5623/// Digitized from a topographic map by Ross Ihaka.  These data should
5624/// not be regarded as accurate.
5625///
5626/// ## See Also:
5627///
5628/// ‘filled.contour’ for a nice plot.
5629///
5630/// ## Examples:
5631///
5632/// ```r
5633/// require(grDevices); require(graphics)
5634/// filled.contour(volcano, color.palette = terrain.colors, asp = 1)
5635/// title(main = "volcano data: filled contour map")
5636/// ```
5637pub fn volcano() -> PolarsResult<DataFrame> {
5638    CsvReader::new(Cursor::new(include_str!("volcano.csv"))).finish()
5639}
5640
5641/// # The Number of Breaks in Yarn during Weaving
5642///
5643/// ## Description:
5644///
5645/// This data set gives the number of warp breaks per loom, where a
5646/// loom corresponds to a fixed length of yarn.
5647///
5648/// ## Usage:
5649///
5650/// warpbreaks
5651///
5652/// ## Format:
5653///
5654/// A data frame with 54 observations on 3 variables.
5655///
/// * ‘\[,1\]’  ‘breaks’  numeric  The number of breaks
/// * ‘\[,2\]’  ‘wool’  factor  The type of wool (A or B)
/// * ‘\[,3\]’  ‘tension’  factor  The level of tension (L, M, H)
5659///
5660/// There are measurements on 9 looms for each of the six types of
5661/// warp (‘AL’, ‘AM’, ‘AH’, ‘BL’, ‘BM’, ‘BH’).
5662///
5663/// ## Source:
5664///
5665/// Tippett, L. H. C. (1950) _Technological Applications of
5666/// Statistics_.  Wiley.  Page 106.
5667///
5668/// ## References:
5669///
5670/// Tukey, J. W. (1977) _Exploratory Data Analysis_.  Addison-Wesley.
5671///
5672/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
5673///
5674/// ## See Also:
5675///
5676/// ‘xtabs’ for ways to display these data as a table.
5677///
5678/// ## Examples:
5679///
5680/// ```r
5681/// require(stats); require(graphics)
5682/// summary(warpbreaks)
5683/// opar <- par(mfrow = c(1, 2), oma = c(0, 0, 1.1, 0))
5684/// plot(breaks ~ tension, data = warpbreaks, col = "lightgray",
5685///  varwidth = TRUE, subset = wool == "A", main = "Wool A")
5686/// plot(breaks ~ tension, data = warpbreaks, col = "lightgray",
5687///  varwidth = TRUE, subset = wool == "B", main = "Wool B")
5688/// mtext("warpbreaks data", side = 3, outer = TRUE)
5689/// par(opar)
5690/// summary(fm1 <- lm(breaks ~ wool*tension, data = warpbreaks))
5691/// anova(fm1)
5692/// ```
5693pub fn warp_breaks() -> PolarsResult<DataFrame> {
5694    CsvReader::new(Cursor::new(include_str!("warpbreaks.csv"))).finish()
5695}
5696
5697/// # Average Heights and Weights for American Women
5698///
5699/// ## Description:
5700///
5701/// This data set gives the average heights and weights for American
5702/// women aged 30-39.
5703///
5704/// ## Usage:
5705///
5706/// women
5707///
5708/// ## Format:
5709///
5710/// A data frame with 15 observations on 2 variables.
5711///
5712/// * ‘\[,1\]’  ‘height’  numeric  Height (in)
5713/// * ‘\[,2\]’  ‘weight’  numeric  Weight (lbs)
5714///
5715/// ## Details:
5716///
5717/// The data set appears to have been taken from the American Society
5718/// of Actuaries _Build and Blood Pressure Study_ for some (unknown to
5719/// us) earlier year.
5720///
5721/// The World Almanac notes: “The figures represent weights in
5722/// ordinary indoor clothing and shoes, and heights with shoes”.
5723///
5724/// ## Source:
5725///
5726/// The World Almanac and Book of Facts, 1975.
5727///
5728/// ## References:
5729///
5730/// McNeil, D. R. (1977) _Interactive Data Analysis_.  Wiley.
5731///
5732/// ## Examples:
5733///
5734/// ```r
5735/// require(graphics)
5736/// plot(women, xlab = "Height (in)", ylab = "Weight (lb)",
5737///  main = "women data: American women aged 30-39")
5738/// ```
5739pub fn women() -> PolarsResult<DataFrame> {
5740    CsvReader::new(Cursor::new(include_str!("women.csv"))).finish()
5741}
5742
5743/// # The World's Telephones
5744///
5745/// ## Description:
5746///
5747/// The number of telephones in various regions of the world (in
5748/// thousands).
5749///
5750/// ## Usage:
5751///
5752/// WorldPhones
5753///
5754/// ## Format:
5755///
5756/// A matrix with 7 rows and 8 columns.  The columns of the matrix
5757/// give the figures for a given region, and the rows the figures for
5758/// a year.
5759///
5760/// The regions are: North America, Europe, Asia, South America,
5761/// Oceania, Africa, Central America.
5762///
5763/// The years are: 1951, 1956, 1957, 1958, 1959, 1960, 1961.
5764///
5765/// ## Source:
5766///
5767/// AT&T (1961) _The World's Telephones_.
5768///
5769/// ## References:
5770///
5771/// McNeil, D. R. (1977) _Interactive Data Analysis_.  New York:
5772/// Wiley.
5773///
5774/// ## Examples:
5775///
5776/// ```r
5777/// require(graphics)
5778/// matplot(rownames(WorldPhones), WorldPhones, type = "b", log = "y",
5779///  xlab = "Year", ylab = "Number of telephones (1000's)")
5780/// legend(1951.5, 80000, colnames(WorldPhones), col = 1:6, lty = 1:5,
5781/// pch = rep(21, 7))
5782/// title(main = "World phones data: log scale for response")
5783/// ```
5784pub fn world_phones() -> PolarsResult<DataFrame> {
5785    CsvReader::new(Cursor::new(include_str!("WorldPhones.csv"))).finish()
5786}
5787
5788/// # Internet Usage per Minute
5789///
5790/// ## Description:
5791///
5792/// A time series of the numbers of users connected to the Internet
5793/// through a server every minute.
5794///
5795/// ## Usage:
5796///
5797/// WWWusage
5798///
5799/// ## Format:
5800///
5801/// A time series of length 100.
5802///
5803/// ## Source:
5804///
5805/// Durbin, J. and Koopman, S. J. (2001).  _Time Series Analysis by
5806/// State Space Methods_.  Oxford University Press.
5807///
5808/// ## References:
5809///
5810/// Makridakis, S., Wheelwright, S. C. and Hyndman, R. J. (1998).
5811/// _Forecasting: Methods and Applications_.  Wiley.
5812///
5813/// ## Examples:
5814///
5815/// ```r
5816/// require(graphics)
5817/// work <- diff(WWWusage)
5818/// par(mfrow = c(2, 1)); plot(WWWusage); plot(work)
5819/// ## Not run:
5820///
5821/// require(stats)
5822/// aics <- matrix(, 6, 6, dimnames = list(p = 0:5, q = 0:5))
5823/// for(q in 1:5) aics[1, 1+q] <- arima(WWWusage, c(0, 1, q),
5824/// optim.control = list(maxit = 500))$aic
5825/// for(p in 1:5)
5826/// for(q in 0:5) aics[1+p, 1+q] <- arima(WWWusage, c(p, 1, q),
5827/// optim.control = list(maxit = 500))$aic
5828/// round(aics - min(aics, na.rm = TRUE), 2)
5829/// ## End(Not run)
5830/// ```
5831pub fn www_usage() -> PolarsResult<DataFrame> {
5832    CsvReader::new(Cursor::new(include_str!("WWWusage.csv"))).finish()
5833}
r2rs_datasets/data/mod.rs

r2rs_datasets/data/
mod.rs