r2rs_rfit/data/mod.rs
1use std::io::Cursor;
2
3use polars::prelude::*;
4
5/// # Baseball Card Data
6///
7/// ## Description:
8///
9/// These data come from the back-side of 59 baseball cards that
10/// Carrie had.
11///
12/// ## Usage:
13///
14/// data(baseball)
15///
16/// ## Format:
17///
18/// A data frame with 59 observations on the following 6 variables.
19///
20/// * ‘height’ Height in inches
21/// * ‘weight’ Weight in pounds
22/// * ‘bat’ a factor with levels ‘L’ ‘R’ ‘S’
23/// * ‘throw’ a factor with levels ‘L’ ‘R’
24/// * ‘field’ a factor with levels ‘0’ ‘1’
25/// * ‘average’ ERA if the player is a pitcher and his batting average
26/// if the player is a fielder
27///
28/// ## Source:
29///
30/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
31/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
32///
33/// ## Examples:
34///
35/// ```r
36/// data(baseball)
37/// wilcox.test(height~field,data=baseball)
38/// rfit(weight~height,data=baseball)
39/// ```
40pub fn baseball() -> PolarsResult<DataFrame> {
41 CsvReader::new(Cursor::new(include_str!("baseball.csv"))).finish()
42}
43
44/// # Baseball Salaries
45///
46/// ## Description:
47///
48/// Salaries of 176 professional baseball players for the 1987 season.
49///
50/// ## Usage:
51///
52/// data(bbsalaries)
53///
54/// ## Format:
55///
56/// A data frame with 176 observations on the following 8 variables.
57///
58/// * ‘logYears’ Log of the number of years experience
59/// * ‘aveWins’ Average wins per year
60/// * ‘aveLosses’ Average losses per year
61/// * ‘era’ Earned Run Average
62/// * ‘aveGames’ Average games pitched in per year
63/// * ‘aveInnings’ Average number of innings pitched per year
64/// * ‘aveSaves’ Average number of saves per year
65/// * ‘logSalary’ Log of the base salary in dollars
66///
67/// ## Source:
68///
69/// <http://lib.stat.cmu.edu/datasets/baseball.data>
70///
71/// ## References:
72///
73/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
74/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
75///
76/// ## Examples:
77///
78/// ```r
79/// data(bbsalaries)
80/// summary(rfit(logSalary~logYears+aveWins+aveLosses+era+aveGames+aveInnings+aveSaves,data=bbsalaries))
81/// ```
82pub fn bbsalaries() -> PolarsResult<DataFrame> {
83 CsvReader::new(Cursor::new(include_str!("bbsalaries.csv"))).finish()
84}
85
86/// # Box and Cox (1964) data.
87///
88/// ## Description:
89///
90/// The data are the results of a 3 * 4 two-way design, where
91/// forty-eight animals were exposed to three different poisons and
92/// four different treatments. The design is balanced with four
93/// replications per cell. The response was the log survival time of
94/// the animal.
95///
96/// ## Usage:
97///
98/// data(BoxCox)
99///
100/// ## Format:
101///
102/// A data frame with 48 observations on the following 3 variables.
103///
104/// * ‘logSurv’ log Survival Time
105/// * ‘Poison’ a factor indicating poison level
106/// * ‘Treatment’ a factor indicating treatment level
107///
108/// ## Source:
109///
110/// Box, G.E.P. and Cox, D.R. (1964), An analysis of transformations,
/// _Journal of the Royal Statistical Society, Series B,
112/// Methodological_, 26, 211-252.
113///
114/// ## References:
115///
116/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
117/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
118///
119/// ## Examples:
120///
121/// ```r
122/// data(BoxCox)
123/// with(BoxCox,interaction.plot(Treatment,Poison,logSurv,median))
124/// raov(logSurv~Poison+Treatment,data=BoxCox)
125/// ```
126pub fn boxcox() -> PolarsResult<DataFrame> {
127 CsvReader::new(Cursor::new(include_str!("BoxCox.csv"))).finish()
128}
129
130/// # Cardiovascular risk factors
131///
132/// ## Description:
133///
/// Data from a study to investigate association between uric acid and
/// various cardiovascular risk factors in developing countries
/// (Heritier et al. 2009). There are 474 men and 524 women aged
/// 25-64.
138///
139/// ## Usage:
140///
141/// data(CardioRiskFactors)
142///
143/// ## Format:
144///
145/// A data frame with 998 observations on the following 14 variables.
146///
147/// * ‘age’ Age of subject
148/// * ‘bmi’ Body Mass Index
149/// * ‘waisthip’ waist/hip ratio(?)
150/// * ‘smok’ indicator for regular smoker
151/// * ‘choles’ total cholesterol
152/// * ‘trig’ triglycerides level in body fat
/// * ‘hdl’ high-density lipoprotein(?)
154/// * ‘ldl’ low-density lipoprotein
155/// * ‘sys’ systolic blood pressure
156/// * ‘dia’ diastolic blood pressure(?)
157/// * ‘Uric’ serum uric
158/// * ‘sex’ indicator for male
159/// * ‘alco’ alcohol intake (mL/day)
160/// * ‘apoa’ apoprotein A
161///
162/// ## Details:
163///
/// Data set and description taken from Heritier et al. (2009) (cf.
/// Conen et al. 2004).
166///
167/// ## Source:
168///
169/// Heritier, S., Cantoni, E., Copt, S., and Victoria-Feser, M.
170/// (2009), _Robust Methods in Biostatistics_, New York: John Wiley
171/// and Sons.
172///
173/// Conen, D., Wietlisbach, V., Bovet, P., Shamlaye, C., Riesen, W.,
174/// Paccaud, F., and Burnier, M. (2004), Prevalence of hyperuricemia
175/// and relation of serum uric acid with cardiovascular risk factors
176/// in a developing country. _BMC Public Health_.
177///
178/// ## Examples:
179///
180/// ```r
181/// data(CardioRiskFactors)
182/// fitF<-rfit(Uric~bmi+sys+choles+ldl+sex+smok+alco+apoa+trig+age,data=CardioRiskFactors)
183/// fitR<-rfit(Uric~bmi+sys+choles+ldl+sex,data=CardioRiskFactors)
184/// drop.test(fitF,fitR)
185/// summary(fitR)
186/// ```
187pub fn cardioriskfactors() -> PolarsResult<DataFrame> {
188 CsvReader::new(Cursor::new(include_str!("CardioRiskFactors.csv"))).finish()
189}
190
191/// # Free Fatty Acid Data
192///
193/// ## Description:
194///
195/// The response variable is level of free fatty acid in a sample of
196/// prepubescent boys. The explanatory variables are age (in months),
197/// weight (in lbs), and skin fold thickness.
198///
199/// ## Usage:
200///
201/// data(ffa)
202///
203/// ## Format:
204///
205/// A data frame with 41 rows and 4 columns.
206///
207/// * ‘age’ age in years
208/// * ‘weight’ weight in lbs
/// * ‘skin’ skin fold thickness
210/// * ‘ffa’ free fatty acid
211///
212/// ## Source:
213///
214/// Morrison, D.F. (1983), _Applied Linear Statistical Models_,
215/// Englewood Cliffs, NJ:Prentice Hall.
216///
217/// ## References:
218///
219/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
220/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
221///
222/// ## Examples:
223///
224/// ```r
225/// data(ffa)
226/// summary(rfit(ffa~age+weight+skin,data=ffa)) #using the default (Wilcoxon scores)
227/// summary(rfit(ffa~age+weight+skin,data=ffa,scores=bentscores1))
228/// ```
229pub fn ffa() -> PolarsResult<DataFrame> {
230 CsvReader::new(Cursor::new(include_str!("ffa.csv"))).finish()
231}
232
233/// # Quail Data
234///
235/// ## Description:
236///
/// Thirty-nine quail were randomized to one of four treatments for
/// lowering cholesterol.
239///
240/// ## Usage:
241///
242/// data(quail)
243///
244/// ## Format:
245///
246/// A data frame with 39 observations on the following 2 variables.
247///
248/// * ‘treat’ a factor with levels ‘1’ ‘2’ ‘3’ ‘4’
249/// * ‘ldl’ a numeric vector
250///
251/// ## Source:
252///
253/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
254/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
255///
256/// ## Examples:
257///
258/// ```r
259/// data(quail)
260/// boxplot(ldl~treat,data=quail)
261/// ```
262pub fn quail() -> PolarsResult<DataFrame> {
263 CsvReader::new(Cursor::new(include_str!("quail.csv"))).finish()
264}
265
266/// # Serum Level of luteinizing hormone (LH)
267///
268/// ## Description:
269///
270/// Hollander and Wolfe (1999) discuss a 2 by 5 factorial design for a
271/// study to determine the effect of light on the release of
272/// luteinizing hormone (LH). The factors in the design are: light
273/// regimes at two levels (constant light and 14 hours of light
274/// followed by 10 hours of darkness) and a luteinizing release factor
275/// (LRF) at 5 different dosage levels. The response is the level of
276/// luteinizing hormone (LH), nanograms per ml of serum in blood
277/// samples. Sixty rats were put on test under these 10 treatment
278/// combinations, six rats per combination.
279///
280/// ## Usage:
281///
282/// data(serumLH)
283///
284/// ## Format:
285///
286/// A data frame with 60 observations on the following 3 variables.
287///
288/// * ‘serum’ a numeric vector
289/// * ‘light.regime’ a factor with levels ‘Constant’ ‘Intermittent’
290/// * ‘LRF.dose’ a factor with levels ‘0’ ‘10’ ‘1250’ ‘250’ ‘50’
291///
292/// ## Source:
293///
294/// Hollander, M. and Wolfe, D.A. (1999), _Nonparametric Statistical
295/// Methods_, New York: Wiley.
296///
297/// ## References:
298///
299/// Hollander, M. and Wolfe, D.A. (1999), _Nonparametric Statistical
300/// Methods_, New York: Wiley.
301///
302/// ## Examples:
303///
304/// ```r
305/// data(serumLH)
306/// raov(serum~light.regime + LRF.dose + light.regime*LRF.dose, data = serumLH)
307/// ```
308pub fn serumlh() -> PolarsResult<DataFrame> {
309 CsvReader::new(Cursor::new(include_str!("serumLH.csv"))).finish()
310}
311
312/// # Telephone Data
313///
314/// ## Description:
315///
316/// The number of telephone calls (in tens of millions) made in
317/// Belgium from 1950-1973.
318///
319/// ## Usage:
320///
321/// data(telephone)
322///
323/// ## Format:
324///
325/// A data frame with 24 observations on the following 2 variables.
326///
327/// * ‘year’ years since 1950 AD
328/// * ‘calls’ number of telephone calls in tens of millions
329///
330/// ## Source:
331///
332/// Rousseeuw, P.J. and Leroy, A.M. (1987), _Robust Regression and
333/// Outlier Detection_, New York: Wiley.
334///
335/// ## References:
336///
337/// Hettmansperger, T.P. and McKean J.W. (2011), _Robust Nonparametric
338/// Statistical Methods, 2nd ed._, New York: Chapman-Hall.
339///
340/// ## Examples:
341///
342/// ```r
343/// data(telephone)
344/// plot(telephone)
345/// abline(rfit(calls~year,data=telephone))
346/// ```
347pub fn telephone() -> PolarsResult<DataFrame> {
348 CsvReader::new(Cursor::new(include_str!("telephone.csv"))).finish()
349}