Skip to main content

agnes/
view.rs

/*!
Main `DataView` struct and associated implementations.

# Aggregation

There are three types of data aggregation supported by `agnes`:
* Data merging -- combining two `DataView` objects with the same number of records together,
creating a new `DataView` with all the fields of the two source `DataView`s.
* Data appending -- combining two `DataView` objects with the same fields, creating a new `DataView`
object with all of the records of the two source `DataView`s.
* Data joining -- combining two `DataView` objects using a specified join, creating a new
`DataView` object with a subset of records from the two source `DataView`s according to the join
parameters.

*/
16#[cfg(test)]
17use std::collections::VecDeque;
18use std::collections::{HashMap, HashSet};
19use std::fmt::{self, Debug, Display, Formatter};
20use std::hash::{Hash, Hasher};
21use std::marker::PhantomData;
22
23use prettytable as pt;
24#[cfg(feature = "serialize")]
25use serde::ser::{Serialize, SerializeMap, Serializer};
26
27use access::*;
28use cons::*;
29use error;
30use field::{FieldData, Value};
31use fieldlist::FieldPayloadCons;
32#[cfg(test)]
33use frame::StoreRefCount;
34use frame::{Framed, IntoFrame, IntoMeltFrame, IntoStrFrame};
35use join::*;
36use label::*;
37use partial::{DeriveCapabilities, Func, FuncDefault, Implemented, IsImplemented, PartialMap};
38use permute::{
39    FilterPerm, SortOrder, SortOrderComparator, SortOrderUnstable, SortOrderUnstableComparator,
40    UpdatePermutation,
41};
42use select::{FieldSelect, SelectFieldByLabel};
43use store::{IntoStore, IntoView};
44
/// Cons-list of `DataFrame`s held by a `DataView`. `FrameIndex` is simply an index used by
/// `FrameLookupCons` to look up `DataFrame`s for a specified `Label`, and `Frame` is the type
/// of the associated `DataFrame`.
pub(crate) type ViewFrameCons<FrameIndex, Frame, Tail> = LVCons<FrameIndex, Frame, Tail>;
49
/// Cons-list of field labels along with the details necessary to look up each label in a
/// `DataView`'s `ViewFrameCons` cons-list of `DataFrame`s.
///
/// * `Label` is the label of the field as seen through the `DataView`.
/// * `FrameIndex` is the index of the `DataFrame` (within the `ViewFrameCons`) that contains the
///   field.
/// * `FrameLabel` is the label of the field within that `DataFrame`; it may differ from `Label`
///   since `DataView` supports renaming fields.
pub(crate) type FrameLookupCons<Label, FrameIndex, FrameLabel, Tail> =
    LMCons<Label, FrameDetailMarkers<FrameIndex, FrameLabel>, Tail>;
57
/// A `DataView` is a specific view of data stored inside a `DataStore`. It consists of a list of
/// `DataFrame` objects, which themselves reference individual `DataStore`s.
///
/// The type parameter `Frames` is a `ViewFrameCons` cons-list which contains the `DataFrame`
/// objects referenced by this `DataView`. The type parameter `Labels` is a `FrameLookupCons` which
/// provides lookup functionality from a specific `Label` into the `Frames` cons-list.
#[derive(Debug, Clone, Default)]
pub struct DataView<Labels, Frames> {
    // `Labels` exists only at the type level, so it is carried as a zero-sized marker.
    pub(crate) _labels: PhantomData<Labels>,
    // The frames cons-list holding the data this view refers to.
    pub(crate) frames: Frames,
}
69
/// Marker struct with the details of where to find a field's data. The `FrameIndex` specifies
/// the index of the [DataFrame](../frame/struct.DataFrame.html) in a
/// [DataView](struct.DataView.html)'s `Frames` cons-list. The `FrameLabel` denotes the label of
/// the field within that frame.
///
/// This struct stores no runtime data; both parameters are carried purely as type-level markers.
pub struct FrameDetailMarkers<FrameIndex, FrameLabel> {
    // Zero-sized marker tying this struct to its two type parameters.
    _marker: PhantomData<(FrameIndex, FrameLabel)>,
}
/// A trait for providing the associated `FrameIndex` and `FrameLabel` types for a
/// [FrameDetailMarkers](struct.FrameDetailMarkers.html) struct.
///
/// The trait has no methods; it only exposes the two associated types.
pub trait FrameDetails {
    /// The associated frame index.
    type FrameIndex: Identifier;
    /// The associated `Label` within the frame.
    type FrameLabel: Label;
}
// Exposes the two type parameters of `FrameDetailMarkers` as the associated types of
// `FrameDetails`, provided they satisfy the bounds the trait places on those types.
impl<FrameIndex, FrameLabel> FrameDetails for FrameDetailMarkers<FrameIndex, FrameLabel>
where
    FrameIndex: Identifier,
    FrameLabel: Label,
{
    type FrameIndex = FrameIndex;
    type FrameLabel = FrameLabel;
}
93
94impl<Labels, Frames> DataView<Labels, Frames> {
95    /// Creates a new `DataView` with `frames`.
96    pub fn new(frames: Frames) -> DataView<Labels, Frames> {
97        DataView {
98            _labels: PhantomData,
99            frames,
100        }
101    }
102}
103
104impl<Labels, Frames> DataView<Labels, Frames> {
105    /// Field names in this data view
106    pub fn fieldnames<'a>(&'a self) -> Vec<&'a str>
107    where
108        Labels: StrLabels,
109    {
110        <Labels as StrLabels>::labels().into()
111    }
112}
113
/// A trait for deriving the [LabelCons](../label/type.LabelCons.html) of frame indices of a type.
///
/// For a label lookup list, the implementations in this module collect the `FrameIndex` of every
/// entry, in order, into `LabelList`.
pub trait FrameIndexList {
    /// The associated `LabelCons` for this type.
    type LabelList;
}
119
// Base case: the empty list has an empty list of frame indices.
impl FrameIndexList for Nil {
    type LabelList = Nil;
}
123
// Recursive case: prepend the head entry's `FrameIndex` to the frame index list of the tail.
// Note that `Label` and `FrameLabel` do not contribute to the resulting list.
impl<Label, FrameIndex, FrameLabel, Tail> FrameIndexList
    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
where
    Tail: FrameIndexList,
{
    type LabelList = LCons<FrameIndex, <Tail as FrameIndexList>::LabelList>;
}
131
132impl<Labels, Frames> DataView<Labels, Frames>
133where
134    Frames: Clone,
135{
136    /// Generate a new subview of this `DataView`. LabelList is a
137    /// [LabelCons](../label/type.LabelCons.html) list of labels, which can be generated using the
138    /// [Labels](../macro.Labels.html) macro.
139    pub fn v<LabelList>(&self) -> <Self as Subview<LabelList>>::Output
140    where
141        Self: Subview<LabelList>,
142    {
143        Subview::<LabelList>::subview(self)
144    }
145    /// Generate a new subview of this `DataView`. Equivalent to [v](struct.DataView.html#method.v).
146    pub fn subview<LabelList>(&self) -> <Self as Subview<LabelList>>::Output
147    where
148        Self: Subview<LabelList>,
149    {
150        Subview::<LabelList>::subview(self)
151    }
152}
153
/// Trait for generating a subview of a [DataView](struct.DataView.html). `LabelList` is the list
/// of fields to keep in the generated `DataView`.
pub trait Subview<LabelList> {
    /// Resulting subview `DataView` type.
    type Output;

    /// Generate a new subview of this `DataView`, resulting in a newly created `DataView` object
    /// only containing the fields matching the labels in `LabelList`. The source is only
    /// borrowed, not consumed.
    fn subview(&self) -> Self::Output;
}
164
165impl<Labels, Frames, LabelList> Subview<LabelList> for DataView<Labels, Frames>
166where
167    Labels: FrameIndexList + HasLabels<LabelList> + LabelSubset<LabelList>,
168    <Labels as LabelSubset<LabelList>>::Output: Reorder<LabelList>,
169    Frames: Clone + SubsetClone<<Labels as FrameIndexList>::LabelList>,
170{
171    type Output = DataView<
172        <<Labels as LabelSubset<LabelList>>::Output as Reorder<LabelList>>::Output,
173        <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output,
174    >;
175
176    fn subview(&self) -> Self::Output {
177        DataView {
178            _labels: PhantomData,
179            frames: self.frames.subset_clone(),
180        }
181    }
182}
183
184impl<Labels, Frames> NRows for DataView<Labels, Frames>
185where
186    Frames: NRows,
187{
188    fn nrows(&self) -> usize {
189        self.frames.nrows()
190    }
191}
192
193impl<Labels, Frames> DataView<Labels, Frames>
194where
195    Self: NRows,
196{
197    /// Number of rows in this data view
198    pub fn nrows(&self) -> usize {
199        NRows::nrows(self)
200    }
201}
202
impl<Labels, Frames> DataView<Labels, Frames>
where
    Labels: Len,
    Frames: Len,
{
    /// Returns `true` if the `DataView` has no fields (`length![Labels] == 0`) or if
    /// `Frames::is_empty()` reports that the frames list is empty.
    ///
    /// Neither check reads `self`, so the row data itself is not inspected here.
    // NOTE(review): a view that has fields but zero rows does not appear to be detected by
    // these checks -- confirm whether that is intended.
    pub fn is_empty(&self) -> bool {
        length![Labels] == 0 || Frames::is_empty()
    }
}
impl<Labels, Frames> DataView<Labels, Frames>
where
    Labels: Len,
{
    /// Number of fields in this data view, i.e. the length of the `Labels` lookup list.
    pub fn nfields(&self) -> usize {
        length![Labels]
    }
}
impl<Labels, Frames> DataView<Labels, Frames>
where
    Frames: Len,
{
    /// Number of frames this data view covers, i.e. the length of the `Frames` cons-list.
    pub fn nframes(&self) -> usize {
        length![Frames]
    }
}
231
/// Test-only trait for collecting the store reference count of every frame in a frames list.
#[cfg(test)]
pub trait StoreRefCounts {
    /// Returns the store reference counts, one entry per frame, in list order.
    fn store_ref_counts(&self) -> VecDeque<usize>;
}
236
// Base case: an empty frames list has no reference counts to report.
#[cfg(test)]
impl StoreRefCounts for Nil {
    fn store_ref_counts(&self) -> VecDeque<usize> {
        VecDeque::new()
    }
}
// Recursive case: gather the counts of the tail, then put this frame's count in front so the
// result follows list order.
#[cfg(test)]
impl<FrameIndex, Frame, Tail> StoreRefCounts for ViewFrameCons<FrameIndex, Frame, Tail>
where
    Frame: Valued,
    ValueOf<Frame>: StoreRefCount,
    Tail: StoreRefCounts,
{
    fn store_ref_counts(&self) -> VecDeque<usize> {
        let mut counts = self.tail.store_ref_counts();
        let head_count = self.head.value_ref().store_ref_count();
        counts.push_front(head_count);
        counts
    }
}
256
#[cfg(test)]
impl<Labels, Frames> DataView<Labels, Frames>
where
    Frames: StoreRefCounts,
{
    /// Test-only helper: returns the store reference counts of the frames in this view, in
    /// frame-list order.
    pub fn store_ref_counts(&self) -> VecDeque<usize> {
        self.frames.store_ref_counts()
    }
}
266
/// A trait for finding the associated frame details (implementing
/// [FrameDetails](trait.FrameDetails.html) -- frame index and label within that frame) for
/// a specific label within this type.
pub trait FindFrameDetails<Label>: LookupMarkedElemByLabel<Label> {
    /// The associated frame details for this type.
    type FrameDetails: FrameDetails;
}
// Blanket implementation: any list in which `Label` can be looked up, and whose marker for that
// element implements `FrameDetails`, provides that marker as its frame details.
impl<Labels, Label> FindFrameDetails<Label> for Labels
where
    Labels: LookupMarkedElemByLabel<Label>,
    MarkerOfElemOf<Labels, Label>: FrameDetails,
{
    type FrameDetails = MarkerOfElemOf<Labels, Label>;
}
/// Type alias for the [FrameDetails](trait.FrameDetails.html)-implementing struct associated with
/// the label `Label` in the label lookup list `Labels`.
pub type FrameDetailsOf<Labels, Label> = <Labels as FindFrameDetails<Label>>::FrameDetails;
/// Type alias for the `FrameIndex` of the [FrameDetails](trait.FrameDetails.html)-implementing
/// struct associated with the label `Label` in the label lookup list `Labels`.
pub type FrameIndexOf<Labels, Label> =
    <<Labels as FindFrameDetails<Label>>::FrameDetails as FrameDetails>::FrameIndex;
/// Type alias for the `FrameLabel` of the [FrameDetails](trait.FrameDetails.html)-implementing
/// struct associated with the label `Label` in the label lookup list `Labels`.
pub type FrameLabelOf<Labels, Label> =
    <<Labels as FindFrameDetails<Label>>::FrameDetails as FrameDetails>::FrameLabel;
292
/// Marker trait for being able to find a frame of label `Label` within label lookup list `Labels`
/// in this type.
///
/// The trait has no items of its own; it requires that the implementing frames list supports
/// lookup by the frame index that `Labels` associates with `Label`.
pub trait FindFrame<Labels, Label>: LookupValuedElemByLabel<FrameIndexOf<Labels, Label>>
where
    Labels: FindFrameDetails<Label>,
{
}
// Blanket implementation: every frames list that supports the required lookup is a `FindFrame`.
impl<Frames, Labels, Label> FindFrame<Labels, Label> for Frames
where
    Labels: FindFrameDetails<Label>,
    Frames: LookupValuedElemByLabel<FrameIndexOf<Labels, Label>>,
{
}
306
/// Type alias for the cons-list element within `Frames` associated with a `FrameIndex`.
pub type FrameElemByFrameIndexOf<Frames, FrameIndex> =
    <Frames as LookupValuedElemByLabel<FrameIndex>>::Elem;
/// Type alias for the [DataFrame](../frame/struct.DataFrame.html) within `Frames` associated with
/// a `FrameIndex` (the value held by the element found by that index).
pub type FrameByFrameIndexOf<Frames, FrameIndex> =
    <FrameElemByFrameIndexOf<Frames, FrameIndex> as Valued>::Value;
/// Type alias for the cons-list element within `Frames` associated with label `Label` in the label
/// lookup list `Labels`.
pub type FrameElemOf<Frames, Labels, Label> =
    FrameElemByFrameIndexOf<Frames, FrameIndexOf<Labels, Label>>;
/// Type alias for the [DataFrame](../frame/struct.DataFrame.html) within `Frames` associated
/// with the label `Label` in the label lookup list `Labels`.
pub type FrameOf<Frames, Labels, Label> = <FrameElemOf<Frames, Labels, Label> as Valued>::Value;
321
/// Type alias for the field (implementing [DataIndex](../access/trait.DataIndex.html)) within the
/// frames list `Frames` associated with the `FrameIndex` and `FrameLabel`: the frame is found by
/// `FrameIndex`, and the field is then selected from that frame by `FrameLabel`.
pub type FieldFromFrameDetailsOf<Frames, FrameIndex, FrameLabel> =
    <FrameByFrameIndexOf<Frames, FrameIndex> as SelectFieldByLabel<FrameLabel>>::Output;

/// Type alias for the data type of the field (implementing
/// [DataIndex](../access/trait.DataIndex.html)) within the frames list `Frames` associated with
/// the `FrameIndex` and `FrameLabel`.
pub type FieldTypeFromFrameDetailsOf<Frames, FrameIndex, FrameLabel> =
    <FrameByFrameIndexOf<Frames, FrameIndex> as SelectFieldByLabel<FrameLabel>>::DType;
332
/// Type alias for the field (implementing [DataIndex](../access/trait.DataIndex.html)) within the
/// frames list `Frames` associated with the label `Label` in the label lookup list `Labels`.
/// The field is selected from the frame using the frame-local label that `Labels` associates
/// with `Label`.
pub type FieldOf<Frames, Labels, Label> =
    <FrameOf<Frames, Labels, Label> as SelectFieldByLabel<FrameLabelOf<Labels, Label>>>::Output;
/// Type alias for the data type of the field (implementing
/// [DataIndex](../access/trait.DataIndex.html)) within the frames list `Frames` associated with
/// the label `Label` in the label lookup list `Labels`.
pub type FieldTypeOf<Frames, Labels, Label> =
    <FrameOf<Frames, Labels, Label> as SelectFieldByLabel<FrameLabelOf<Labels, Label>>>::DType;
342
/// Type alias for the field (implementing [DataIndex](../access/trait.DataIndex.html)) within
/// the [DataView](struct.DataView.html) `View` associated with label `Label`.
pub type VFieldOf<View, Label> = <View as SelectFieldByLabel<Label>>::Output;
/// Type alias for the data type of the field (implementing
/// [DataIndex](../access/trait.DataIndex.html)) within the [DataView](struct.DataView.html) `View`
/// associated with label `Label`.
pub type VFieldTypeOf<View, Label> = <View as SelectFieldByLabel<Label>>::DType;
350
/// Trait for selecting a field (implementing [DataIndex](../access/trait.DataIndex.html))
/// associated with the label `Label` from the label lookup list `Labels` from a type.
pub trait SelectFieldFromLabels<Labels, Label> {
    /// Data type of field accessor
    type DType;
    /// Selected field accessor. Its `DataIndex::DType` must match `Self::DType`.
    type Output: DataIndex<DType = Self::DType>;

    /// Returns an accessor (implementing [DataIndex](../access/trait.DataIndex.html)) for the
    /// selected field. The accessor is returned by value.
    fn select_field(&self) -> Self::Output;
}
363impl<Labels, Frames, Label> SelectFieldFromLabels<Labels, Label> for Frames
364where
365    Labels: FindFrameDetails<Label>,
366    Frames: FindFrame<Labels, Label>,
367    FrameOf<Frames, Labels, Label>: SelectFieldByLabel<FrameLabelOf<Labels, Label>>,
368    FieldOf<Frames, Labels, Label>: SelfValued + Clone,
369    FieldTypeOf<Frames, Labels, Label>: fmt::Debug,
370{
371    type DType = FieldTypeOf<Frames, Labels, Label>;
372    type Output = FieldOf<Frames, Labels, Label>;
373
374    fn select_field(&self) -> Self::Output {
375        SelectFieldByLabel::<FrameLabelOf<Labels, Label>>::select_field(
376            LookupValuedElemByLabel::<FrameIndexOf<Labels, Label>>::elem(self).value_ref(),
377        )
378        .clone()
379    }
380}
381
382impl<Labels, Frames, Label> SelectFieldByLabel<Label> for DataView<Labels, Frames>
383where
384    Frames: SelectFieldFromLabels<Labels, Label>,
385{
386    type DType = <Frames as SelectFieldFromLabels<Labels, Label>>::DType;
387    type Output = <Frames as SelectFieldFromLabels<Labels, Label>>::Output;
388
389    fn select_field(&self) -> Self::Output {
390        SelectFieldFromLabels::<Labels, Label>::select_field(&self.frames)
391    }
392}
393
// Empty impl: opts every `DataView` into `FieldSelect` without overriding anything here.
// NOTE(review): presumably this is what provides the `field::<Label>()` method used by the
// sorting and filtering methods -- confirm against the `select` module.
impl<Labels, Frames> FieldSelect for DataView<Labels, Frames> {}
395
/// Type alias for the cons-list of fields implementing [DataIndex](../access/trait.DataIndex.html).
/// `Label` is the field's label, `DType` its data type, `DI` the field accessor type, and `Tail`
/// the rest of the list.
pub type DataIndexCons<Label, DType, DI, Tail> = FieldPayloadCons<Label, DType, DI, Tail>;
398
/// Trait for finding the associated [DataIndexCons](type.DataIndexCons.html) (cons-list of fields)
/// in a type given labels in a labels list.
pub trait AssocDataIndexCons<Labels> {
    /// Type of associated data index cons-list.
    type Output;
    /// Returns the associated `DataIndexCons`, built from a borrow of `self`.
    fn assoc_data(&self) -> Self::Output;
}
// Base case: an empty label list yields an empty field list, whatever the frames type is.
impl<Frames> AssocDataIndexCons<Nil> for Frames {
    type Output = Nil;
    fn assoc_data(&self) -> Nil {
        Nil
    }
}
413impl<Label, FrameIndex, FrameLabel, LookupTail, Frames>
414    AssocDataIndexCons<FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>> for Frames
415where
416    Self: SelectFieldFromLabels<FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>, Label>,
417    Self: AssocDataIndexCons<LookupTail>,
418    <Self as SelectFieldFromLabels<
419        FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>,
420        Label,
421    >>::Output: Typed,
422{
423    type Output = DataIndexCons<
424        Label,
425        TypeOf<
426            <Frames as SelectFieldFromLabels<
427                FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>,
428                Label,
429            >>::Output,
430        >,
431        <Frames as SelectFieldFromLabels<
432            FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>,
433            Label,
434        >>::Output,
435        <Frames as AssocDataIndexCons<LookupTail>>::Output,
436    >;
437    fn assoc_data(&self) -> Self::Output {
438        DataIndexCons {
439            head: TypedValue::from(SelectFieldFromLabels::<
440                FrameLookupCons<Label, FrameIndex, FrameLabel, LookupTail>,
441                Label,
442            >::select_field(self))
443            .into(),
444            tail: AssocDataIndexCons::<LookupTail>::assoc_data(self),
445        }
446    }
447}
448
/// Type alias for the [DataIndexCons](type.DataIndexCons.html) within the frames `Frames`
/// associated with labels `Labels`. Note the parameter order: labels first, then frames.
pub type AssocDataIndexConsOf<Labels, Frames> = <Frames as AssocDataIndexCons<Labels>>::Output;
452
// Upper bound on the number of rows rendered by the `Display` implementation of `DataView`.
const MAX_DISP_ROWS: usize = 1000;
454
455impl<Labels, Frames> Display for DataView<Labels, Frames>
456where
457    Frames: Len + NRows + AssocDataIndexCons<Labels>,
458    AssocDataIndexConsOf<Labels, Frames>: DeriveCapabilities<AddCellToRowFn>,
459    Labels: StrLabels,
460{
461    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
462        if Frames::is_empty() {
463            return write!(f, "Empty DataView");
464        }
465        let mut table = pt::Table::new();
466
467        let nrows = self.nrows();
468        let mut func = AddCellToRowFn {
469            rows: vec![pt::row::Row::empty(); nrows.min(MAX_DISP_ROWS)],
470        };
471        self.frames.assoc_data().derive().map(&mut func);
472        for row in func.rows.drain(..) {
473            table.add_row(row);
474        }
475
476        table.set_titles(<Labels as StrLabels>::labels().into());
477        table.set_format(*pt::format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
478
479        Display::fmt(&table, f)
480    }
481}
482
/// Function (implementing [Func](../partial/trait.Func.html)) that adds cells to
/// `prettytable::row::Row`.
///
/// Each call appends one cell to every row in `rows`, so calling it once per field fills the
/// rows column by column.
pub struct AddCellToRowFn {
    // Rows being built; the row at index `i` receives the datum at index `i` of each field.
    rows: Vec<pt::row::Row>,
}
488impl<DType> Func<DType> for AddCellToRowFn
489where
490    for<'a> Value<&'a DType>: ToString,
491{
492    type Output = ();
493    fn call<DI>(&mut self, data: &DI) -> Self::Output
494    where
495        DI: DataIndex<DType = DType>,
496    {
497        debug_assert!(data.len() >= self.rows.len());
498        for i in 0..self.rows.len() {
499            self.rows[i].add_cell(cell!(data.get_datum(i).unwrap()));
500        }
501    }
502}
503impl FuncDefault for AddCellToRowFn {
504    type Output = ();
505    fn call(&mut self) -> Self::Output {
506        for i in 0..self.rows.len() {
507            self.rows[i].add_cell(cell!());
508        }
509    }
510}
// Generates, for each listed type, an `IsImplemented<AddCellToRowFn>` impl whose `IsImpl` is
// `Implemented`. Types are given as a whitespace-separated list.
macro_rules! impl_addcell_is_impl {
    ($($dtype:ty)*) => {$(
        impl IsImplemented<AddCellToRowFn> for $dtype {
            type IsImpl = Implemented;
        }
    )*}
}
// Data types marked as implemented for `AddCellToRowFn`.
impl_addcell_is_impl![String &str f64 f32 u64 u32 i64 i32 bool];
519
520impl<Labels, Frames> DataView<Labels, Frames> {
521    /// Construct a new `DataView` with the label `CurrLabel` relabeled with the label `NewLabel`.
522    pub fn relabel<CurrLabel, NewLabel>(
523        self,
524    ) -> DataView<<Labels as Relabel<CurrLabel, NewLabel>>::Output, Frames>
525    where
526        Labels: Relabel<CurrLabel, NewLabel>,
527    {
528        DataView {
529            _labels: PhantomData,
530            frames: self.frames,
531        }
532    }
533}
534
/// Trait for relabeling the label `TargetLabel` with `NewLabel`.
///
/// This is a purely type-level operation: the trait has no methods, only the resulting type.
pub trait Relabel<TargetLabel, NewLabel> {
    /// The output type after relabeling `TargetLabel` to `NewLabel`.
    type Output;
}
540
// Compares `TargetLabel` against the head label of the list and dispatches to `RelabelMatch`
// with the result of that comparison as the `Match` parameter.
impl<TargetLabel, NewLabel, Label, FrameIndex, FrameLabel, Tail> Relabel<TargetLabel, NewLabel>
    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
where
    TargetLabel: LabelEq<Label>,
    FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>:
        RelabelMatch<TargetLabel, NewLabel, <TargetLabel as LabelEq<Label>>::Eq>,
{
    type Output = <FrameLookupCons<Label, FrameIndex, FrameLabel, Tail> as RelabelMatch<
        TargetLabel,
        NewLabel,
        <TargetLabel as LabelEq<Label>>::Eq,
    >>::Output;
}
554
/// Helper trait for relabeling. Used by [Relabel](trait.Relabel.html). `TargetLabel` is the label
/// to change, `NewLabel` is the desired label to change to, and `Match` is whether or not
/// `TargetLabel` matches the head label in this type (`True` or `False`).
pub trait RelabelMatch<TargetLabel, NewLabel, Match> {
    /// The output type after relabeling `TargetLabel` to `NewLabel`.
    type Output;
}
// TargetLabel == Label: replace the head label with NewLabel. The frame index, frame label and
// tail are kept as they are, so the entry still points at the same underlying field.
impl<TargetLabel, NewLabel, Label, FrameIndex, FrameLabel, Tail>
    RelabelMatch<TargetLabel, NewLabel, True>
    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
{
    type Output = FrameLookupCons<NewLabel, FrameIndex, FrameLabel, Tail>;
}
// TargetLabel != Label: keep the head entry unchanged and recurse into the tail.
impl<TargetLabel, NewLabel, Label, FrameIndex, FrameLabel, Tail>
    RelabelMatch<TargetLabel, NewLabel, False>
    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
where
    Tail: Relabel<TargetLabel, NewLabel>,
{
    type Output = FrameLookupCons<
        Label,
        FrameIndex,
        FrameLabel,
        <Tail as Relabel<TargetLabel, NewLabel>>::Output,
    >;
}
583
/// Trait for merging the data from two [DataView](struct.DataView.html)s into one new `DataView`.
/// The two `DataView`s should have the same number of rows, and the resultant `DataView` is one
/// with all the fields of both of the two original `DataView`s.
///
/// This trait does not consume the source `DataView`s: the resultant `DataView` should contain
/// new references to the original field data.
pub trait ViewMerge<Other> {
    /// Resultant `DataView` type.
    type Output;
    /// Merge this `DataView` with another `DataView`.
    ///
    /// # Errors
    ///
    /// Can fail if the `DataView`s do not have the same number of rows.
    fn merge(&self, right: &Other) -> error::Result<Self::Output>;
}
597impl<Labels, Frames, RLabels, RFrames> ViewMerge<DataView<RLabels, RFrames>>
598    for DataView<Labels, Frames>
599where
600    Self: Merge<RLabels, RFrames>,
601    RFrames: NRows,
602    Frames: NRows,
603    <Self as Merge<RLabels, RFrames>>::OutLabels: IsLabelSet<IsSet = True>,
604{
605    type Output = DataView<
606        <Self as Merge<RLabels, RFrames>>::OutLabels,
607        <Self as Merge<RLabels, RFrames>>::OutFrames,
608    >;
609
610    fn merge(&self, right: &DataView<RLabels, RFrames>) -> error::Result<Self::Output> {
611        if self.nrows() != right.nrows() {
612            return Err(error::AgnesError::DimensionMismatch(
613                "number of rows mismatch in merge".into(),
614            ));
615        }
616        Ok(Merge::merge(self, right))
617    }
618}
619
620impl<Labels, Frames> DataView<Labels, Frames> {
621    /// Merge this `DataView` with another `DataView` object, creating a new `DataView` with the
622    /// same number of rows and all the fields from both source `DataView` objects.
623    ///
624    /// Fails if the two `DataView`s have different number of rows.
625    pub fn merge<RLabels, RFrames>(
626        &self,
627        right: &DataView<RLabels, RFrames>,
628    ) -> error::Result<<Self as ViewMerge<DataView<RLabels, RFrames>>>::Output>
629    where
630        Self: ViewMerge<DataView<RLabels, RFrames>>,
631    {
632        ViewMerge::merge(self, right)
633    }
634}
635
636impl<Labels, Frames> DataView<Labels, Frames> {
637    /// Combine two `DataView` objects using specified join, creating a new `DataStore` object with
638    /// a subset of records from the two source `DataView`s according to the join parameters.
639    ///
640    /// Note that since this is creating a new `DataStore` object, it will be allocated new data to
641    /// store the contents of the joined `DataView`s.
642    pub fn join<Join, RLabels, RFrames>(
643        &self,
644        right: &DataView<RLabels, RFrames>,
645    ) -> <Self as SortMergeJoin<RLabels, RFrames, Join>>::Output
646    where
647        Self: SortMergeJoin<RLabels, RFrames, Join>,
648    {
649        SortMergeJoin::join(self, right)
650        // match join.predicate {
651        //     // TODO: implement hash join
652        //     // Predicate::Equal => {
653        //     //     hash_join(self, other, join)
654        //     // },
655        //     _ => {
656        //         sort_merge_join(self, other, join)
657        //     }
658        // }
659    }
660}
661
662impl<FrameIndex, Frame, Tail> UpdatePermutation for ViewFrameCons<FrameIndex, Frame, Tail>
663where
664    Frame: Valued<Value = Frame>,
665    ValueOf<Frame>: UpdatePermutation,
666    Tail: UpdatePermutation,
667{
668    fn update_permutation(mut self, order: &[usize]) -> Self {
669        self.head = Labeled::from(self.head.value().update_permutation(order));
670        self.tail = self.tail.update_permutation(order);
671        self
672    }
673}
674
675impl<Labels, Frames> DataView<Labels, Frames>
676where
677    Frames: UpdatePermutation,
678{
679    /// Sorts this `DataView` by the provided label. This sort is stable -- it preserves the
680    /// original order of equal elements. Consumes the `DataView` and returns a `DataView`
681    /// sorted by values from field identified by `Label` in ascending order, with missing (NA)
682    /// values at the beginning of the order (considered to be of 'lesser' value than existing
683    /// values).
684    pub fn sort_by_label<Label>(mut self) -> Self
685    where
686        Self: SelectFieldByLabel<Label>,
687        <Self as SelectFieldByLabel<Label>>::Output: SortOrder,
688    {
689        // find sort order for this field
690        let sorted = self.field::<Label>().sort_order();
691        // apply sort order to each frame
692        self.frames = self.frames.update_permutation(&sorted);
693        self
694    }
695
696    /// Sorts this `DataView` by the provided label. This sort is unstable -- it does not
697    /// necessarily preserve the original order of equal elements, but may be faster. Consumes the
698    /// `DataView` and returns a `DataView sorted by values from field identifier by `Label` in
699    /// ascending order, with missing (NA) values at the beginning of the order (considered to be of
700    /// 'lesser' value than existing values).
701    pub fn sort_unstable_by_label<Label>(mut self) -> Self
702    where
703        Self: SelectFieldByLabel<Label>,
704        <Self as SelectFieldByLabel<Label>>::Output: SortOrderUnstable,
705    {
706        // find sort order for this field
707        let sorted = self.field::<Label>().sort_order_unstable();
708        // apply sort order to each frame
709        self.frames = self.frames.update_permutation(&sorted);
710        self
711    }
712
713    /// Sorts this `DataView` by the provided label using a specific comparator. This sort is
714    /// stable -- it preserves the original order of equal elements. Consumes the `DataView` and
715    /// returns a `DataView sorted by values from field identifier by `Label` in ascending order,
716    /// with missing (NA) values at the beginning of the order (considered to be of 'lesser' value
717    /// than existing values).
718    pub fn sort_by_label_comparator<Label, F>(mut self, compare: F) -> Self
719    where
720        Self: SelectFieldByLabel<Label>,
721        <Self as SelectFieldByLabel<Label>>::Output: SortOrderComparator<F>,
722    {
723        // find sort order for this field
724        let sorted = self.field::<Label>().sort_order_by(compare);
725        // apply sort order to each frame
726        self.frames = self.frames.update_permutation(&sorted);
727        self
728    }
729
730    /// Sorts this `DataView` by the provided label using a specific comparator. This sort is
731    /// unstable -- it does not necessarily preserve the original order of equal elements, but may
732    /// be faster. Consumes the `DataView` and returns a `DataView sorted by values from field
733    /// identifier by `Label` in ascending order, with missing (NA) values at the beginning of the
734    /// order (considered to be of 'lesser' value than existing values).
735    pub fn sort_unstable_by_label_comparator<Label, F>(mut self, compare: F) -> Self
736    where
737        Self: SelectFieldByLabel<Label>,
738        <Self as SelectFieldByLabel<Label>>::Output: SortOrderUnstableComparator<F>,
739    {
740        // find sort order for this field
741        let sorted = self.field::<Label>().sort_order_unstable_by(compare);
742        // apply sort order to each frame
743        self.frames = self.frames.update_permutation(&sorted);
744        self
745    }
746
747    /// Filters this `DataView` by `predicate` (a function mapping from `Value<&T>` to `bool` where
748    /// `T` is the type of the field with label `Label`). Consumes this `DataView` and returns a new
749    /// `DataView` such that only those rows where values within the field with label `Label`
750    /// matching `predicate` remain.
751    pub fn filter<Label, P>(mut self, predicate: P) -> Self
752    where
753        Self: SelectFieldByLabel<Label>,
754        <Self as SelectFieldByLabel<Label>>::Output: FilterPerm<P>,
755    {
756        let perm = self.field::<Label>().filter_perm(predicate);
757        self.frames = self.frames.update_permutation(&perm);
758        self
759    }
760}
761
762/// Trait for finding a cons-list of fields (implementing
763/// [DataIndex](../access/trait.DataIndex.html)) from frames list `Frames` using the `LabelList`
764/// list of labels. `LabelList` should consist of labels that exist within `Self` (this trait is
765/// implemented by label lookup lists).
766pub trait FieldList<LabelList, Frames> {
767    /// Resultant cons-list of fields.
768    type Output;
769
770    /// Returns the cons-list of fields from the frames list `frames`.
771    fn field_list(frames: &Frames) -> Self::Output;
772}
773
// Base case: an empty label lookup list yields an empty field list.
impl<LabelList, Frames> FieldList<LabelList, Frames> for Nil {
    type Output = Nil;

    fn field_list(_frames: &Frames) -> Nil {
        Nil
    }
}
781
782impl<LabelList, Frames, Label, FrameIndex, FrameLabel, Tail> FieldList<LabelList, Frames>
783    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
784where
785    LabelList: Member<Label>,
786    Self: FieldListPred<LabelList, Frames, <LabelList as Member<Label>>::IsMember>,
787{
788    type Output =
789        <Self as FieldListPred<LabelList, Frames, <LabelList as Member<Label>>::IsMember>>::Output;
790
791    fn field_list(frames: &Frames) -> Self::Output {
792        Self::field_list_pred(frames)
793    }
794}
795
/// Helper trait for [FieldList](trait.FieldList.html). `IsMember` is whether or not the head of
/// `Self` is a member of the list `LabelList`; it selects between the implementation that keeps
/// the head field and the one that skips it.
pub trait FieldListPred<LabelList, Frames, IsMember> {
    /// The output field list.
    type Output;

    /// Returns the cons-list of fields from `frames`.
    fn field_list_pred(frames: &Frames) -> Self::Output;
}
805
806impl<LabelList, Frames, Label, FrameIndex, FrameLabel, Tail> FieldListPred<LabelList, Frames, True>
807    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
808where
809    Frames: SelectFieldFromLabels<Self, Label>,
810    Tail: FieldList<LabelList, Frames>,
811{
812    type Output = Cons<
813        <Frames as SelectFieldFromLabels<
814            FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>,
815            Label,
816        >>::Output,
817        <Tail as FieldList<LabelList, Frames>>::Output,
818    >;
819
820    fn field_list_pred(frames: &Frames) -> Self::Output {
821        Cons {
822            head: SelectFieldFromLabels::<Self, Label>::select_field(frames),
823            tail: Tail::field_list(frames),
824        }
825    }
826}
827
828impl<LabelList, Frames, Label, FrameIndex, FrameLabel, Tail> FieldListPred<LabelList, Frames, False>
829    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
830where
831    Tail: FieldList<LabelList, Frames>,
832{
833    type Output = <Tail as FieldList<LabelList, Frames>>::Output;
834
835    fn field_list_pred(frames: &Frames) -> Self::Output {
836        Tail::field_list(frames)
837    }
838}
839
/// A struct representing a single record across the fields in the field list `Fields`.
///
/// A `Record` owns no data: it pairs a borrowed field list with the index of the record it
/// refers to.
#[derive(Debug, Clone)]
pub struct Record<'a, Fields> {
    // a field cons-list (returned from FieldList trait method)
    fields: &'a Fields,
    // index of this record within the fields of `fields`
    idx: usize,
}
847
848impl<'a, Fields> Record<'a, Fields> {
849    fn new(field_list: &'a Fields, idx: usize) -> Record<'a, Fields> {
850        Record {
851            fields: field_list,
852            idx,
853        }
854    }
855}
856
/// Trait for hashing a single record (the values found at one index) within a list of data
/// fields.
pub trait HashIndex {
    /// Feeds the values found at index `idx` of this list of data fields into the hasher
    /// `state`.
    fn hash_index<H: Hasher>(&self, idx: usize, state: &mut H);
}
865
866impl<T, DI> HashIndex for Framed<T, DI>
867where
868    for<'a> Value<&'a T>: Hash,
869    Self: DataIndex<DType = T>,
870{
871    fn hash_index<H>(&self, idx: usize, state: &mut H)
872    where
873        H: Hasher,
874    {
875        self.get_datum(idx).unwrap().hash(state);
876    }
877}
878
879impl HashIndex for Nil {
880    fn hash_index<H>(&self, _idx: usize, _state: &mut H)
881    where
882        H: Hasher,
883    {
884    }
885}
886
887impl<Head, Tail> HashIndex for Cons<Head, Tail>
888where
889    Head: HashIndex,
890    Tail: HashIndex,
891{
892    fn hash_index<H>(&self, idx: usize, state: &mut H)
893    where
894        H: Hasher,
895    {
896        self.head.hash_index(idx, state);
897        self.tail.hash_index(idx, state);
898    }
899}
900
901impl<'a, Fields> Hash for Record<'a, Fields>
902where
903    Fields: HashIndex,
904{
905    fn hash<H>(&self, state: &mut H)
906    where
907        H: Hasher,
908    {
909        self.fields.hash_index(self.idx, state)
910    }
911}
912
913/// Trait for computing equality of a single index (record) within a list of data fields.
914pub trait PartialEqIndex {
915    /// Returns equality of the values within this list of data fields with the index `idx`.
916    fn eq_index(&self, other: &Self, idx: usize) -> bool;
917}
918
919impl<T, DI> PartialEqIndex for Framed<T, DI>
920where
921    for<'a> Value<&'a T>: PartialEq,
922    Self: DataIndex<DType = T>,
923{
924    fn eq_index(&self, other: &Self, idx: usize) -> bool {
925        self.get_datum(idx)
926            .unwrap()
927            .eq(&other.get_datum(idx).unwrap())
928    }
929}
930
931impl PartialEqIndex for Nil {
932    fn eq_index(&self, _other: &Nil, _idx: usize) -> bool {
933        true
934    }
935}
936
937impl<Head, Tail> PartialEqIndex for Cons<Head, Tail>
938where
939    Head: PartialEqIndex,
940    Tail: PartialEqIndex,
941{
942    fn eq_index(&self, other: &Self, idx: usize) -> bool {
943        self.head.eq_index(&other.head, idx) && self.tail.eq_index(&other.tail, idx)
944    }
945}
946
947impl<'a, Fields> PartialEq for Record<'a, Fields>
948where
949    Fields: PartialEqIndex,
950{
951    fn eq(&self, other: &Self) -> bool {
952        self.fields.eq_index(other.fields, self.idx)
953    }
954}
955
/// Marker implementation: whenever `Record` implements `PartialEq` it is also declared `Eq`,
/// which is what allows records to be used as keys of hash-based collections.
impl<'a, Fields> Eq for Record<'a, Fields> where Self: PartialEq {}
957
958impl<'a> Display for Record<'a, Nil> {
959    fn fmt(&self, _f: &mut Formatter) -> Result<(), fmt::Error> {
960        Ok(())
961    }
962}
963
964impl<'a, Head, Tail> Display for Record<'a, Cons<Head, Tail>>
965where
966    Head: DataIndex,
967    <Head as DataIndex>::DType: Display,
968    Record<'a, Tail>: Display,
969{
970    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
971        write!(f, "{},", self.fields.head.get_datum(self.idx).unwrap())?;
972        Record {
973            fields: &self.fields.tail,
974            idx: self.idx,
975        }
976        .fmt(f)
977    }
978}
979
980impl<Labels, Frames> DataView<Labels, Frames> {
981    /// Returns a cons-list of fields (implementing [DataIndex](../access/trait.DataIndex.html))
982    /// that match the labels in `LabelList`.
983    pub fn field_list<LabelList>(&self) -> <Labels as FieldList<LabelList, Frames>>::Output
984    where
985        Labels: FieldList<LabelList, Frames>,
986    {
987        <Labels as FieldList<LabelList, Frames>>::field_list(&self.frames)
988    }
989
990    /// Computes the set of unique composite values among the fields in this `DataView` associated
991    /// with labels in `LabelList`. Returns the indices of exemplar rows, one index for each unique
992    /// value. Taken as a set, the values of the `LabelList`-labeled fields at the indices returned
993    /// by this method represent all the possible combinations of values of these fields that exist
994    /// in this `DataView`.
995    ///
996    /// Fields referenced by `LabelList` must implement `Hash`.
997    pub fn unique_indices<LabelList>(&self) -> Vec<usize>
998    where
999        Self: Unique<LabelList>,
1000    {
1001        Unique::<LabelList>::unique_indices(self)
1002    }
1003
1004    /// Computes the set of unique composite values among the fields in this `DataView` associated
1005    /// with labels in `LabelList`. Returns a new `DataView` with those specific sets of values. The
1006    /// returned `DataView` contains the values of the `LabelList`-labeled fields that represent
1007    /// all the possible combinations of values of these fields that exist in the original
1008    /// `DataView`.
1009    ///
1010    /// Fields referenced by `LabelList` must implement `Hash`.
1011    pub fn unique_values<LabelList>(&self) -> <Self as Unique<LabelList>>::Output
1012    where
1013        Self: Unique<LabelList>,
1014    {
1015        Unique::<LabelList>::unique_values(self)
1016    }
1017}
1018
/// Trait providing methods for finding the unique indices and values for a
/// [DataView](struct.DataView.html). See the intrinsic methods
/// [unique_indices](struct.DataView.html#method.unique_indices) and
/// [unique_values](struct.DataView.html#method.unique_values) for more details.
///
/// `LabelList` is the list of labels of the fields whose composite values are examined.
pub trait Unique<LabelList> {
    /// Output of the `unique_values` method.
    type Output;
    /// Compute the unique indices for fields with labels in `LabelList`. See the intrinsic method
    /// [unique_indices](struct.DataView.html#method.unique_indices) for more details.
    fn unique_indices(&self) -> Vec<usize>;
    /// Compute the unique values for fields with labels in `LabelList`. See the intrinsic method
    /// [unique_values](struct.DataView.html#method.unique_values) for more details.
    fn unique_values(&self) -> Self::Output;
}
1033
1034impl<Labels, Frames, LabelList> Unique<LabelList> for DataView<Labels, Frames>
1035where
1036    Labels: FieldList<LabelList, Frames>
1037        + HasLabels<LabelList>
1038        + LabelSubset<LabelList>
1039        + FrameIndexList,
1040    <Labels as FieldList<LabelList, Frames>>::Output: HashIndex + PartialEqIndex,
1041    <Labels as LabelSubset<LabelList>>::Output: Reorder<LabelList>,
1042    Frames: NRows + SubsetClone<<Labels as FrameIndexList>::LabelList>,
1043    <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output: UpdatePermutation,
1044{
1045    type Output = DataView<
1046        <<Labels as LabelSubset<LabelList>>::Output as Reorder<LabelList>>::Output,
1047        <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output,
1048    >;
1049
1050    fn unique_indices(&self) -> Vec<usize> {
1051        let fl = self.field_list::<LabelList>();
1052        let mut indices = vec![];
1053        let mut set = HashSet::new();
1054        for i in 0..self.nrows() {
1055            let record = Record::new(&fl, i);
1056            if !set.contains(&record) {
1057                set.insert(record);
1058                indices.push(i);
1059            }
1060        }
1061        indices
1062    }
1063
1064    fn unique_values(&self) -> Self::Output {
1065        let indices = self.unique_indices::<LabelList>();
1066        let new_frames = self.frames.subset_clone().update_permutation(&indices);
1067        DataView {
1068            _labels: PhantomData,
1069            frames: new_frames,
1070        }
1071    }
1072}
1073
/// Serializes a `DataView` as a map with one entry per field; the entries themselves are
/// written by [SerializeViewField](trait.SerializeViewField.html).
#[cfg(feature = "serialize")]
impl<Labels, Frames> Serialize for DataView<Labels, Frames>
where
    Labels: Len + SerializeViewField<Frames>,
{
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        // the number of map entries is known up front: one per field
        let field_count = self.nfields();
        let map = serializer.serialize_map(Some(field_count))?;
        <Labels as SerializeViewField<Frames>>::serialize_view_field(&self.frames, map)
    }
}
1087
/// Trait for serializing a single field in a view. Used for serializing a
/// [DataView](struct.DataView.html).
#[cfg(feature = "serialize")]
pub trait SerializeViewField<Frames> {
    /// Serializes this field using data from `frames`, adding it to the map being built in `map`.
    /// The map is taken by value and the result of finishing it is returned.
    fn serialize_view_field<M: SerializeMap>(frames: &Frames, map: M) -> Result<M::Ok, M::Error>;
}
1097
/// End of the label list: no field is left to write, so the map is finished.
#[cfg(feature = "serialize")]
impl<Frames> SerializeViewField<Frames> for Nil {
    fn serialize_view_field<M: SerializeMap>(_: &Frames, map: M) -> Result<M::Ok, M::Error> {
        SerializeMap::end(map)
    }
}
1107
/// Writes the head field as a map entry keyed by its label name, then continues with the rest
/// of the label list.
#[cfg(feature = "serialize")]
impl<Frames, Label, FrameIndex, FrameLabel, Tail> SerializeViewField<Frames>
    for FrameLookupCons<Label, FrameIndex, FrameLabel, Tail>
where
    Frames: SelectFieldFromLabels<Self, Label>,
    <Frames as SelectFieldFromLabels<Self, Label>>::Output: Serialize,
    Label: LabelName,
    Tail: SerializeViewField<Frames>,
{
    fn serialize_view_field<M: SerializeMap>(
        frames: &Frames,
        mut map: M,
    ) -> Result<M::Ok, M::Error> {
        {
            // entry for the head label: key is the label name, value is the selected field
            let field = <Frames as SelectFieldFromLabels<Self, Label>>::select_field(frames);
            map.serialize_entry(Label::name(), &field)?;
        }
        <Tail as SerializeViewField<Frames>>::serialize_view_field(frames, map)
    }
}
1128
1129impl<Labels, Frames> DataView<Labels, Frames> {
1130    /// Creates a new a `DataView` that accesses source data in a different way, viewing the data
1131    /// as a series of identifier / value pairs instead of a having values in multiple
1132    /// related fields.
1133    ///
1134    /// This is useful when converting a data table in a wide format where several fields represent
1135    /// different instances of some quantity to a long format where each record only has one
1136    /// instance of the appropriate value.
1137    ///
1138    /// The type parameter `MeltLabels` is a [LabelCons](../label/type.LabelCons.html) list of the
1139    /// labels of the fields containing the values to 'melt'. `NameLabel` is the desired label for
1140    /// the new identifier field, which will contain the `String` identifiers for where a record's
1141    /// value originally came from. `ValueLabel` is the desired label for the new value field, which
1142    /// will contain the values associated with each of the corresponding `String` identifiers.
1143    /// `HoldLabels` should be left for the compiler to infer using `_` -- it specifies the
1144    /// remaining fields that are not affected by this method.
1145    ///
1146    /// Since the values from the fields denoted in `MeltLabels` will all be combined into one field
1147    /// they must be the same data type.
1148    ///
1149    /// The resultant `DataView` will be have the following field order: all the fields with labels
1150    /// in `HoldLabels`, the `NameLabel` field, then the `ValueLabel` field.
1151    ///
1152    /// # Example
1153    /// Let us consider a table of employee salaries with the tablespace:
1154    /// ```
1155    /// # #[macro_use] extern crate agnes;
1156    /// tablespace![
1157    ///     table salary {
1158    ///         EmpId: u64,
1159    ///         Year2010: f64,
1160    ///         Year2011: f64,
1161    ///         Year2012: f64,
1162    ///         Year2013: f64,
1163    ///         Year2014: f64,
1164    ///     }
1165    /// ];
1166    /// ```
1167    /// which, when first loaded from the source file, looks like this:
1168    /// ```text
1169    ///  EmpId | Year2010 | Year2011 | Year2012 | Year2013 | Year2014
1170    /// -------+----------+----------+----------+----------+----------
1171    ///  0     | 1500     | 1600     | 1700     | 1850     | 2000
1172    ///  1     | 900      | 920      | 940      | 940      | 970
1173    ///  2     | 600      | 800      | 900      | 1020     | 1100
1174    /// ```
1175    /// While this is a valid way to store and present this data, there are definitely cases where
1176    /// you might want to have the different years separated into different records instead of
1177    /// having a column for each year. That's what `melt` is for!
1178    ///
1179    /// For the first step, we need to create new labels for `melt`'s `NameLabel` and `ValueLabel`
1180    /// type arguments. The `NameLabel` will be filled in with `String` identifiers for the field
1181    /// a data point came from, and the `ValueLabel` will be filled with the data values themselves.
1182    /// We can add these two labels to our previous `tablespace` call.
1183    ///
1184    /// Next, after we load the original data, we call `melt`:
1185    /// ```
1186    /// # #[macro_use] extern crate agnes;
1187    /// tablespace![
1188    ///     table salary {
1189    ///         EmpId: u64,
1190    ///         Year2010: f64,
1191    ///         Year2011: f64,
1192    ///         Year2012: f64,
1193    ///         Year2013: f64,
1194    ///         Year2014: f64,
1195    ///         SalaryYear: String,
1196    ///         Salary: f64,
1197    ///     }
1198    /// ];
1199    /// #
1200    /// # use salary::*;
1201    /// # use agnes::{store, cons::Nil};
1202    /// #
1203    /// fn main() {
1204    /// #     let orig_table = store::DataStore::<Nil>::empty()
1205    /// #         .push_back_cloned_from_iter::<EmpId, _, _, _>(&[0u64, 1u64, 2u64])
1206    /// #         .push_back_cloned_from_iter::<Year2010, _, _, _>(&[1500.0, 900.0, 600.0])
1207    /// #         .push_back_cloned_from_iter::<Year2011, _, _, _>(&[1600.0, 920.0, 800.0])
1208    /// #         .push_back_cloned_from_iter::<Year2012, _, _, _>(&[1700.0, 940.0, 900.0])
1209    /// #         .push_back_cloned_from_iter::<Year2013, _, _, _>(&[1850.0, 940.0, 1020.0])
1210    /// #         .push_back_cloned_from_iter::<Year2014, _, _, _>(&[2000.0, 970.0, 1100.0])
1211    /// #         .into_view();
1212    ///     // <load data into DataView orig_table>
1213    ///     // quick check to make sure we loaded the right table: with 3 rows, 6 fields
1214    ///     assert_eq!((orig_table.nrows(), orig_table.nfields()), (3, 6));
1215    ///
1216    ///     let melted_table = orig_table.melt::<
1217    ///         Labels![Year2010, Year2011, Year2012, Year2013, Year2014],
1218    ///         SalaryYear,
1219    ///         Salary,
1220    ///         _,
1221    ///     >();
1222    ///
1223    ///     // melted table should have 15 rows -- 5 for each of our 3 employees -- and 3 fields
1224    ///     assert_eq!((melted_table.nrows(), melted_table.nfields()), (15, 3));
1225    ///     assert_eq!(melted_table.fieldnames(), vec!["EmpId", "SalaryYear", "Salary"]);
1226    ///     println!("{}", melted_table);
1227    /// }
1228    /// ```
1229    /// This call to `melt` transforms the year fields into two new fields: one which contains the
1230    /// salary year (text) and has the label SalaryYear, and one which contains the salary values
1231    /// (floating-point) with the label Salary.
1232    ///
1233    /// The first type argument is the list of year labels we want to melt, the second is the
1234    /// new label for the year specifier field, the third is the new label for the year value field,
1235    /// and we let the compiler compute the list of labels we aren't melting (in this case, the
1236    /// EmpId field).
1237    ///
1238    /// As a result we should have a table with 15 rows, five for each of our three employees, and
1239    /// three fields: `EmpId`, `SalaryYear`, and `Salary`. This code should output:
1240    /// ```text
1241    ///  EmpId | SalaryYear | Salary
1242    /// -------+------------+--------
1243    ///  0     | Year2010   | 1500
1244    ///  0     | Year2011   | 1600
1245    ///  0     | Year2012   | 1700
1246    ///  0     | Year2013   | 1850
1247    ///  0     | Year2014   | 2000
1248    ///  1     | Year2010   | 900
1249    ///  1     | Year2011   | 920
1250    ///  1     | Year2012   | 940
1251    ///  1     | Year2013   | 940
1252    ///  1     | Year2014   | 970
1253    ///  2     | Year2010   | 600
1254    ///  2     | Year2011   | 800
1255    ///  2     | Year2012   | 900
1256    ///  2     | Year2013   | 1020
1257    ///  2     | Year2014   | 1100
1258    /// ```
1259    pub fn melt<MeltLabels, NameLabel, ValueLabel, HoldLabels>(
1260        &self,
1261    ) -> <Self as Melt<MeltLabels, NameLabel, ValueLabel, HoldLabels>>::Output
1262    where
1263        Self: Melt<MeltLabels, NameLabel, ValueLabel, HoldLabels>,
1264    {
1265        Melt::<MeltLabels, NameLabel, ValueLabel, HoldLabels>::melt(self)
1266    }
1267}
1268
/// Trait providing the `melt` method for converting wide-format tables into long-format tables.
/// See the intrinsic method [melt](struct.DataView.html#method.melt) for more details.
///
/// The type parameters mirror those of the intrinsic method: the labels to melt, the label of
/// the new identifier field, the label of the new value field, and the labels left untouched.
pub trait Melt<MeltLabels, NameLabel, ValueLabel, HoldLabels> {
    /// Type produced by this melt method.
    type Output;

    /// Perform the 'melt' operation. See the intrinsic method
    /// [melt](struct.DataView.html#method.melt) for more details.
    fn melt(&self) -> Self::Output;
}
1279
// type aliases to hopefully help with readability of Melt trait bounds.
// `AsView<Orig>`: the result of converting `Orig` with `IntoView`.
type AsView<Orig> = <Orig as IntoView>::Output;
// `AsFrame<Orig>`: the result of converting `Orig` with `IntoFrame`.
type AsFrame<Orig> = <Orig as IntoFrame>::Output;
// `AsMeltFrame<Orig, ValueLabel>`: the result of converting `Orig` with `IntoMeltFrame<ValueLabel>`.
type AsMeltFrame<Orig, ValueLabel> = <Orig as IntoMeltFrame<ValueLabel>>::Output;
// `WithFrame<Orig, Added>`: the result of adding the frame `Added` to `Orig` with `AddFrame`.
type WithFrame<Orig, Added> = <Orig as AddFrame<Added>>::Output;
1285
1286impl<Frames, Labels, MeltLabels, NameLabel, ValueLabel, HoldLabels>
1287    Melt<MeltLabels, NameLabel, ValueLabel, HoldLabels> for DataView<Labels, Frames>
1288where
1289    Frames: NRows + Clone,
1290    NameLabel: Debug,
1291    Labels: SetDiff<MeltLabels, Set = HoldLabels>,
1292    MeltLabels: Len + IntoStrFrame<NameLabel>,
1293    <MeltLabels as IntoStrFrame<NameLabel>>::Output: IntoView,
1294    Self: Subview<HoldLabels>,
1295    <Self as Subview<HoldLabels>>::Output: IntoFrame,
1296    <<Self as Subview<HoldLabels>>::Output as IntoFrame>::Output: UpdatePermutation,
1297    AsView<<MeltLabels as IntoStrFrame<NameLabel>>::Output>:
1298        AddFrame<<<Self as Subview<HoldLabels>>::Output as IntoFrame>::Output>,
1299    Self: Subview<MeltLabels>,
1300    <Self as Subview<MeltLabels>>::Output: IntoMeltFrame<ValueLabel>,
1301    WithFrame<
1302        AsView<<MeltLabels as IntoStrFrame<NameLabel>>::Output>,
1303        AsFrame<<Self as Subview<HoldLabels>>::Output>,
1304    >: AddFrame<AsMeltFrame<<Self as Subview<MeltLabels>>::Output, ValueLabel>>,
1305    HoldLabels: AssocLabels,
1306    <HoldLabels as AssocLabels>::Labels: Append<Labels![NameLabel, ValueLabel]>,
1307    WithFrame<
1308        WithFrame<
1309            AsView<<MeltLabels as IntoStrFrame<NameLabel>>::Output>,
1310            AsFrame<<Self as Subview<HoldLabels>>::Output>,
1311        >,
1312        AsMeltFrame<<Self as Subview<MeltLabels>>::Output, ValueLabel>,
1313    >: Subview<
1314        <<HoldLabels as AssocLabels>::Labels as Append<Labels![NameLabel, ValueLabel]>>::Appended,
1315    >,
1316{
1317    type Output = <WithFrame<
1318        WithFrame<
1319            AsView<<MeltLabels as IntoStrFrame<NameLabel>>::Output>,
1320            AsFrame<<Self as Subview<HoldLabels>>::Output>,
1321        >,
1322        AsMeltFrame<<Self as Subview<MeltLabels>>::Output, ValueLabel>,
1323    > as Subview<
1324        <<HoldLabels as AssocLabels>::Labels as Append<Labels![NameLabel, ValueLabel]>>::Appended,
1325    >>::Output;
1326
1327    fn melt(&self) -> Self::Output {
1328        let premelt_nrows = self.nrows();
1329        let melt_len = MeltLabels::len();
1330
1331        // create a new FieldData<String> with the label names from MeltLabels, and convert it into
1332        // a DataStore. Build a DataFrame around it with an index permutation that repeats the whole
1333        // list `premelt_nrows` times (e.g. [0,1,2,3,0,1,2,3,0,1,2,3,...,0,1,2,3])
1334        let melt_label_view = MeltLabels::into_repeated_str_frame(premelt_nrows).into_view();
1335
1336        // create new frame based on the hold labels, with an index permutation that repeats
1337        // every element `melt_len` times
1338        // (e.g. [0,0,0,0,1,1,1,1,...,nrows-1,nrows-1,nrows-1,nrows-1])
1339        let hold_frame = Subview::<HoldLabels>::subview(self).into_frame();
1340        let mut hold_permutation = Vec::with_capacity(melt_len * premelt_nrows);
1341        for i in 0..premelt_nrows {
1342            for _ in 0..melt_len {
1343                hold_permutation.push(i);
1344            }
1345        }
1346        let hold_frame = hold_frame.update_permutation(&hold_permutation);
1347        let label_hold_dv = melt_label_view.add_frame(hold_frame);
1348
1349        // create a new frame based on the MeltLabels as a LabelSpan-based frame (switches the
1350        // store field it draws from for each index)
1351        let melt_frame =
1352            IntoMeltFrame::<ValueLabel>::into_melt_frame(Subview::<MeltLabels>::subview(self));
1353        let final_dv = label_hold_dv.add_frame(melt_frame);
1354        // call subview to reorder fields properly
1355        final_dv.subview()
1356    }
1357}
1358
1359impl<Labels, Frames> DataView<Labels, Frames> {
1360    /// Creates a new `DataView` that aggregates values in the `ValueLabel` field, grouping by
1361    /// records in the `KeyLabels` set of fields, and storing the result in a new field with
1362    /// label `AggLabel`. The resulting `DataView` will contain the `KeyLabels` fields and the
1363    /// newly constructed `AggLabel` field.
1364    ///
1365    /// For each unique set of key values in `KeyLabels`, this method will find all the records
1366    /// in the `DataView` which match, initialize an accumulator value with the argument `init`,
1367    /// and call `AggFunc` for each of the values in the `ValueLabel` field. `AggFunc` takes a
1368    /// mutable `AggType` value which it updates with the
1369    /// [Value](../field/enum.Value.html)s of type `DType` from the `ValueLabel` field.
1370    ///
1371    /// # Example
1372    /// Let's start with the data table which contains three fields: an employee ID `EmpId`, an
1373    /// annual salary `Salary`, and a text field denoting which year this salary took place:
1374    /// `SalaryYear`. This table (which is the final result of the example for the
1375    /// [melt](struct.DataView.html#method.melt) documentation) can be represented with the
1376    /// tablespace:
1377    /// ```
1378    /// # #[macro_use] extern crate agnes;
1379    /// tablespace![
1380    ///     table salary {
1381    ///         EmpId: u64,
1382    ///         SalaryYear: String,
1383    ///         Salary: f64,
1384    ///     }
1385    /// ];
1386    /// ```
1387    /// and data:
1388    /// ```text
1389    ///  EmpId | SalaryYear | Salary
1390    /// -------+------------+--------
1391    ///  0     | Year2010   | 1500
1392    ///  0     | Year2011   | 1600
1393    ///  0     | Year2012   | 1700
1394    ///  0     | Year2013   | 1850
1395    ///  0     | Year2014   | 2000
1396    ///  1     | Year2010   | 900
1397    ///  1     | Year2011   | 920
1398    ///  1     | Year2012   | 940
1399    ///  1     | Year2013   | 940
1400    ///  1     | Year2014   | 970
1401    ///  2     | Year2010   | 600
1402    ///  2     | Year2011   | 800
1403    ///  2     | Year2012   | 900
1404    ///  2     | Year2013   | 1020
1405    ///  2     | Year2014   | 1100
1406    /// ```
1407    /// For this example, let's compute the total yearly salary being payed out to all employees.
1408    /// Thus, we want to aggregate over each value in `SalaryYear`, and compute the sum of `Salary`.
1409    /// Therefore, our `KeyLabels` (our groups) would be `Labels![SalaryYear]` (since we can have
1410    /// more than one labels as our key, we need to use the label list-making macro
1411    /// [Labels](../macro.Labels.html)). Our `ValueLabel` (the value being summed) is `Salary`, and
1412    /// `AggLabel` will be a new label we need to add to our tablespace, which we'll call
1413    /// `TotalYearlySalary`.
1414    ///
1415    /// ```
1416    /// # #[macro_use] extern crate agnes;
1417    /// tablespace![
1418    ///     table salary {
1419    ///         EmpId: u64,
1420    ///         SalaryYear: String,
1421    ///         Salary: f64,
1422    ///         TotalYearlySalary: f64,
1423    ///     }
1424    /// ];
1425    /// #
1426    /// # use salary::*;
1427    /// #
1428    /// fn main() {
1429    /// #     let salary_table = table![
1430    /// #         EmpId = [0u64, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2];
1431    /// #         SalaryYear = [
1432    /// #             "Year2010", "Year2011", "Year2012", "Year2013", "Year2014",
1433    /// #             "Year2010", "Year2011", "Year2012", "Year2013", "Year2014",
1434    /// #             "Year2010", "Year2011", "Year2012", "Year2013", "Year2014"
1435    /// #         ];
1436    /// #         Salary = [
1437    /// #             1500.0, 1600.0, 1700.0, 1850.0, 2000.0,
1438    /// #             900.0, 920.0, 940.0, 940.0, 970.0,
1439    /// #             600.0, 800.0, 900.0, 1020.0, 1100.0
1440    /// #         ];
1441    /// #     ];
1442    ///     // <load data into DataView salary_table>
1443    ///     // salary table should have 15 rows -- 5 years of data for each of our 3 employees --
1444    ///     // and 3 fields (employee ID, salary year name, and salary value)
1445    ///     assert_eq!((salary_table.nrows(), salary_table.nfields()), (15, 3));
1446    ///     assert_eq!(salary_table.fieldnames(), vec!["EmpId", "SalaryYear", "Salary"]);
1447    ///     println!("{}", salary_table);
1448    ///
1449    ///     // compute the total salary per year, aggregated over employees
1450    ///     let agg_table = salary_table
1451    ///         .aggregate::<Labels![SalaryYear], Salary, TotalYearlySalary, _, _, _>(
1452    ///             0.0,
1453    ///             |accum, val| {
1454    ///                 *accum = *accum + val.unwrap_or(&0.0);
1455    ///             },
1456    ///         );
1457    ///
1458    ///     // we're left with five rows (one for each year of data), and two columns (year name and
1459    ///     // sum)
1460    ///     assert_eq!((agg_table.nrows(), agg_table.nfields()), (5, 2));
1461    ///     println!("{}", agg_table);
1462    /// }
1463    /// ```
1464    /// The call to aggregate takes two arguments: the value used to initialized each of our five
1465    /// aggregations (each of the five years), and a function which takes a mutable accumulator
1466    /// and the datum value (a [Value](../field/enum.Value.html) object) and updates the
1467    /// accumulator by adding the value. We use `unwrap_or` here to treat missing values a `0.0`.
1468    ///
1469    /// The resulting printed table should be:
1470    /// ```text
1471    ///  SalaryYear | TotalYearlySalary
1472    /// ------------+-------------------
1473    ///  Year2010   | 3000
1474    ///  Year2011   | 3320
1475    ///  Year2012   | 3540
1476    ///  Year2013   | 3810
1477    ///  Year2014   | 4070
1478    /// ```
1479    pub fn aggregate<KeyLabels, ValueLabel, AggLabel, DType, AggType, AggFunc>(
1480        &self,
1481        init: AggType,
1482        f: AggFunc,
1483    ) -> <Self as Aggregate<KeyLabels, ValueLabel, AggLabel, DType, AggType>>::Output
1484    where
1485        Self: Aggregate<KeyLabels, ValueLabel, AggLabel, DType, AggType>,
1486        AggFunc: Fn(&mut AggType, Value<&DType>),
1487    {
1488        Aggregate::<KeyLabels, ValueLabel, AggLabel, DType, AggType>::aggregate::<AggFunc>(
1489            self, init, f,
1490        )
1491    }
1492}
1493
/// Trait providing the `aggregate` method for aggregating values over a specified grouping of
/// records. See the intrinsic method [aggregate](struct.DataView.html#method.aggregate) for more
/// details.
///
/// The type parameters mirror those of the intrinsic method: the labels of the grouping (key)
/// fields, the label of the field being aggregated, the label of the new aggregate field, the
/// data type of the aggregated field, and the type of the accumulator.
pub trait Aggregate<KeyLabels, ValueLabel, AggLabel, DType, AggType> {
    /// Type produced by this aggregate method.
    type Output;

    /// Perform the 'aggregate' operation. See the intrinsic method
    /// [aggregate](struct.DataView.html#method.aggregate) for more details.
    ///
    /// `init` is the starting accumulator value and `f` is called with a mutable accumulator and
    /// a value from the aggregated field.
    fn aggregate<AggFunc>(&self, init: AggType, f: AggFunc) -> Self::Output
    where
        AggFunc: Fn(&mut AggType, Value<&DType>);
}
1507
1508impl<Labels, Frames, KeyLabels, ValueLabel, AggLabel, DType, AggType>
1509    Aggregate<KeyLabels, ValueLabel, AggLabel, DType, AggType> for DataView<Labels, Frames>
1510where
1511    Self: NRows + SelectFieldByLabel<ValueLabel, DType = DType>,
1512    Labels: FieldList<KeyLabels, Frames> + LabelSubset<KeyLabels> + FrameIndexList,
1513    <Labels as FieldList<KeyLabels, Frames>>::Output: HashIndex + PartialEqIndex,
1514    <Labels as LabelSubset<KeyLabels>>::Output: Reorder<KeyLabels>,
1515    AggType: Clone,
1516    // AggFunc: Fn(&mut AggType, Value<&<Self as SelectFieldByLabel<ValueLabel>>::DType>),
1517    FieldData<AggType>: IntoStore<AggLabel>,
1518    <FieldData<AggType> as IntoStore<AggLabel>>::Output: IntoFrame,
1519    Frames: NRows + SubsetClone<<Labels as FrameIndexList>::LabelList>,
1520    <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output: UpdatePermutation,
1521    DataView<
1522        <<Labels as LabelSubset<KeyLabels>>::Output as Reorder<KeyLabels>>::Output,
1523        <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output,
1524    >: AddFrame<<<FieldData<AggType> as IntoStore<AggLabel>>::Output as IntoFrame>::Output>,
1525{
1526    // output is KeyLabels, then single ValueLabel column
1527    type Output = <DataView<
1528        <<Labels as LabelSubset<KeyLabels>>::Output as Reorder<KeyLabels>>::Output,
1529        <Frames as SubsetClone<<Labels as FrameIndexList>::LabelList>>::Output,
1530    > as AddFrame<
1531        <<FieldData<AggType> as IntoStore<AggLabel>>::Output as IntoFrame>::Output,
1532    >>::Output;
1533
1534    fn aggregate<AggFunc>(&self, init: AggType, f: AggFunc) -> Self::Output
1535    where
1536        AggFunc: Fn(&mut AggType, Value<&DType>),
1537    {
1538        let fl = self.field_list::<KeyLabels>();
1539        let values = self.field::<ValueLabel>();
1540        let mut map = HashMap::new();
1541        let mut indices = vec![];
1542        let mut aggregates = vec![];
1543        for i in 0..self.nrows() {
1544            let record = Record::new(&fl, i);
1545            let aggregates_idx = map.entry(record).or_insert_with(|| {
1546                indices.push(i);
1547                aggregates.push(init.clone());
1548                debug_assert_eq!(indices.len(), aggregates.len());
1549                indices.len() - 1
1550            });
1551            f(
1552                &mut aggregates[*aggregates_idx],
1553                values.get_datum(i).unwrap(),
1554            );
1555        }
1556        let agg_data: FieldData<_> = aggregates.into();
1557        let agg_frame = IntoStore::<AggLabel>::into_store(agg_data).into_frame();
1558
1559        let record_frames = self.frames.subset_clone().update_permutation(&indices);
1560
1561        DataView {
1562            _labels: PhantomData,
1563            frames: record_frames,
1564        }
1565        .add_frame(agg_frame)
1566    }
1567}
1568
1569#[cfg(test)]
1570mod tests {
1571    use std::fmt::Debug;
1572    use std::path::Path;
1573
1574    use csv_sniffer::metadata::Metadata;
1575
1576    use super::*;
1577    use source::csv::{CsvReader, CsvSource, IntoCsvSrcSchema};
1578
1579    #[cfg(feature = "test-utils")]
1580    use test_utils::*;
1581
1582    use access::DataIndex;
1583    use error::*;
1584
1585    fn load_csv_file<Schema>(
1586        filename: &str,
1587        schema: Schema,
1588    ) -> (CsvReader<Schema::CsvSrcSchema>, Metadata)
1589    where
1590        Schema: IntoCsvSrcSchema,
1591        <Schema as IntoCsvSrcSchema>::CsvSrcSchema: Debug,
1592    {
1593        let data_filepath = Path::new(file!()) // start as this file
1594            .parent()
1595            .unwrap() // navigate up to src directory
1596            .parent()
1597            .unwrap() // navigate up to root directory
1598            .join("tests") // navigate into integration tests directory
1599            .join("data") // navigate into data directory
1600            .join(filename); // navigate to target file
1601
1602        let source = CsvSource::new(data_filepath).unwrap();
1603        (
1604            CsvReader::new(&source, schema).unwrap(),
1605            source.metadata().clone(),
1606        )
1607    }
1608
// Table declaration for the `gdp.csv` fixture read by the CSV-based tests below: two `String`
// fields (`CountryName`, `CountryCode`) and one `f64` field (`Year1983`).
1609    tablespace![
1610        pub table gdp {
1611            CountryName: String,
1612            CountryCode: String,
1613            Year1983: f64,
1614        }
1615    ];
1616
// Smoke test: a field of a CSV-backed view can be looked up by its label. There is no
// assertion; the test passes as long as loading, reading and the lookup do not panic.
#[test]
fn lookup_field() {
    let gdp_schema = schema![
        fieldname gdp::CountryName = "Country Name";
        fieldname gdp::CountryCode = "Country Code";
        fieldname gdp::Year1983 = "1983";
    ];

    // the schema is not used again, so it is moved into the loader instead of being cloned
    let (mut csv_rdr, _metadata) = load_csv_file("gdp.csv", gdp_schema);
    let ds = csv_rdr.read().unwrap();
    let view = ds.into_view();

    let country_name = view.field::<gdp::CountryName>();
    println!("{:?}", country_name);
}
1632
// Smoke test: a view built from the GDP CSV fixture can be formatted with `Display`. There is
// no assertion; the test passes as long as nothing panics.
#[test]
fn generate_dataindex_cons() {
    let gdp_schema = schema![
        fieldname gdp::CountryName = "Country Name";
        fieldname gdp::CountryCode = "Country Code";
        fieldname gdp::Year1983 = "1983";
    ];

    // the schema is not used again, so it is moved into the loader instead of being cloned
    let (mut csv_rdr, _metadata) = load_csv_file("gdp.csv", gdp_schema);
    let ds = csv_rdr.read().unwrap();
    let view = ds.into_view();

    println!("{}", view);
}
1647
// Merging the sample employee view with the "extra" employee view yields a view with the
// fields of both sources and an unchanged number of records.
#[cfg(feature = "test-utils")]
#[test]
fn merge() {
    let emp_view = sample_emp_table().into_view();
    let extra_view = sample_emp_table_extra().into_view();

    println!("{}", emp_view);
    println!("{}", extra_view);

    let merged = emp_view.merge(&extra_view).unwrap();
    println!("{}", merged);

    // fields of the first view come first, followed by those of the second
    let expected_names = vec![
        "EmpId",
        "DeptId",
        "EmpName",
        "SalaryOffset",
        "DidTraining",
        "VacationHrs",
    ];
    assert_eq!(merged.nrows(), 7);
    assert_eq!(merged.nfields(), 6);
    assert_eq!(merged.fieldnames(), expected_names);
}
1673
// Merging the employee view with the department view must fail with
// `AgnesError::DimensionMismatch`; any other outcome fails the test.
#[cfg(feature = "test-utils")]
#[test]
fn merge_dimension_mismatch() {
    let emp_view = sample_emp_table().into_view();
    let dept_view = sample_dept_table().into_view();

    println!("{}", emp_view);
    println!("{}", dept_view);

    let outcome = emp_view.merge(&dept_view);
    match outcome {
        Err(AgnesError::DimensionMismatch(_)) => { /* expected */ }
        Err(e) => panic!("Incorrect error: {:?}", e),
        Ok(_) => panic!("Merge was expected to fail (dimension mismatch), but succeeded"),
    }
}
// Second employee table with the same field names and types as the sample employee table,
// declared in a separate tablespace so that `merge_different_stores` can merge the two.
// `@continue(...)` is given the successor (`Add1`) of `test_utils::emp_table::Table`;
// NOTE(review): presumably this keeps the new table's identifier distinct from the existing
// one -- confirm against the `tablespace!` macro documentation.
1694    #[cfg(feature = "test-utils")]
1695    tablespace![
1696        @continue(typenum::Add1<::test_utils::emp_table::Table>)
1697
1698        pub table emp_table2 {
1699            EmpId: u64,
1700            DeptId: u64,
1701            EmpName: String,
1702        }
1703    ];
1704
// Two views with identical field names can be merged when their labels come from different
// tablespaces (here `emp_table` and `emp_table2`).
#[cfg(feature = "test-utils")]
#[test]
fn merge_different_stores() {
    let first_view = sample_emp_table().into_view();

    // Merging `first_view` with another view of `sample_emp_table()` would NOT COMPILE due to
    // the field name collision (see the compile-fail/merge_errors test).

    // The same sample data stored in a table from another tablespace, however, merges fine.
    let second_view = {
        let store: emp_table2::Store = sample_emp_table![];
        store.into_view()
    };

    println!("{}", first_view);
    println!("{}", second_view);

    let merged = first_view.merge(&second_view).unwrap();
    println!("{}", merged);

    // the field names repeat: once for each source table
    let expected_names = vec!["EmpId", "DeptId", "EmpName", "EmpId", "DeptId", "EmpName"];
    assert_eq!(merged.nrows(), 7);
    assert_eq!(merged.nfields(), 6);
    assert_eq!(merged.fieldnames(), expected_names);
}
1730
// Employee table with renamed fields (`EmployeeId`, `DepartmentId`, `EmployeeName`); its
// labels are the relabeling targets in the `relabel` test. `@continue(...)` is given the
// successor (`Add1`) of `emp_table2::Table`, chaining this tablespace after the previous one.
1731    #[cfg(feature = "test-utils")]
1732    tablespace![
1733        @continue(typenum::Add1<::view::tests::emp_table2::Table>)
1734
1735        pub table emp_table3 {
1736            EmployeeId: u64,
1737            DepartmentId: u64,
1738            EmployeeName: String,
1739        }
1740    ];
1741
// Relabeling every field of one view removes the label collision that otherwise prevents
// two views of the same table from being merged.
#[cfg(feature = "test-utils")]
#[test]
fn relabel() {
    let to_relabel = sample_emp_table().into_view();
    let untouched = sample_emp_table().into_view();

    // Much like in `merge_different_stores`, merging these two views directly does not compile.
    // Once all the fields of one of them carry different labels, the merge can go ahead.
    let relabeled = to_relabel
        .relabel::<emp_table::EmpId, emp_table3::EmployeeId>()
        .relabel::<emp_table::DeptId, emp_table3::DepartmentId>()
        .relabel::<emp_table::EmpName, emp_table3::EmployeeName>();

    let merged = relabeled.merge(&untouched).unwrap();
    println!("{}", merged);

    // relabeled fields first, followed by the original ones
    let expected_names = vec![
        "EmployeeId",
        "DepartmentId",
        "EmployeeName",
        "EmpId",
        "DeptId",
        "EmpName",
    ];
    assert_eq!(merged.nrows(), 7);
    assert_eq!(merged.nfields(), 6);
    assert_eq!(merged.fieldnames(), expected_names);
}
1771
// Employee table whose fields carry an explicit name given in braces; the `name_change` test
// checks that `fieldnames()` reports these names ("Employee Id", ...) instead of the label
// identifiers. `@continue(...)` is given the successor (`Add1`) of `emp_table3::Table`.
1772    #[cfg(feature = "test-utils")]
1773    tablespace![
1774        @continue(typenum::Add1<::view::tests::emp_table3::Table>)
1775
1776        pub table emp_table4 {
1777            EmplId: u64 = {"Employee Id"},
1778            DeptId: u64 = {"Department Id"},
1779            EmpName: String = {"Employee Name"},
1780        }
1781    ];
1782
// Field names declared explicitly in the tablespace (`emp_table4`) are the ones reported by
// `fieldnames()`.
#[cfg(feature = "test-utils")]
#[test]
fn name_change() {
    let view = {
        let store: emp_table4::Store = sample_emp_table![];
        store.into_view()
    };
    println!("{}", view);

    let expected_names = vec!["Employee Id", "Department Id", "Employee Name"];
    assert_eq!(view.nrows(), 7);
    assert_eq!(view.nfields(), 3);
    assert_eq!(view.fieldnames(), expected_names);
}
1797
// The view of the sample employee table reports its field names in declaration order.
#[cfg(feature = "test-utils")]
#[test]
fn fieldnames() {
    let view = sample_emp_table().into_view();
    let expected_names = vec!["EmpId", "DeptId", "EmpName"];
    assert_eq!(view.fieldnames(), expected_names);
}
1805
// Sub-views expose only the requested fields, keep every record, and increase the store
// reference count reported by the parent view by one each. All sub-views are bound to their
// own variable and stay alive until the end of the test, which is what the increasing
// reference counts rely on.
#[cfg(feature = "test-utils")]
#[test]
fn subview() {
    use test_utils::emp_table::*;

    let full_view = sample_emp_table().into_view();
    assert_eq!(full_view.fieldnames(), vec!["EmpId", "DeptId", "EmpName"]);
    assert_eq!(full_view.store_ref_counts(), vec![1]);
    assert_eq!(full_view.nrows(), 7);
    assert_eq!(full_view.nfields(), 3);

    // single field, requested twice
    let one_field_a = full_view.v::<Labels![EmpId]>();
    assert_eq!(one_field_a.fieldnames(), vec!["EmpId"]);
    assert_eq!(full_view.store_ref_counts(), vec![2]);
    assert_eq!(one_field_a.nrows(), 7);
    assert_eq!(one_field_a.nfields(), 1);

    let one_field_b = full_view.v::<Labels![EmpId]>();
    assert_eq!(one_field_b.fieldnames(), vec!["EmpId"]);
    assert_eq!(full_view.store_ref_counts(), vec![3]);
    assert_eq!(one_field_b.nrows(), 7);
    assert_eq!(one_field_b.nfields(), 1);

    // two fields, requested twice
    let two_fields_a = full_view.v::<Labels![EmpId, DeptId]>();
    assert_eq!(two_fields_a.fieldnames(), vec!["EmpId", "DeptId"]);
    assert_eq!(full_view.store_ref_counts(), vec![4]);
    assert_eq!(two_fields_a.nrows(), 7);
    assert_eq!(two_fields_a.nfields(), 2);

    let two_fields_b = full_view.v::<Labels![EmpId, DeptId]>();
    assert_eq!(two_fields_b.fieldnames(), vec!["EmpId", "DeptId"]);
    assert_eq!(full_view.store_ref_counts(), vec![5]);
    assert_eq!(two_fields_b.nrows(), 7);
    assert_eq!(two_fields_b.nfields(), 2);

    // all three fields, requested twice
    let three_fields_a = full_view.v::<Labels![EmpId, DeptId, EmpName]>();
    assert_eq!(
        three_fields_a.fieldnames(),
        vec!["EmpId", "DeptId", "EmpName"]
    );
    assert_eq!(full_view.store_ref_counts(), vec![6]);
    assert_eq!(three_fields_a.nrows(), 7);
    assert_eq!(three_fields_a.nfields(), 3);

    let three_fields_b = full_view.v::<Labels![EmpId, DeptId, EmpName]>();
    assert_eq!(
        three_fields_b.fieldnames(),
        vec!["EmpId", "DeptId", "EmpName"]
    );
    assert_eq!(full_view.store_ref_counts(), vec![7]);
    assert_eq!(three_fields_b.nrows(), 7);
    assert_eq!(three_fields_b.nfields(), 3);

    // sub-views of the most recent two-field sub-view
    let nested_dept = two_fields_b.v::<Labels![DeptId]>();
    assert_eq!(nested_dept.fieldnames(), vec!["DeptId"]);
    assert_eq!(full_view.store_ref_counts(), vec![8]);
    assert_eq!(nested_dept.nrows(), 7);
    assert_eq!(nested_dept.nfields(), 1);

    let nested_emp = two_fields_b.v::<Labels![EmpId]>();
    assert_eq!(nested_emp.fieldnames(), vec!["EmpId"]);
    assert_eq!(full_view.store_ref_counts(), vec![9]);
    assert_eq!(nested_emp.nrows(), 7);
    assert_eq!(nested_emp.nfields(), 1);
}
1866
// A sub-view of the merged sample view can pick fields with labels from both the `emp_table`
// and `extra_emp` modules; afterwards both reference counts reported by the parent view are 2.
#[cfg(feature = "test-utils")]
#[test]
fn subview_merged() {
    use test_utils::emp_table::*;
    use test_utils::extra_emp::*;

    let merged_view = sample_merged_emp_table();
    println!("{:?}", merged_view.store_ref_counts());

    let picked = merged_view.v::<Labels![DeptId, DidTraining]>();
    println!("{}", picked);

    let expected_names = vec!["DeptId", "DidTraining"];
    assert_eq!(picked.fieldnames(), expected_names);
    assert_eq!(merged_view.store_ref_counts(), vec![2, 2]);
    assert_eq!(picked.nrows(), 7);
    assert_eq!(picked.nfields(), 2);
}
1883
// The field order of a sub-view follows the order of the requested labels, not the order of
// the fields in the parent view.
#[cfg(feature = "test-utils")]
#[test]
fn subview_order() {
    use test_utils::emp_table::*;

    let parent = sample_emp_table().into_view();
    let parent_names = vec!["EmpId", "DeptId", "EmpName"];
    assert_eq!(parent.fieldnames(), parent_names);

    let reordered = parent.v::<Labels![DeptId, EmpId]>();
    let reordered_names = vec!["DeptId", "EmpId"];
    assert_eq!(reordered.fieldnames(), reordered_names);
}
1894
1895    //TODO: multi-frame subview tests (which filter out no-longer-needed frames)
1896
// Sorting a view reorders all of its fields consistently, and sorting a clone leaves the
// view it was cloned from untouched.
#[cfg(feature = "test-utils")]
#[test]
fn sort() {
    use test_utils::emp_table::*;
    use test_utils::extra_emp::*;

    let unsorted = sample_merged_emp_table();
    assert_eq!(unsorted.nrows(), 7);

    // expected contents once sorted by employee name
    let names_in_name_order = vec!["Ann", "Bob", "Cara", "Jamie", "Louis", "Louise", "Sally"];
    let ids_in_name_order = vec![10u64, 5, 6, 2, 8, 9, 0];

    // sort by name
    let by_name = unsorted.clone().sort_by_label::<EmpName>();
    assert_eq!(by_name.field::<EmpName>().to_vec(), names_in_name_order);
    assert_eq!(by_name.field::<EmpId>().to_vec(), ids_in_name_order);

    // re-sort a clone by employee id
    let by_id = by_name.clone().sort_by_label::<EmpId>();
    assert_eq!(
        by_id.field::<EmpName>().to_vec(),
        vec!["Sally", "Jamie", "Bob", "Cara", "Louis", "Louise", "Ann"]
    );
    assert_eq!(
        by_id.field::<EmpId>().to_vec(),
        vec![0u64, 2, 5, 6, 8, 9, 10]
    );

    // the name-sorted view must not have been affected by the re-sort
    assert_eq!(by_name.field::<EmpName>().to_vec(), names_in_name_order);
    assert_eq!(by_name.field::<EmpId>().to_vec(), ids_in_name_order);

    // starting from the name-sorted view, sort by vacation hours with a custom comparator
    let by_vacation = by_name.clone().sort_by_label_comparator::<VacationHrs, _>(
        |left: Value<&f32>, right: Value<&f32>| left.partial_cmp(&right).unwrap(),
    );
    assert_eq!(
        by_vacation.field::<EmpName>().to_vec(),
        vec!["Louis", "Louise", "Cara", "Ann", "Sally", "Jamie", "Bob"]
    );
    assert_eq!(
        by_vacation.field::<EmpId>().to_vec(),
        vec![8u64, 9, 6, 10, 0, 2, 5]
    );
}
1941
// Filters narrow a view down to the matching records; successive filters stack, and filters
// applied to a clone do not affect the view it was cloned from.
#[cfg(feature = "test-utils")]
#[test]
fn filter() {
    use test_utils::emp_table::*;

    let everyone = sample_emp_table().into_view();
    assert_eq!(everyone.nrows(), 7);

    // keep only department 1
    let dept_one = everyone
        .clone()
        .filter::<DeptId, _>(|val: Value<&u64>| val == valref![1]);
    println!("{}", dept_one);
    assert_eq!(dept_one.nrows(), 3);
    assert_eq!(
        dept_one.field::<EmpName>().to_vec(),
        vec!["Sally", "Bob", "Cara"]
    );

    // a second filter on top of the first one
    let dept_one_high_ids = dept_one.filter::<EmpId, _>(|val: Value<&u64>| val >= valref![6]);
    assert_eq!(dept_one_high_ids.nrows(), 1);
    assert_eq!(dept_one_high_ids.field::<EmpName>().to_vec(), vec!["Cara"]);

    // the same id filter applied to the unfiltered view gives different results
    let high_ids = everyone
        .clone()
        .filter::<EmpId, _>(|val: Value<&u64>| val >= valref![6]);
    assert_eq!(high_ids.nrows(), 4);
    assert_eq!(
        high_ids.field::<EmpName>().to_vec(),
        vec!["Cara", "Louis", "Louise", "Ann"]
    );

    // narrow that result down to a different department
    let high_ids_dept_four = high_ids.filter::<DeptId, _>(|val: Value<&u64>| val == valref![4]);
    assert_eq!(high_ids_dept_four.nrows(), 2);
    assert_eq!(
        high_ids_dept_four.field::<EmpName>().to_vec(),
        vec!["Louise", "Ann"]
    );
}
1978
// Filtering and sorting can be combined in any order, and every derived view keeps its own
// record selection and ordering independently of the others.
#[cfg(feature = "test-utils")]
#[test]
fn filter_sort() {
    use test_utils::emp_table::*;
    use test_utils::extra_emp::*;

    let everyone = sample_merged_emp_table();
    assert_eq!(everyone.nrows(), 7);

    // expected names of the employees with remaining vacation hours, unsorted and sorted
    let names_unsorted = vec!["Sally", "Jamie", "Bob", "Cara", "Louise", "Ann"];
    let names_sorted = vec!["Ann", "Bob", "Cara", "Jamie", "Louise", "Sally"];

    // start by filtering for employees with remaining vacation hours
    let with_hours = everyone
        .clone()
        .filter::<VacationHrs, _>(|val: Value<&f32>| val >= 0.0);
    assert_eq!(with_hours.nrows(), 6);
    // only Louis has negative hours, so the rest of the employees remain
    assert_eq!(with_hours.field::<EmpName>().to_vec(), names_unsorted);

    // next, sort a clone by employee name
    let with_hours_by_name = with_hours.clone().sort_by_label::<EmpName>();
    assert_eq!(with_hours_by_name.field::<EmpName>().to_vec(), names_sorted);

    // filter a clone of the sorted view for people in department 1
    let dept_one_by_name = with_hours_by_name
        .clone()
        .filter::<DeptId, _>(|val: Value<&u64>| val == 1);
    assert_eq!(dept_one_by_name.nrows(), 3);
    // should just be the people in department 1, in employee name order
    assert_eq!(
        dept_one_by_name.field::<EmpName>().to_vec(),
        vec!["Bob", "Cara", "Sally"]
    );

    // the filtered-only view still has the original ordering
    assert_eq!(with_hours.field::<EmpName>().to_vec(), names_unsorted);

    // now filter the filtered-only view by department 1
    let dept_one_unsorted = with_hours.filter::<DeptId, _>(|val: Value<&u64>| val == 1);
    assert_eq!(dept_one_unsorted.nrows(), 3);
    // should be the people in department 1, but in the original order
    assert_eq!(
        dept_one_unsorted.field::<EmpName>().to_vec(),
        vec!["Sally", "Bob", "Cara"]
    );

    // the sorted view has not been affected by any of the other changes
    assert_eq!(with_hours_by_name.field::<EmpName>().to_vec(), names_sorted);
}
2036
// Unique records over a single key field: `unique_indices` reports the index of the first
// record for each distinct department id, `unique_values` the distinct values themselves.
#[cfg(feature = "test-utils")]
#[test]
fn unique_single() {
    let view = sample_emp_table().into_view();
    println!("{}", view);

    let first_indices = view.unique_indices::<Labels![emp_table::DeptId]>();
    println!("{:?}", first_indices);
    // there are four unique department IDs (1, 2, 3, 4) at indices 0, 1, 4, 5.
    assert_eq!(first_indices, vec![0, 1, 4, 5]);

    // look up the department id stored at each of the reported indices
    let dept_field = view.field::<emp_table::DeptId>();
    let ids_at_indices = first_indices
        .iter()
        .map(|&idx| dept_field.get_datum(idx).unwrap())
        .collect::<Vec<_>>();
    assert_eq![ids_at_indices, vec![1, 2, 3, 4]];

    // the unique department values can also be obtained directly with unique_values
    let unique_dept_view = view.unique_values::<Labels![emp_table::DeptId]>();
    println!("{}", unique_dept_view);
    assert_eq!(
        unique_dept_view.field::<emp_table::DeptId>().to_vec(),
        vec![1, 2, 3, 4]
    );
}
2064
// Unique records over a composite key (department id + training flag), including a check
// that the field order of the result follows the order of the requested labels.
#[cfg(feature = "test-utils")]
#[test]
fn unique_composite() {
    let view = sample_merged_emp_table();

    let first_indices =
        view.unique_indices::<Labels![emp_table::DeptId, extra_emp::DidTraining]>();
    // the only repeat is index 3
    assert_eq!(first_indices, vec![0, 1, 2, 4, 5, 6]);

    let dept_then_training =
        view.unique_values::<Labels![emp_table::DeptId, extra_emp::DidTraining]>();
    println!("{}", dept_then_training);
    assert_eq!(
        dept_then_training.fieldnames(),
        vec!["DeptId", "DidTraining",]
    );
    assert_eq!(
        dept_then_training.field::<emp_table::DeptId>().to_vec(),
        vec![1u64, 2, 1, 3, 4, 4]
    );
    assert_eq!(
        dept_then_training
            .field::<extra_emp::DidTraining>()
            .to_vec(),
        vec![false, false, true, true, false, true]
    );

    // check ordering: requesting the labels in reverse order reverses the fields
    let training_then_dept =
        view.unique_values::<Labels![extra_emp::DidTraining, emp_table::DeptId]>();
    println!("{}", training_then_dept);
    assert_eq!(
        training_then_dept.fieldnames(),
        vec!["DidTraining", "DeptId",]
    );
}
2091}