gitql_engine/
engine_distinct.rs

1use std::collections::HashSet;
2use std::hash::DefaultHasher;
3use std::hash::Hash;
4use std::hash::Hasher;
5
6use gitql_ast::statement::Distinct;
7use gitql_core::object::GitQLObject;
8use gitql_core::object::Group;
9use gitql_core::object::Row;
10
11/// Apply the distinct operator depending on the type of distinct
12pub(crate) fn apply_distinct_operator(
13    distinct: &Distinct,
14    object: &mut GitQLObject,
15    hidden_selections: &[String],
16) {
17    if object.is_empty() {
18        return;
19    }
20
21    match distinct {
22        Distinct::DistinctAll => apply_distinct_all_operation(object, hidden_selections),
23        Distinct::DistinctOn(fields) => apply_distinct_on_operation(object, fields),
24        _ => {}
25    }
26}
27
28/// Apply Distinct all operator that depend on all selected fields in the object
29fn apply_distinct_all_operation(object: &mut GitQLObject, hidden_selections: &[String]) {
30    let titles: Vec<&String> = object
31        .titles
32        .iter()
33        .filter(|s| !hidden_selections.contains(s))
34        .collect();
35
36    let titles_count = titles.len();
37    let hidden_selection_count = hidden_selections.len();
38
39    let objects = &object.groups[0].rows;
40    let mut new_objects = Group { rows: vec![] };
41    let mut values_set: HashSet<u64> = HashSet::new();
42
43    for object in objects {
44        // Build row of the selected only values
45        let mut row_values: Vec<String> = Vec::with_capacity(titles_count);
46        for i in 0..titles.len() {
47            if let Some(value) = object.values.get(i + hidden_selection_count) {
48                row_values.push(value.literal());
49            }
50        }
51
52        // Compute the hash for row of values
53        let mut hasher = DefaultHasher::new();
54        row_values.hash(&mut hasher);
55        let values_hash = hasher.finish();
56
57        // If this hash is unique, insert the row
58        if values_set.insert(values_hash) {
59            new_objects.rows.push(Row {
60                values: object.values.clone(),
61            });
62        }
63    }
64
65    // If number of total rows is changed, update the main group rows
66    if objects.len() != new_objects.len() {
67        object.groups[0].rows.clear();
68        object.groups[0].rows.append(&mut new_objects.rows);
69    }
70}
71
72/// Apply Distinct on one or more valid fields from the object
73fn apply_distinct_on_operation(object: &mut GitQLObject, distinct_fields: &[String]) {
74    let objects = &object.groups[0].rows;
75    let mut new_objects: Group = Group { rows: vec![] };
76    let mut values_set: HashSet<u64> = HashSet::new();
77    let titles = &object.titles;
78
79    for object in objects {
80        // Build row of the selected only values
81        let mut row_values: Vec<String> = Vec::with_capacity(distinct_fields.len());
82        for field in distinct_fields {
83            if let Some(index) = titles.iter().position(|r| r.eq(field)) {
84                row_values.push(object.values.get(index).unwrap().literal());
85            }
86        }
87
88        // Compute the hash for row of values
89        let mut hasher = DefaultHasher::new();
90        row_values.hash(&mut hasher);
91
92        // If this hash is unique, insert the row
93        if values_set.insert(hasher.finish()) {
94            new_objects.rows.push(Row {
95                values: object.values.clone(),
96            });
97        }
98    }
99
100    // If number of total rows is changed, update the main group rows
101    if objects.len() != new_objects.len() {
102        object.groups[0].rows.clear();
103        object.groups[0].rows.append(&mut new_objects.rows);
104    }
105}