1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
use std::collections::HashSet;
use std::fmt::{Debug, Display, Formatter};

use enum_as_inner::EnumAsInner;
use itertools::{Itertools, Position};
use serde::{Deserialize, Serialize};

use super::Ident;

/// Represents the object that is manipulated by the pipeline transforms.
/// Similar to a view in a database or a data frame.
///
/// The `Display` impl renders `columns` as a bracketed, comma-separated list
/// (for example `[a, b, ?]`); `inputs` and `prev_columns` are not rendered.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Lineage {
    /// Columns of this relation, in the order in which they are displayed.
    pub columns: Vec<LineageColumn>,

    /// Inputs of this relation.
    // NOTE(review): `LineageColumn::All::input_id` presumably matches the
    // `id` of one of these inputs — confirm against the resolver.
    pub inputs: Vec<LineageInput>,

    // A hack that allows name retention when applying `ExprKind::All { except }`
    //
    // Because of `#[serde(skip)]` this field is never serialized, and it is
    // filled with its default (an empty vector) when deserializing.
    #[serde(skip)]
    pub prev_columns: Vec<LineageColumn>,
}

/// A single input of a [`Lineage`]: a table that is made available under a
/// local name within a query.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LineageInput {
    /// Id of the node in AST that declares this input.
    pub id: usize,

    /// Local name of this input within a query.
    pub name: String,

    /// Fully qualified name of the table that provides the data for this input.
    pub table: Ident,
}

/// One entry in [`Lineage::columns`]: either an individual column or a
/// wildcard that stands for all columns of an input.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, EnumAsInner)]
pub enum LineageColumn {
    /// An individual column.
    Single {
        // name of the column; `None` for an unnamed column, which is
        // displayed as `?`
        name: Option<Ident>,

        // id of the defining expr (which can be actual expr or lineage input expr)
        target_id: usize,

        // if target is a relation, this is the name within the relation
        target_name: Option<String>,
    },

    /// All columns (including unknown ones) from an input (i.e. `foo_table.*`)
    All {
        // id of the input whose columns are covered; displayed as `<input_id>.*`
        // NOTE(review): presumably equal to the `id` of a `LineageInput` — confirm.
        input_id: usize,
        // NOTE(review): presumably the column names excluded from the
        // wildcard (cf. `ExprKind::All { except }`) — confirm. Not shown when
        // the column is displayed.
        except: HashSet<String>,
    },
}

impl Display for Lineage {
    /// Formats the lineage as a bracketed, comma-separated list of its
    /// columns, without appending target ids to the column names.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // The plain `Display` rendering leaves the target ids out.
        let display_ids = false;
        display_lineage(self, f, display_ids)
    }
}

/// Writes the columns of `lineage` to `f` as `[col, col, ...]`.
///
/// Each column is rendered by `display_lineage_column`; `display_ids` is
/// passed through to it unchanged. An empty column list is written as `[]`.
fn display_lineage(lineage: &Lineage, f: &mut Formatter, display_ids: bool) -> std::fmt::Result {
    write!(f, "[")?;
    for (position, column) in lineage.columns.iter().with_position() {
        // Every column except the first one is preceded by a separator.
        if matches!(position, Position::Middle | Position::Last) {
            write!(f, ", ")?;
        }
        display_lineage_column(column, f, display_ids)?;
    }
    write!(f, "]")
}

/// Writes a single lineage column to `f`.
///
/// * `LineageColumn::All` is written as `<input_id>.*`.
/// * `LineageColumn::Single` is written as its name, or `?` when it has no
///   name. When `display_ids` is set, `:<target_id>` is appended.
fn display_lineage_column(
    col: &LineageColumn,
    f: &mut Formatter,
    display_ids: bool,
) -> std::fmt::Result {
    // The wildcard form is complete after a single write; only single
    // columns continue below.
    let (name, target_id) = match col {
        LineageColumn::All { input_id, .. } => return write!(f, "{input_id}.*"),
        LineageColumn::Single {
            name, target_id, ..
        } => (name, target_id),
    };

    match name {
        Some(ident) => write!(f, "{ident}")?,
        None => write!(f, "?")?,
    }

    if !display_ids {
        return Ok(());
    }
    write!(f, ":{target_id}")
}