Skip to main content

sk_core/k8s/
pod_lifecycle.rs

1use std::cmp::{
2    Ord,
3    Ordering,
4    max,
5    min,
6};
7
8use tracing::*;
9
10use super::*;
11use crate::prelude::*;
12
// A PodLifecycleData object is how we track the length of time a pod was running in a cluster.  It
// has three states, Empty, Running, and Finished.  For each state, we track the timestamps that
// are relevant for that state, e.g., Running only has a start time, and Finished has both a start
// and end time.
//
// We compute this by tracking the earliest container start time and the latest container end time
// among all the containers in the pod.  We don't want to use the pod's creation timestamp field,
// for example, because this will include time when the pod was pending and not running;
// additionally, the various pod phase statuses don't actually have a "first container started"
// status -- "Running" means that all of the containers are created, and "Pending" means that "one
// or more of the containers is not running".  So instead, we track it by hand.
//
// There's some slightly ugly code here, mostly because of annoyances in the k8s API spec.  We want
// to look at all containers, including init containers, but the initContainer field is optional,
// whereas the main container field is not.  So we have to treat these paths slightly differently.
//
// A pod can only be marked "finished" if all of the containers in the pod have terminated, OR if
// the pod has been deleted externally -- in the happy path, even if the pod is deleted externally,
// we'd still get a status update saying that the containers have terminated, but I'm not sure this
// is guaranteed to be received, or received in the correct order.  So we have two different ways
// of trying to determine this information: the `new_for` function will only return `Finished` if
// all the containers have been definitively terminated, but the `guess_finished_lifecycle` will
// just fill in the finished timestamp with `Utc::now()`.
36
37impl PodLifecycleData {
38    fn new(start_ts: Option<i64>, end_ts: Option<i64>) -> PodLifecycleData {
39        match (start_ts, end_ts) {
40            (None, _) => PodLifecycleData::Empty,
41            (Some(ts), None) => PodLifecycleData::Running(ts),
42            (Some(start), Some(end)) => PodLifecycleData::Finished(start, end),
43        }
44    }
45
46    pub fn new_for(pod: &corev1::Pod) -> anyhow::Result<PodLifecycleData> {
47        let (mut earliest_start_ts, mut latest_end_ts) = (None, None);
48        let mut terminated_container_count = 0;
49
50        let pod_status = pod.status()?;
51        if let Some(cstats) = pod_status.init_container_statuses.as_ref() {
52            for (container, state) in cstats.iter().filter_map(|s| Some((&s.name, s.state.as_ref()?))) {
53                let (start_ts, end_ts) = get_start_end_ts(pod, container, state);
54                earliest_start_ts = min_some(start_ts, earliest_start_ts);
55                latest_end_ts = max(latest_end_ts, end_ts);
56            }
57        }
58
59        if let Some(cstats) = pod_status.container_statuses.as_ref() {
60            for (container, state) in cstats.iter().filter_map(|s| Some((&s.name, s.state.as_ref()?))) {
61                let (start_ts, end_ts) = get_start_end_ts(pod, container, state);
62                earliest_start_ts = min_some(start_ts, earliest_start_ts);
63
64                if end_ts.is_some() {
65                    terminated_container_count += 1;
66                }
67                latest_end_ts = max(latest_end_ts, end_ts);
68            }
69        }
70
71        // all init containers must have terminated before any of the main containers
72        // start, so we don't need to additionally check the init containers here.
73        //
74        // TODO: I am not sure if or how this logic needs to change with the stabilization
75        // of the sidecar primitive as a "non-terminating init container"
76        if terminated_container_count != pod.spec()?.containers.len() {
77            latest_end_ts = None;
78        }
79        Ok(PodLifecycleData::new(earliest_start_ts, latest_end_ts))
80    }
81
82    pub fn end_ts(&self) -> Option<i64> {
83        match self {
84            &PodLifecycleData::Finished(_, ts) => Some(ts),
85            _ => None,
86        }
87    }
88
89    pub fn start_ts(&self) -> Option<i64> {
90        match *self {
91            PodLifecycleData::Running(ts) => Some(ts),
92            PodLifecycleData::Finished(ts, _) => Some(ts),
93            _ => None,
94        }
95    }
96
97    pub fn overlaps(&self, start_ts: i64, end_ts: i64) -> bool {
98        // If at least one of the pod's lifecycle events appears between the given time window, OR
99        // if the pod is still running at the end of the given time window, it counts as
100        // overlapping the time window.
101        match *self {
102            PodLifecycleData::Running(ts) => ts < end_ts,
103            PodLifecycleData::Finished(s, e) => (start_ts <= s && s < end_ts) || (start_ts <= e && e < end_ts),
104            _ => false,
105        }
106    }
107
108    pub fn empty(&self) -> bool {
109        self == PodLifecycleData::Empty
110    }
111
112    pub fn running(&self) -> bool {
113        matches!(self, PodLifecycleData::Running(_))
114    }
115
116    pub fn finished(&self) -> bool {
117        matches!(self, PodLifecycleData::Finished(..))
118    }
119}
120
// We implement PartialOrd and PartialEq for PodLifecycleData; this is maybe a little bit magic,
// but it makes the code at the calling site much cleaner.  The motivation here is thus: if we've
// already received some lifecycle data, we don't want to override the data with differing data.
// An example could be, if a pod is in CrashLoopBackoff, every time we get a status update, the
// container is going to have a different start time recorded, but for the purposes of simulation,
// we want to record the _earliest_ start time we saw for the pod.
//
// With this in mind, we implement a partial order over PodLifecycleData, as follows:
//   - Empty < X, \forall X != Empty
//   - Running(start) < Finished(start, end), \forall Running, Finished, start, end
//   - Running(start1) <> Finished(start2, end), \forall start1 != start2
//   - Finished(start1, end1) <> Finished(start2, end2) \forall (start1 != start2 || end1 != end2)
//
// This allows us to concisely check for _valid_ updates to pod lifecycle data with an expression
// like `if pld1 > pld2 { do update }`; if pld1 and pld2 aren't comparable, no update will occur.
136impl PartialOrd for PodLifecycleData {
137    fn partial_cmp(&self, other: &PodLifecycleData) -> Option<Ordering> {
138        match self {
139            PodLifecycleData::Empty => {
140                if !other.empty() {
141                    Some(Ordering::Less)
142                } else {
143                    Some(Ordering::Equal)
144                }
145            },
146            PodLifecycleData::Running(ts) => match other {
147                PodLifecycleData::Empty => Some(Ordering::Greater),
148                PodLifecycleData::Running(other_ts) => {
149                    if ts == other_ts {
150                        Some(Ordering::Equal)
151                    } else {
152                        None
153                    }
154                },
155                PodLifecycleData::Finished(..) => Some(Ordering::Less),
156            },
157            PodLifecycleData::Finished(sts, ets) => match other {
158                PodLifecycleData::Empty => Some(Ordering::Greater),
159                PodLifecycleData::Running(other_ts) => {
160                    if sts == other_ts {
161                        Some(Ordering::Greater)
162                    } else {
163                        None
164                    }
165                },
166                PodLifecycleData::Finished(other_sts, other_ets) => {
167                    if sts == other_sts && ets == other_ets {
168                        Some(Ordering::Equal)
169                    } else {
170                        None
171                    }
172                },
173            },
174        }
175    }
176}
177
178fn get_start_end_ts(pod: &corev1::Pod, container: &str, state: &corev1::ContainerState) -> (Option<i64>, Option<i64>) {
179    let start_ts = state.start_ts().unwrap_or_else(|err| {
180        warn!("could not find start_ts for container {container} in {}: {err:?}", pod.namespaced_name());
181        None
182    });
183    let end_ts = state.end_ts().unwrap_or_else(|err| {
184        warn!("could not find end_ts for container {container} in {}: {err:?}", pod.namespaced_name());
185        None
186    });
187
188    (start_ts, end_ts)
189}
190
191impl PartialEq<Option<&PodLifecycleData>> for PodLifecycleData {
192    fn eq(&self, other: &Option<&PodLifecycleData>) -> bool {
193        match self {
194            PodLifecycleData::Empty => other.is_none() || other.as_ref().is_some_and(|plt| plt.empty()),
195            _ => other.as_ref().is_some_and(|plt| plt == self),
196        }
197    }
198}
199
200impl PartialOrd<Option<&PodLifecycleData>> for PodLifecycleData {
201    fn partial_cmp(&self, other: &Option<&PodLifecycleData>) -> Option<Ordering> {
202        match self {
203            PodLifecycleData::Empty => other.as_ref().map_or(Some(Ordering::Equal), |o| self.partial_cmp(o)),
204            _ => other.as_ref().map_or(Some(Ordering::Greater), |o| self.partial_cmp(o)),
205        }
206    }
207}
208
// The default ordering on Option types treats `None` as smaller than every `Some`, i.e.,
// `None < Some(x) \forall x`, so `min` returns `None` if either option is `None`.  This is not
// the correct behaviour if you want to compute the minimum of a list of options, if it exists,
// and only return None if all the options are None.  That is what min_some does.
//
// Note the asymmetry here: we don't need a corresponding max_some because 'greater-than' already
// works "correctly" for `None`: `max` only returns `None` if both options are `None`.
/// Returns the smaller of the values that are present.
///
/// If only one option is `Some`, it is returned unchanged; `None` is returned only when both
/// options are `None`.  When both values compare equal, `o1`'s value is returned.
pub fn min_some<T: Ord>(o1: Option<T>, o2: Option<T>) -> Option<T> {
    match (o1, o2) {
        (Some(v1), Some(v2)) => Some(min(v1, v2)),
        (present, None) => present,
        (None, present) => present,
    }
}
225
#[cfg(test)]
#[cfg_attr(coverage, coverage(off))]
mod test {
    use sk_testutils::*;

    use super::*;

    // Covers every combination of present/absent inputs, plus both orderings and a tie when
    // both inputs are present.
    #[rstest]
    #[case::both_none(None, None, None)]
    #[case::left_some(Some(1), None, Some(1))]
    #[case::right_some(None, Some(1), Some(1))]
    #[case::both_some(Some(2), Some(1), Some(1))]
    #[case::both_some_left_smaller(Some(1), Some(2), Some(1))]
    #[case::both_some_equal(Some(1), Some(1), Some(1))]
    fn test_min_some(#[case] o1: Option<i32>, #[case] o2: Option<i32>, #[case] expected: Option<i32>) {
        assert_eq!(min_some(o1, o2), expected);
    }
}
241}