Skip to main content

sk_core/k8s/
pod_lifecycle.rs

1use std::cmp::{
2    Ord,
3    Ordering,
4    max,
5    min,
6};
7
8use tracing::*;
9
10use super::*;
11use crate::prelude::*;
12
13// A PodLifecycleData object is how we track the length of time a pod was running in a cluster.  It
14// has three states, Empty, Running, and Finished.  For each state, we track the timestamps that
15// are relevant for that state, e.g., Running only has a start time, and Finished has both a start
16// and end time.
17//
// We compute this by tracking the earliest container start time and the latest container end time
// among all the containers in the pod.  We don't want to use the pod's creation timestamp field,
// for example, because this will include time when the pod was pending and not running;
// additionally, the various pod phase statuses don't actually have a "first container started"
// status -- "Running" means that all of the containers are created, and "Pending" means that "one
// or more of the containers is not running".  So instead, we track it by hand.
24//
25// There's some slightly ugly code here, mostly because of annoyances in the k8s API spec.  We want
26// to look at all containers, including init containers, but the initContainer field is optional,
27// whereas the main container field is not.  So we have to treat these paths slightly differently.
28//
29// A pod can only be marked "finished" if all of the containers in the pod have terminated, OR if
30// the pod has been deleted externally -- in the happy path, even if the pod is deleted externally,
31// we'd still get a status update saying that the containers have terminated, but I'm not sure this
32// is guaranteed to be received, or received in the correct order.  So we have two different ways
33// of trying to determine this information: the `new_for` function will only return `Finished` if
34// all the containers have been definitively terminated, but the `guess_finished_lifecycle` will
35// just fill in the finished timestamp with `Utc::now()`.
36
37impl PodLifecycleData {
38    fn new(start_ts: Option<i64>, end_ts: Option<i64>) -> PodLifecycleData {
39        match (start_ts, end_ts) {
40            (None, _) => PodLifecycleData::Empty,
41            (Some(ts), None) => PodLifecycleData::Running(ts),
42            (Some(start), Some(end)) => PodLifecycleData::Finished(start, end),
43        }
44    }
45
46    pub fn new_for(pod: &corev1::Pod) -> anyhow::Result<PodLifecycleData> {
47        let (mut earliest_start_ts, mut latest_end_ts) = (None, None);
48        let mut terminated_container_count = 0;
49
50        let pod_status = pod.status()?;
51        if let Some(cstats) = pod_status.init_container_statuses.as_ref() {
52            for (container, state) in cstats.iter().filter_map(|s| Some((&s.name, s.state.as_ref()?))) {
53                let (start_ts, end_ts) = get_start_end_ts(pod, container, state);
54                earliest_start_ts = min_some(start_ts, earliest_start_ts);
55                latest_end_ts = max(latest_end_ts, end_ts);
56            }
57        }
58
59        if let Some(cstats) = pod_status.container_statuses.as_ref() {
60            for (container, state) in cstats.iter().filter_map(|s| Some((&s.name, s.state.as_ref()?))) {
61                let (start_ts, end_ts) = get_start_end_ts(pod, container, state);
62                earliest_start_ts = min_some(start_ts, earliest_start_ts);
63
64                if end_ts.is_some() {
65                    terminated_container_count += 1;
66                }
67                latest_end_ts = max(latest_end_ts, end_ts);
68            }
69        }
70
71        // all init containers must have terminated before any of the main containers
72        // start, so we don't need to additionally check the init containers here.
73        //
74        // TODO: I am not sure if or how this logic needs to change with the stabilization
75        // of the sidecar primitive as a "non-terminating init container"
76        if terminated_container_count != pod.spec()?.containers.len() {
77            latest_end_ts = None;
78        }
79        Ok(PodLifecycleData::new(earliest_start_ts, latest_end_ts))
80    }
81
82    pub fn end_ts(&self) -> Option<i64> {
83        match self {
84            &PodLifecycleData::Finished(_, ts) => Some(ts),
85            _ => None,
86        }
87    }
88
89    pub fn start_ts(&self) -> Option<i64> {
90        match *self {
91            PodLifecycleData::Running(ts) => Some(ts),
92            PodLifecycleData::Finished(ts, _) => Some(ts),
93            _ => None,
94        }
95    }
96
97    pub fn overlaps(&self, start_ts: i64, end_ts: i64) -> bool {
98        // If at least one of the pod's lifecycle events appears between the given time window, OR
99        // if the pod is still running at the end of the given time window, it counts as
100        // overlapping the time window.
101        match *self {
102            PodLifecycleData::Running(ts) => ts < end_ts,
103            PodLifecycleData::Finished(s, e) => (start_ts <= s && s < end_ts) || (start_ts <= e && e < end_ts),
104            _ => false,
105        }
106    }
107
108    pub fn bound_start_ts(mut self, min_start_ts: i64) -> Self {
109        match self {
110            PodLifecycleData::Empty => (),
111            PodLifecycleData::Running(ref mut ts) => *ts = max(*ts, min_start_ts),
112            PodLifecycleData::Finished(ref mut ts, _) => *ts = max(*ts, min_start_ts),
113        }
114        self
115    }
116
117    pub fn empty(&self) -> bool {
118        self == PodLifecycleData::Empty
119    }
120
121    pub fn running(&self) -> bool {
122        matches!(self, PodLifecycleData::Running(_))
123    }
124
125    pub fn finished(&self) -> bool {
126        matches!(self, PodLifecycleData::Finished(..))
127    }
128}
129
130// We implement PartialOrd and PartialEq for PodLifecycleData; this is maybe a little bit magic,
131// but it makes the code at the calling site much cleaner.  The motivation here is thus: if we've
132// already received some lifecycle data, we don't want to override the data with differing data.
133// An example could be, if a pod is in CrashLoopBackoff, every time we get a status update, the
134// container is going to have a different start time recorded, but for the purposes of simulation,
135// we want to record the _earliest_ start time we saw for the pod.
136//
137// With this in mind, we implemnt a partial order over PodLifecycleData, as follows:
138//   - Empty < X, \forall X
139//   - Running(start) < Finished(start, end), \forall Running, Finished, start, end
140//   - Running(start1) <> Finished(start2, end), \forall start1 != start2
141//   - Finished(start1, end1) <> Finished(start2, end2) \forall (start1 != start2 || end1 != end2)
142//
143// This allows us to concisely check for _valid_ updates to pod lifecycle data with an expression
144// like if pld1 > pld2 { do update };  if pld1 and pld2 aren't comparable, no update will occur.
145impl PartialOrd for PodLifecycleData {
146    fn partial_cmp(&self, other: &PodLifecycleData) -> Option<Ordering> {
147        match self {
148            PodLifecycleData::Empty => {
149                if !other.empty() {
150                    Some(Ordering::Less)
151                } else {
152                    Some(Ordering::Equal)
153                }
154            },
155            PodLifecycleData::Running(ts) => match other {
156                PodLifecycleData::Empty => Some(Ordering::Greater),
157                PodLifecycleData::Running(other_ts) => {
158                    if ts == other_ts {
159                        Some(Ordering::Equal)
160                    } else {
161                        None
162                    }
163                },
164                PodLifecycleData::Finished(..) => Some(Ordering::Less),
165            },
166            PodLifecycleData::Finished(sts, ets) => match other {
167                PodLifecycleData::Empty => Some(Ordering::Greater),
168                PodLifecycleData::Running(other_ts) => {
169                    if sts == other_ts {
170                        Some(Ordering::Greater)
171                    } else {
172                        None
173                    }
174                },
175                PodLifecycleData::Finished(other_sts, other_ets) => {
176                    if sts == other_sts && ets == other_ets {
177                        Some(Ordering::Equal)
178                    } else {
179                        None
180                    }
181                },
182            },
183        }
184    }
185}
186
187fn get_start_end_ts(pod: &corev1::Pod, container: &str, state: &corev1::ContainerState) -> (Option<i64>, Option<i64>) {
188    let start_ts = state.start_ts().unwrap_or_else(|err| {
189        warn!("could not find start_ts for container {container} in {}: {err:?}", pod.namespaced_name());
190        None
191    });
192    let end_ts = state.end_ts().unwrap_or_else(|err| {
193        warn!("could not find end_ts for container {container} in {}: {err:?}", pod.namespaced_name());
194        None
195    });
196
197    (start_ts, end_ts)
198}
199
200impl PartialEq<Option<&PodLifecycleData>> for PodLifecycleData {
201    fn eq(&self, other: &Option<&PodLifecycleData>) -> bool {
202        match self {
203            PodLifecycleData::Empty => other.is_none() || other.as_ref().is_some_and(|plt| plt.empty()),
204            _ => other.as_ref().is_some_and(|plt| plt == self),
205        }
206    }
207}
208
209impl PartialOrd<Option<&PodLifecycleData>> for PodLifecycleData {
210    fn partial_cmp(&self, other: &Option<&PodLifecycleData>) -> Option<Ordering> {
211        match self {
212            PodLifecycleData::Empty => other.as_ref().map_or(Some(Ordering::Equal), |o| self.partial_cmp(o)),
213            _ => other.as_ref().map_or(Some(Ordering::Greater), |o| self.partial_cmp(o)),
214        }
215    }
216}
217
// The default ordering on Option types treats `None` as smaller than every `Some`, so taking
// the plain `min` of two options yields `None` whenever either one is `None`.  This is not the
// correct behaviour if you want to compute the minimum of a list of options, if it exists, and
// only return None if all the options are None.  That is what min_some does.
//
// Note the asymmetry here: we don't need a corresponding max_some because `None` already
// compares below every `Some`, so the plain `max` ignores missing values "correctly".
pub fn min_some<T: Ord>(o1: Option<T>, o2: Option<T>) -> Option<T> {
    match (o1, o2) {
        // Both present: the smaller value wins (the left one on a tie).
        (Some(lhs), Some(rhs)) => Some(min(lhs, rhs)),
        // At most one side present: pass through whichever it is (or None).
        (lhs, None) => lhs,
        (None, rhs) => rhs,
    }
}
234
#[cfg(test)]
#[cfg_attr(coverage, coverage(off))]
mod test {
    use sk_testutils::*;

    use super::*;

    // Table-driven check of min_some.  The both-Some cases cover the smaller value on either
    // side as well as a tie, so an implementation that always returns one fixed argument when
    // both are present cannot pass.
    #[rstest]
    #[case::both_none(None, None, None)]
    #[case::left_some(Some(1), None, Some(1))]
    #[case::right_some(None, Some(1), Some(1))]
    #[case::both_some(Some(2), Some(1), Some(1))]
    #[case::both_some_left_smaller(Some(1), Some(2), Some(1))]
    #[case::both_some_equal(Some(1), Some(1), Some(1))]
    fn test_min_some(#[case] o1: Option<i32>, #[case] o2: Option<i32>, #[case] expected: Option<i32>) {
        assert_eq!(min_some(o1, o2), expected);
    }
}