ceph_safe_disk/
diag.rs

1use ansi_term::Colour;
2use crate::pgmap::PGMap;
3use crate::osdmap::OsdMap;
4use crate::pgstate::RmSafety;
5use crate::error::CSDError;
6use crate::from::FromCeph;
7
8use std::collections::BinaryHeap;
9use std::fmt;
10
/// Output format used when printing diagnostic results.
#[derive(Debug, Clone, Copy)]
pub enum Format {
    /// Human-readable lines with coloured status markers.
    Pretty,
    /// JSON written to stdout.
    Json,
}
17
18// The removability status of an OSD. Using an enum for precedence:
19// Safe < Unknown < NonSafe
20#[derive(Serialize, Debug, Copy, Clone, Ord, Eq, PartialEq, PartialOrd)]
21pub enum Status {
22    Safe,
23    Unknown,
24    NonSafe,
25}
26
27impl fmt::Display for Status {
28    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
29        match *self {
30            Status::Unknown => write!(f, "Pending"),
31            Status::Safe => write!(f, "Removable"),
32            Status::NonSafe => write!(f, "Not removable"),
33        }
34    }
35}
36
37pub struct PgDiag {
38    osd_id: i32,
39    pg_info: PgInfo,
40}
41
42impl PgDiag {
43    fn new(osd_id: i32, pg_info: PgInfo) -> PgDiag {
44        PgDiag { osd_id, pg_info }
45    }
46}
47
48// Holds information about a PG's status, it's ID and state
49#[derive(Debug, Clone, Ord, Eq, PartialEq, PartialOrd)]
50pub struct PgInfo {
51    pg_id: String,
52    pg_state: String,
53    rm_safety: RmSafety,
54}
55
56impl PgInfo {
57    fn new(states: &str, pgid: String) -> PgInfo {
58        PgInfo {
59            pg_id: pgid,
60            pg_state: states.to_string(),
61            rm_safety: RmSafety::new(&states),
62        }
63    }
64}
65
66#[derive(Debug, Serialize)]
67pub struct OsdDiag {
68    osd_id: i32,
69    osd_status: BinaryHeap<Status>,
70}
71
72impl OsdDiag {
73    fn new(osd_id: i32) -> OsdDiag {
74        OsdDiag {
75            osd_id,
76            osd_status: BinaryHeap::new(),
77        }
78    }
79}
80
81// Used to print ClusterDiag in a nicer way. Since ClusterDiag.osd_diags use
82// binary heaps to order status priority then it is very inconvenient for
83// printing as JSON
84#[derive(Serialize, Default)]
85pub struct ClusterReview {
86    #[serde(rename = "Removable")]
87    removable: Vec<i32>,
88    #[serde(rename = "Not Removable")]
89    not_removable: Vec<i32>,
90    #[serde(rename = "Pending")]
91    pending: Vec<i32>,
92}
93
94impl ClusterReview {
95    fn from_diag(cluster_diag: &ClusterDiag) -> ClusterReview {
96        let mut review: ClusterReview = Default::default();
97        for osd in &cluster_diag.osd_diags {
98            if let Some(osd_status) = osd.osd_status.peek() {
99                match *osd_status {
100                    Status::NonSafe => review.not_removable.push(osd.osd_id),
101                    Status::Safe => review.removable.push(osd.osd_id),
102                    Status::Unknown => review.pending.push(osd.osd_id),
103                }
104            }
105        }
106        review
107    }
108}
109
110#[derive(Debug, Serialize)]
111pub struct ClusterDiag {
112    status: Status,
113    osd_diags: Vec<OsdDiag>,
114}
115
116impl ClusterDiag {
117    fn new() -> ClusterDiag {
118        ClusterDiag {
119            status: Status::Safe,
120            osd_diags: Vec::new(),
121        }
122    }
123
124    fn print(&mut self, format: Format) {
125        match format {
126            Format::Pretty => self.print_pretty(),
127            Format::Json => self.print_json(),
128        };
129    }
130
131    fn status(&mut self) -> Status {
132        for osd in &self.osd_diags {
133            if let Some(osd_status) = osd.osd_status.peek() {
134                // ClusterDiag.status defaults to safe and is only changed once
135                // an OSD that is unsafe to remove or pending is found
136                match *osd_status {
137                    // Short circuit if we find non safe status
138                    Status::NonSafe => return Status::NonSafe,
139                    Status::Unknown => return Status::Unknown,
140                    _ => (),
141                };
142            }
143        }
144        // Edge case where no osds are found
145        if self.osd_diags.is_empty() {
146            return Status::NonSafe;
147        }
148        self.status
149    }
150
151    fn print_pretty(&self) {
152        println!("Current OSD statuses:");
153        for osd in &self.osd_diags {
154            if let Some(osd_status) = osd.osd_status.peek() {
155                match *osd_status {
156                    Status::NonSafe => println!(
157                        "{} {}: {}",
158                        Colour::Red.paint("●"),
159                        osd.osd_id,
160                        osd_status
161                    ),
162                    Status::Safe => println!(
163                        "{} {}: {}",
164                        Colour::Green.paint("●"),
165                        osd.osd_id,
166                        osd_status
167                    ),
168                    Status::Unknown => println!(
169                        "{} {}: {}",
170                        Colour::Yellow.paint("●"),
171                        osd.osd_id,
172                        osd_status
173                    ),
174                }
175            }
176        }
177    }
178
179    fn print_json(&self) {
180        if let Ok(json) = serde_json::to_string(&ClusterReview::from_diag(&self)) {
181            println!("{}", json);
182        }
183    }
184}
185
186#[derive(Debug, Clone)]
187pub struct DiagMap {
188    pg_map: PGMap,
189    osd_map: OsdMap,
190}
191
192impl DiagMap {
193    pub fn new() -> Result<DiagMap, CSDError> {
194        Ok(DiagMap {
195            pg_map: PGMap::from_ceph("pg dump")?,
196            osd_map: OsdMap::from_ceph("osd dump")?,
197        })
198    }
199
200    // Quick check to see if `min_size +1` is satisfied
201    pub fn quick_diag(self, format: Format) -> bool {
202        let mut safe: bool = false;
203        for stat in self.pg_map.pg_stats {
204            for pool in self.osd_map.pools.iter() {
205                if (stat.up.len() as i32) >= (pool.min_size + 1) {
206                    safe = true;
207                }
208            }
209        }
210        match format {
211            Format::Pretty => {
212                if safe {
213                    println!("{} Safe to remove an OSD", Colour::Green.paint("●"));
214                } else {
215                    println!("{} Not safe to remove an OSD", Colour::Red.paint("●"));
216                };
217            }
218            Format::Json => println!("{{\"Safe to remove an OSD\":{}}}", safe),
219        };
220        safe
221    }
222
223    // Maps out PGs and their states to each OSD in their `acting` list.
224    // Returns a more general `Status` based on whether there is a removable
225    // OSD or not.
226    // `cluster_diag` holds an OSD's removability status. Using a binary heap we
227    // can always know which state it has that holds the highest precedent.
228    pub fn exhaustive_diag(self, format: Format) -> Status {
229        let mut pg_diags: Vec<PgDiag> = Vec::new();
230        let mut cluster_diag = ClusterDiag::new();
231
232        // Populate PG statuses. For each PG we push it's list of acting OSDs
233        // and the state of the PG
234        for pg_stat in self.pg_map.pg_stats {
235            for acting in pg_stat.acting {
236                pg_diags.push(PgDiag::new(
237                    acting,
238                    PgInfo::new(&pg_stat.state, pg_stat.pgid.clone()),
239                ));
240            }
241        }
242
243        // Generate OSD removability.
244        for pg in &pg_diags {
245            if cluster_diag
246                .osd_diags
247                .iter_mut()
248                .find(|ref osd| osd.osd_id == pg.osd_id)
249                .is_none()
250            {
251                cluster_diag.osd_diags.push(OsdDiag::new(pg.osd_id));
252            } else if let Some(osd) = cluster_diag
253                .osd_diags
254                .iter_mut()
255                .find(|ref osd| osd.osd_id == pg.osd_id)
256            {
257                match pg.pg_info.rm_safety {
258                    RmSafety::None => osd.osd_status.push(Status::NonSafe),
259                    RmSafety::Pending => osd.osd_status.push(Status::Unknown),
260                    RmSafety::Total => osd.osd_status.push(Status::Safe),
261                }
262            }
263        }
264
265        // Print the statuses of OSDs based on `format`
266        cluster_diag.print(format);
267        cluster_diag.status()
268    }
269}
270
#[cfg(test)]
mod tests {
    use super::*;
    use crate::from::FromFile;
    use crate::osdmap::OsdMap;
    use crate::pgmap::PGMap;

    /// Loads a `DiagMap` from a pair of JSON fixture files, panicking if
    /// either cannot be loaded.
    fn load(pg_dump: &'static str, osd_dump: &'static str) -> DiagMap {
        DiagMap {
            pg_map: PGMap::from_file(pg_dump).unwrap(),
            osd_map: OsdMap::from_file(osd_dump).unwrap(),
        }
    }

    #[test]
    fn quick_diag_jewel_safe() {
        let diag_map = load(
            "test/jewel/pg_dump_safe.json",
            "test/jewel/osd_dump_safe.json",
        );
        assert!(diag_map.quick_diag(Format::Pretty));
    }

    #[test]
    fn exhaustive_diag_jewel_safe() {
        let diag_map = load(
            "test/jewel/pg_dump_safe.json",
            "test/jewel/osd_dump_safe.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Json), Status::Safe);
    }

    #[test]
    fn exhaustive_diag_jewel_non_safe() {
        let diag_map = load(
            "test/jewel/pg_dump_non_safe.json",
            "test/jewel/osd_dump_non_safe.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Pretty), Status::NonSafe);
    }

    // NOTE(review): this test is named "luminous" but loads the jewel
    // fixtures; presumably it should use `test/luminous/...` files. Confirm
    // that those fixtures exist before changing the paths.
    #[test]
    fn exhaustive_diag_luminous_safe() {
        let diag_map = load(
            "test/jewel/pg_dump_safe.json",
            "test/jewel/osd_dump_safe.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Json), Status::Safe);
    }

    #[test]
    fn exhaustive_diag_luminous_non_safe() {
        let diag_map = load(
            "test/luminous/pg_dump_non_safe.json",
            "test/luminous/osd_dump_non_safe.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Pretty), Status::NonSafe);
    }

    #[test]
    fn exhaustive_diag_jewel_pending() {
        let diag_map = load(
            "test/jewel/pg_dump_pending.json",
            "test/jewel/osd_dump_pending.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Json), Status::Unknown);
    }

    #[test]
    fn quick_diag_firefly_safe() {
        let diag_map = load(
            "test/firefly/pg_dump_safe.json",
            "test/firefly/osd_dump_safe.json",
        );
        assert!(diag_map.quick_diag(Format::Json));
    }

    #[test]
    fn exhaustive_diag_firefly_safe() {
        let diag_map = load(
            "test/firefly/pg_dump_safe.json",
            "test/firefly/osd_dump_safe.json",
        );
        assert_eq!(diag_map.exhaustive_diag(Format::Pretty), Status::Safe);
    }
}