Skip to main content

flodl_cli/
diagnose.rs

1//! `fdl diagnose` -- system and GPU diagnostics.
2//!
3//! Thin formatting layer over `util::system` and `libtorch::detect`.
4
5use std::fmt::Write;
6use std::path::Path;
7
8use crate::context::Context;
9use crate::libtorch::detect;
10use crate::util::system;
11
12pub fn run(json: bool) {
13    let ctx = Context::resolve();
14    let root = &ctx.root;
15    if json {
16        print_json(root, &ctx);
17    } else {
18        print_report(root, &ctx);
19    }
20}
21
22// ---------------------------------------------------------------------------
23// Human-readable report
24// ---------------------------------------------------------------------------
25
26fn print_report(root: &Path, ctx: &Context) {
27    println!("floDl Diagnostics");
28    println!("=================");
29    println!();
30
31    // Context
32    println!("Context:       {}", ctx.label());
33    println!();
34
35    // System
36    println!("System");
37    let cpu = system::cpu_model().unwrap_or_else(|| "Unknown".into());
38    let threads = system::cpu_threads();
39    let ram_gb = system::ram_total_gb();
40    println!("  CPU:         {} ({} threads, {}GB RAM)", cpu, threads, ram_gb);
41    if let Some(os) = system::os_version() {
42        println!("  OS:          {}", os);
43    }
44    if system::is_inside_docker() {
45        println!("  Docker:      yes (running inside container)");
46    } else {
47        match system::docker_version() {
48            Some(v) => println!("  Docker:      {}", v),
49            None => println!("  Docker:      not found"),
50        }
51    }
52    println!();
53
54    // CUDA / GPU
55    println!("CUDA");
56    let devices = system::detect_gpus();
57    if !devices.is_empty() {
58        if let Some(driver) = system::nvidia_driver_version() {
59            println!("  Driver:      {}", driver);
60        }
61        println!("  Devices:     {}", devices.len());
62        for d in &devices {
63            let vram_gb = d.total_memory_mb / 1024;
64            println!(
65                "  [{}] {} -- {}, {}GB VRAM",
66                d.index,
67                d.name,
68                d.sm_version(),
69                vram_gb
70            );
71        }
72    } else {
73        println!("  No CUDA devices available");
74    }
75    println!();
76
77    // libtorch
78    println!("libtorch");
79    match detect::read_active(root) {
80        Some(info) => {
81            println!("  Active:      {}", info.path);
82            if let Some(v) = &info.torch_version {
83                println!("  Version:     {}", v);
84            }
85            if let Some(c) = &info.cuda_version {
86                println!("  CUDA:        {}", c);
87            }
88            if let Some(a) = &info.archs {
89                println!("  Archs:       {}", a);
90            }
91            if let Some(s) = &info.source {
92                println!("  Source:      {}", s);
93            }
94        }
95        None => {
96            println!("  No active variant (run `fdl setup`)");
97        }
98    }
99
100    let variants = detect::list_variants(root);
101    if !variants.is_empty() {
102        println!("  Variants:    {}", variants.join(", "));
103    }
104    println!();
105
106    // Compatibility
107    if !devices.is_empty() {
108        println!("Compatibility");
109        if let Some(info) = detect::read_active(root) {
110            let archs = info.archs.as_deref().unwrap_or("");
111            let mut all_ok = true;
112            for d in &devices {
113                if detect::arch_compatible(d, archs) {
114                    println!(
115                        "  GPU {} ({}, {}):  OK",
116                        d.index,
117                        d.short_name(),
118                        d.sm_version()
119                    );
120                } else {
121                    all_ok = false;
122                    let arch_str = format!("{}.{}", d.sm_major, d.sm_minor);
123                    println!(
124                        "  GPU {} ({}, {}):  MISSING -- arch {} not in [{}]",
125                        d.index,
126                        d.short_name(),
127                        d.sm_version(),
128                        arch_str,
129                        archs
130                    );
131                }
132            }
133            if all_ok {
134                println!();
135                println!("  All GPUs compatible with active libtorch.");
136            }
137        } else {
138            println!("  Cannot check -- no active libtorch variant.");
139        }
140        println!();
141    }
142}
143
144// ---------------------------------------------------------------------------
145// JSON output
146// ---------------------------------------------------------------------------
147
148fn print_json(root: &Path, ctx: &Context) {
149    let mut b = String::with_capacity(2048);
150    b.push('{');
151
152    // Context
153    let _ = write!(
154        b,
155        "\"context\":{{\"mode\":\"{}\",\"root\":\"{}\"}}",
156        if ctx.is_project { "project" } else { "global" },
157        system::escape_json(&ctx.root.display().to_string())
158    );
159
160    // System
161    let cpu = system::cpu_model().unwrap_or_else(|| "Unknown".into());
162    let _ = write!(
163        b,
164        ",\"system\":{{\"cpu\":\"{}\",\"threads\":{},\"ram_gb\":{}",
165        system::escape_json(&cpu),
166        system::cpu_threads(),
167        system::ram_total_gb()
168    );
169    if let Some(os) = system::os_version() {
170        let _ = write!(b, ",\"os\":\"{}\"", system::escape_json(&os));
171    }
172    if system::is_inside_docker() {
173        b.push_str(",\"docker\":\"container\"");
174    } else if let Some(docker) = system::docker_version() {
175        let _ = write!(b, ",\"docker\":\"{}\"", system::escape_json(&docker));
176    }
177    b.push('}');
178
179    // GPUs
180    let devices = system::detect_gpus();
181    let archs = detect::read_active(root)
182        .and_then(|info| info.archs)
183        .unwrap_or_default();
184    b.push_str(",\"gpus\":[");
185    for (i, d) in devices.iter().enumerate() {
186        if i > 0 {
187            b.push(',');
188        }
189        let compatible = detect::arch_compatible(d, &archs);
190        let _ = write!(
191            b,
192            "{{\"index\":{},\"name\":\"{}\",\"sm\":\"{}\",\"vram_bytes\":{},\"arch_compatible\":{}}}",
193            d.index,
194            system::escape_json(&d.name),
195            d.sm_version(),
196            d.vram_bytes(),
197            compatible
198        );
199    }
200    b.push(']');
201
202    // libtorch
203    b.push_str(",\"libtorch\":");
204    match detect::read_active(root) {
205        Some(info) => {
206            let _ = write!(b, "{{\"path\":\"{}\"", system::escape_json(&info.path));
207            if let Some(v) = &info.torch_version {
208                let _ = write!(b, ",\"version\":\"{}\"", system::escape_json(v));
209            }
210            if let Some(c) = &info.cuda_version {
211                let _ = write!(b, ",\"cuda\":\"{}\"", system::escape_json(c));
212            }
213            if let Some(a) = &info.archs {
214                let _ = write!(b, ",\"archs\":\"{}\"", system::escape_json(a));
215            }
216            if let Some(s) = &info.source {
217                let _ = write!(b, ",\"source\":\"{}\"", system::escape_json(s));
218            }
219            b.push('}');
220        }
221        None => b.push_str("null"),
222    }
223
224    b.push('}');
225    println!("{}", b);
226}