Skip to main content

dbg_cli/session_db/collectors/
mod.rs

1//! On-demand collectors.
2//!
3//! These populate the cross-track shared tables (`disassembly`,
4//! `source_snapshots`, `alloc_sites`) in response to a specific agent
5//! question — `dbg disasm <sym>`, `dbg source <sym>`, etc. They're
6//! lightweight: cheap enough to run mid-debug at a breakpoint.
7//!
8//! Wiring:
9//!   * A collector may have access to the live debugger session via
10//!     `LiveDebugger`. Collectors that can reuse the existing PTY do
11//!     so (lldb's `disassemble` runs cleanly inside an active session);
12//!     collectors that need a fresh process (.NET jitdasm, which must
13//!     set `DOTNET_JitDisasm` before the runtime starts) always spawn
14//!     one — never restarting the live debug session.
15//!   * Results are deduplicated on `(symbol_id, source, tier)` unless
16//!     `CollectCtx::refresh = true` is set.
17
18pub mod disasm;
19
20use std::path::Path;
21
22use anyhow::Result;
23use rusqlite::{OptionalExtension, params};
24
25use super::canonicalizer::{CanonicalSymbol, for_lang};
26use super::{SessionDb, TargetClass};
27
/// Why a collection ran. Purely informational: the value is stored on
/// the row so agents can tell whether disasm was requested at a stop
/// point, reached by drilling into a hotspot, or asked for explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollectTrigger {
    /// Disasm was requested at a stop point.
    BreakpointHit,
    /// Disasm was drilled into from a hotspot.
    HotspotDrill,
    /// Disasm was asked for explicitly.
    Explicit,
}

impl CollectTrigger {
    /// Returns the fixed kebab-case label for this trigger.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::BreakpointHit => "breakpoint-hit",
            Self::HotspotDrill => "hotspot-drill",
            Self::Explicit => "explicit",
        }
    }
}
47
48/// Context passed to every collector invocation.
49pub struct CollectCtx<'a> {
50    pub target: &'a str,
51    pub target_class: TargetClass,
52    /// The symbol to collect for. May be a raw (pre-canonical) or
53    /// canonical form; the collector canonicalizes internally.
54    pub symbol: &'a str,
55    /// If `true`, overwrite any existing row matching
56    /// `(symbol_id, source, tier)` instead of returning the cached one.
57    pub refresh: bool,
58    pub trigger: CollectTrigger,
59    pub cwd: &'a Path,
60}
61
/// What a disasm collector produces, prior to being written to the DB.
/// Kept as its own type so tests can drive the shell-out + parse path
/// with no `SessionDb` attached.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DisasmOutput {
    /// Identifier of the producing collector; written to
    /// `disassembly.source` and part of the dedup key.
    pub source: &'static str,
    /// Optional tier label (e.g. `"tier0"` / `"tier1"` for jitdasm);
    /// part of the dedup key. `None` is stored as SQL NULL.
    pub tier: Option<String>,
    /// Optional value for the `code_bytes` column.
    // NOTE(review): presumably the machine-code size in bytes — confirm.
    pub code_bytes: Option<i64>,
    /// The disassembly text itself.
    pub asm_text: String,
    /// Optional value for the `asm_lines_json` column.
    // NOTE(review): presumably a JSON encoding of per-line data — confirm.
    pub asm_lines_json: Option<String>,
}
73
74/// A minimal handle the daemon passes to collectors so they can issue
75/// commands on the live debugger session when useful. None when the
76/// collector is invoked from a profile-only context (no live PTY).
77pub trait LiveDebugger: Send + Sync {
78    /// Send a native-tool command to the active debugger and return
79    /// its (cleaned) output.
80    fn send(&self, cmd: &str) -> Result<String>;
81    /// Tool name, for logging / error messages.
82    fn tool_name(&self) -> &'static str;
83}
84
85/// The on-demand collector trait.
86pub trait OnDemandCollector: Send + Sync {
87    /// Stable identifier — also stored in `disassembly.source`.
88    fn kind(&self) -> &'static str;
89
90    /// Does this collector handle the given target class?
91    fn supports(&self, class: TargetClass) -> bool;
92
93    /// Collect disassembly (or equivalent) for `ctx.symbol`.
94    fn collect(
95        &self,
96        ctx: &CollectCtx<'_>,
97        live: Option<&dyn LiveDebugger>,
98    ) -> Result<DisasmOutput>;
99}
100
101/// Upsert a symbols row for the given canonical form and return its id.
102/// Called by `persist_disasm` before inserting the disassembly row so
103/// `symbol_id` joins are always valid.
104fn upsert_symbol(db: &SessionDb, sym: &CanonicalSymbol) -> Result<i64> {
105    let session_id = current_session_id(db)?;
106    // Try to find an existing row.
107    let existing: Option<i64> = db.conn()
108        .query_row(
109            "SELECT id FROM symbols WHERE session_id=?1 AND lang=?2 AND fqn=?3",
110            params![session_id, sym.lang, sym.fqn],
111            |r| r.get(0),
112        )
113        .optional()?;
114    if let Some(id) = existing {
115        return Ok(id);
116    }
117    db.conn().execute(
118        "INSERT INTO symbols (session_id, lang, fqn, file, line, demangled, raw, is_synthetic)
119         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
120        params![
121            session_id,
122            sym.lang,
123            sym.fqn,
124            sym.file,
125            sym.line,
126            sym.demangled,
127            sym.raw,
128            sym.is_synthetic as i64,
129        ],
130    )?;
131    Ok(db.conn().last_insert_rowid())
132}
133
134fn current_session_id(db: &SessionDb) -> Result<String> {
135    Ok(db.conn().query_row(
136        "SELECT id FROM sessions LIMIT 1",
137        [],
138        |r| r.get::<_, String>(0),
139    )?)
140}
141
142/// Write a `DisasmOutput` into the `disassembly` table, keyed to the
143/// canonicalized symbol. Respects `ctx.refresh` — returns the existing
144/// row's id when a match is found and refresh is off.
145///
146/// Returns the `disassembly.id` of the stored row.
147pub fn persist_disasm(
148    db: &SessionDb,
149    ctx: &CollectCtx<'_>,
150    output: &DisasmOutput,
151) -> Result<i64> {
152    let lang = lang_for_class(ctx.target_class);
153    let canon = match for_lang(lang) {
154        Some(c) => c.canonicalize(ctx.symbol),
155        None => CanonicalSymbol {
156            lang: "unknown",
157            fqn: ctx.symbol.to_string(),
158            file: None,
159            line: None,
160            demangled: None,
161            raw: ctx.symbol.to_string(),
162            is_synthetic: false,
163        },
164    };
165    let symbol_id = upsert_symbol(db, &canon)?;
166    let session_id = current_session_id(db)?;
167
168    if !ctx.refresh {
169        let existing: Option<i64> = db.conn()
170            .query_row(
171                "SELECT id FROM disassembly
172                 WHERE session_id=?1 AND symbol_id=?2 AND source=?3
173                       AND tier IS ?4",
174                params![session_id, symbol_id, output.source, output.tier],
175                |r| r.get(0),
176            )
177            .optional()?;
178        if let Some(id) = existing {
179            return Ok(id);
180        }
181    } else {
182        db.conn().execute(
183            "DELETE FROM disassembly
184             WHERE session_id=?1 AND symbol_id=?2 AND source=?3
185                   AND (tier IS ?4 OR tier=?4)",
186            params![session_id, symbol_id, output.source, output.tier],
187        )?;
188    }
189
190    db.conn().execute(
191        "INSERT INTO disassembly
192            (session_id, symbol_id, source, tier, code_bytes,
193             asm_text, asm_lines_json, collected_at, trigger)
194         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, datetime('now'), ?8)",
195        params![
196            session_id,
197            symbol_id,
198            output.source,
199            output.tier,
200            output.code_bytes,
201            output.asm_text,
202            output.asm_lines_json,
203            ctx.trigger.as_str(),
204        ],
205    )?;
206    Ok(db.conn().last_insert_rowid())
207}
208
209/// Pick the canonicalizer language for a target class. For
210/// `NativeCpu` we default to `cpp` — demangling covers C/C++/Rust
211/// equivalently for disasm purposes. Callers with language-specific
212/// knowledge may use the canonicalizer modules directly.
213fn lang_for_class(class: TargetClass) -> &'static str {
214    match class {
215        TargetClass::Gpu => "cuda",
216        TargetClass::NativeCpu => "cpp",
217        TargetClass::ManagedDotnet => "dotnet",
218        TargetClass::Jvm => "jvm",
219        TargetClass::Python => "python",
220        TargetClass::JsNode => "js",
221        TargetClass::Ruby => "ruby",
222        TargetClass::Php => "php",
223    }
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use crate::session_db::{CreateOptions, SessionKind};
    use tempfile::TempDir;

    /// Creates a fresh debug-session database rooted in `tmp`.
    fn test_db(tmp: &TempDir, class: TargetClass) -> SessionDb {
        SessionDb::create(CreateOptions {
            kind: SessionKind::Debug,
            target: "./t",
            target_class: class,
            cwd: tmp.path(),
            db_path: None,
            label: Some("t".into()),
            target_hash: Some("h".into()),
        })
        .unwrap()
    }

    /// Builds a native-CPU, explicitly-triggered context for `symbol`.
    fn ctx<'a>(tmp: &'a TempDir, symbol: &'a str, refresh: bool) -> CollectCtx<'a> {
        CollectCtx {
            target: "./t",
            target_class: TargetClass::NativeCpu,
            symbol,
            refresh,
            trigger: CollectTrigger::Explicit,
            cwd: tmp.path(),
        }
    }

    /// Number of rows currently in `table`.
    fn row_count(db: &SessionDb, table: &str) -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {}", table);
        db.conn().query_row(&sql, [], |r| r.get(0)).unwrap()
    }

    #[test]
    fn persist_inserts_symbol_and_disasm() {
        let tmp = TempDir::new().unwrap();
        let db = test_db(&tmp, TargetClass::NativeCpu);
        let out = DisasmOutput {
            source: "lldb-disassemble",
            tier: None,
            code_bytes: Some(128),
            asm_text: "mov rax, rbx\nret".into(),
            asm_lines_json: None,
        };
        let id = persist_disasm(&db, &ctx(&tmp, "main", false), &out).unwrap();
        assert!(id > 0);

        assert_eq!(row_count(&db, "symbols"), 1);
        assert_eq!(row_count(&db, "disassembly"), 1);
    }

    #[test]
    fn persist_dedups_without_refresh() {
        let tmp = TempDir::new().unwrap();
        let db = test_db(&tmp, TargetClass::NativeCpu);
        let out = DisasmOutput {
            source: "lldb-disassemble",
            tier: None,
            code_bytes: None,
            asm_text: "a".into(),
            asm_lines_json: None,
        };
        let a = persist_disasm(&db, &ctx(&tmp, "main", false), &out).unwrap();
        let b = persist_disasm(&db, &ctx(&tmp, "main", false), &out).unwrap();
        assert_eq!(a, b, "second call should return cached id");

        assert_eq!(row_count(&db, "disassembly"), 1);
    }

    #[test]
    fn persist_refresh_replaces_row() {
        let tmp = TempDir::new().unwrap();
        let db = test_db(&tmp, TargetClass::NativeCpu);
        let v1 = DisasmOutput {
            source: "lldb-disassemble",
            tier: None,
            code_bytes: None,
            asm_text: "old asm".into(),
            asm_lines_json: None,
        };
        let v2 = DisasmOutput {
            source: "lldb-disassemble",
            tier: None,
            code_bytes: None,
            asm_text: "new asm".into(),
            asm_lines_json: None,
        };
        let _ = persist_disasm(&db, &ctx(&tmp, "main", false), &v1).unwrap();
        let _ = persist_disasm(&db, &ctx(&tmp, "main", true), &v2).unwrap();

        assert_eq!(row_count(&db, "disassembly"), 1);
        let text: String = db.conn()
            .query_row("SELECT asm_text FROM disassembly", [], |r| r.get(0))
            .unwrap();
        assert_eq!(text, "new asm");
    }

    #[test]
    fn persist_distinguishes_by_tier() {
        let tmp = TempDir::new().unwrap();
        let db = test_db(&tmp, TargetClass::ManagedDotnet);
        let t0 = DisasmOutput {
            source: "jitdasm",
            tier: Some("tier0".into()),
            code_bytes: None,
            asm_text: "tier-0".into(),
            asm_lines_json: None,
        };
        let t1 = DisasmOutput {
            source: "jitdasm",
            tier: Some("tier1".into()),
            code_bytes: None,
            asm_text: "tier-1".into(),
            asm_lines_json: None,
        };
        let c = CollectCtx {
            target: "./t",
            target_class: TargetClass::ManagedDotnet,
            symbol: "MyApp.Foo",
            refresh: false,
            trigger: CollectTrigger::Explicit,
            cwd: tmp.path(),
        };
        persist_disasm(&db, &c, &t0).unwrap();
        persist_disasm(&db, &c, &t1).unwrap();

        assert_eq!(row_count(&db, "disassembly"), 2, "tier0 and tier1 are distinct rows");
    }

    /// Refreshing one tier must replace only that tier's row; this
    /// exercises the non-NULL side of the `tier` predicate.
    #[test]
    fn persist_refresh_replaces_only_matching_tier() {
        let tmp = TempDir::new().unwrap();
        let db = test_db(&tmp, TargetClass::ManagedDotnet);
        let make = |tier: &str, text: &str| DisasmOutput {
            source: "jitdasm",
            tier: Some(tier.into()),
            code_bytes: None,
            asm_text: text.into(),
            asm_lines_json: None,
        };
        let make_ctx = |refresh: bool| CollectCtx {
            target: "./t",
            target_class: TargetClass::ManagedDotnet,
            symbol: "MyApp.Foo",
            refresh,
            trigger: CollectTrigger::Explicit,
            cwd: tmp.path(),
        };
        persist_disasm(&db, &make_ctx(false), &make("tier0", "tier-0")).unwrap();
        persist_disasm(&db, &make_ctx(false), &make("tier1", "tier-1 old")).unwrap();
        persist_disasm(&db, &make_ctx(true), &make("tier1", "tier-1 new")).unwrap();

        assert_eq!(row_count(&db, "disassembly"), 2);
        let t0: String = db.conn()
            .query_row(
                "SELECT asm_text FROM disassembly WHERE tier='tier0'",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(t0, "tier-0");
        let t1: String = db.conn()
            .query_row(
                "SELECT asm_text FROM disassembly WHERE tier='tier1'",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(t1, "tier-1 new");
    }

    #[test]
    fn lang_mapping_covers_every_class() {
        assert_eq!(lang_for_class(TargetClass::Gpu), "cuda");
        assert_eq!(lang_for_class(TargetClass::NativeCpu), "cpp");
        assert_eq!(lang_for_class(TargetClass::ManagedDotnet), "dotnet");
        assert_eq!(lang_for_class(TargetClass::Jvm), "jvm");
        assert_eq!(lang_for_class(TargetClass::Python), "python");
        assert_eq!(lang_for_class(TargetClass::JsNode), "js");
        assert_eq!(lang_for_class(TargetClass::Ruby), "ruby");
        assert_eq!(lang_for_class(TargetClass::Php), "php");
    }
}