Skip to main content

webspec_index/
lib.rs

1//! WebSpec-Index: Query WHATWG/W3C web specifications
2//!
3//! This library provides parsing, indexing, and querying of web specifications.
4//! It's designed to be used via Python bindings (PyO3), but can also be used directly from Rust.
5
6pub mod analyze;
7pub mod db;
8pub mod fetch;
9pub mod format;
10pub mod ietf;
11pub mod lsp;
12pub mod model;
13pub mod parse;
14pub mod spec_list;
15pub mod spec_registry;
16
17use anyhow::{Context, Result};
18use regex::Regex;
19use rusqlite::Connection;
20use std::collections::{HashMap, HashSet, VecDeque};
21
22/// Parse a spec#anchor string or full URL into (spec, anchor) tuple
23/// Parse a spec anchor string into (spec_name, anchor, Option<base_url>).
24///
25/// The third element is `Some(base_url)` when the input was a URL on a
26/// whitelisted domain — callers can use it as a hint to fetch specs that
27/// aren't in the bundled list yet.
28pub fn parse_spec_anchor(input: &str) -> Result<(String, String, Option<String>)> {
29    let trimmed = input.trim();
30
31    // Try URL first
32    if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
33        let registry = spec_registry::SpecRegistry::new();
34        if let Some((spec, anchor, base_url)) = registry.resolve_url_with_base(trimmed) {
35            return Ok((spec, anchor, Some(base_url)));
36        }
37        anyhow::bail!(
38            "URL not recognized. Use a known SPEC#anchor, or a whitelisted URL domain with a #fragment: {trimmed}"
39        );
40    }
41
42    // Accept host-style URLs without an explicit scheme, e.g. html.spec.whatwg.org/#navigate
43    if trimmed.contains('#') && trimmed.contains('/') && !trimmed.contains("://") {
44        let maybe_url = format!("https://{}", trimmed.trim_start_matches('/'));
45        let registry = spec_registry::SpecRegistry::new();
46        if let Some((spec, anchor, base_url)) = registry.resolve_url_with_base(&maybe_url) {
47            return Ok((spec, anchor, Some(base_url)));
48        }
49    }
50
51    // Fall back to SPEC#anchor
52    let parts: Vec<&str> = trimmed.split('#').collect();
53    if parts.len() != 2 {
54        anyhow::bail!("Invalid format. Expected SPEC#anchor or a full spec URL");
55    }
56    Ok((parts[0].to_string(), parts[1].to_string(), None))
57}
58
59/// Return indexed/discovered spec base URLs
60pub fn spec_urls() -> Vec<model::SpecUrlEntry> {
61    let conn = match db::open_or_create_db() {
62        Ok(conn) => conn,
63        Err(_) => return vec![],
64    };
65
66    db::queries::list_specs(&conn)
67        .unwrap_or_default()
68        .into_iter()
69        .map(|(spec, base_url, _provider)| model::SpecUrlEntry { spec, base_url })
70        .collect()
71}
72
73fn resolve_spec_metadata(
74    conn: &Connection,
75    registry: &spec_registry::SpecRegistry,
76    spec_name: &str,
77    base_url_hint: Option<&str>,
78) -> Result<(String, String, String)> {
79    if let Some((name, base_url, provider)) = db::queries::get_spec_meta(conn, spec_name)? {
80        return Ok((name, base_url, provider));
81    }
82
83    if let Some(base_url) = spec_registry::auto_spec_base_url(spec_name) {
84        let provider = spec_registry::provider_for_base_url(&base_url).to_string();
85        let name = spec_name.to_string();
86        return Ok((name, base_url, provider));
87    }
88
89    if let Some((base_url, provider)) = registry.infer_base_url_from_spec_name(spec_name) {
90        // Canonicalize name from URL for stable refs.
91        if let Some((canonical_name, _)) = registry.resolve_url(&format!("{base_url}#x")) {
92            return Ok((canonical_name, base_url, provider));
93        }
94        return Ok((spec_name.to_string(), base_url, provider));
95    }
96
97    // Last resort: if caller provided a base URL (from a whitelisted-domain URL
98    // query), use it directly. This lets us fetch specs not in the bundled list.
99    if let Some(base_url) = base_url_hint {
100        let provider = spec_registry::provider_for_base_url(base_url).to_string();
101        return Ok((spec_name.to_string(), base_url.to_string(), provider));
102    }
103
104    anyhow::bail!("Unknown spec: {}", spec_name)
105}
106
107async fn ensure_indexed_for_spec_name(
108    conn: &Connection,
109    registry: &spec_registry::SpecRegistry,
110    spec_name: &str,
111    base_url_hint: Option<&str>,
112) -> Result<(i64, String)> {
113    let meta = resolve_spec_metadata(conn, registry, spec_name, base_url_hint);
114    let (canonical_name, base_url, provider) = match meta {
115        Ok(m) => m,
116        Err(_) if ietf::is_ietf_name(spec_name) => {
117            if let Some((name, url)) = ietf::discover_spec(spec_name).await? {
118                (name, url, "ietf".to_string())
119            } else {
120                anyhow::bail!("IETF document not found: {}", spec_name);
121            }
122        }
123        Err(_) => {
124            spec_list::fetch_and_seed(conn)?;
125            resolve_spec_metadata(conn, registry, spec_name, base_url_hint)?
126        }
127    };
128    let snapshot_id = fetch::ensure_indexed(conn, &canonical_name, &base_url, &provider).await?;
129    Ok((snapshot_id, canonical_name))
130}
131
132/// Query a specific section in a specification
133///
134/// Returns complete section information including navigation, children, and cross-references.
135///
136/// # Arguments
137/// * `spec_anchor` - Format: "SPEC#anchor" (e.g., "HTML#navigate")
138/// * `pr` - Optional PR options (number + force_update) to query against a WHATWG PR preview
139pub async fn query_section(
140    spec_anchor: &str,
141    pr: Option<&model::PrOpts>,
142) -> Result<model::QueryResult> {
143    let (spec_name, anchor, base_url_hint) = parse_spec_anchor(spec_anchor)?;
144    let conn = db::open_or_create_db()?;
145    let registry = spec_registry::SpecRegistry::new();
146
147    let (snapshot_id, spec_name, fallback_snapshot_id) = if let Some(pr_opts) = pr {
148        let (canonical_name, base_url, provider) =
149            resolve_spec_metadata(&conn, &registry, &spec_name, base_url_hint.as_deref())?;
150        let _ =
151            ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
152                .await?;
153        let (pr_snap, base_snap) = fetch::whatpr::ensure_pr_indexed(
154            &conn,
155            &canonical_name,
156            &base_url,
157            &provider,
158            pr_opts.pr_number,
159            pr_opts.force_update,
160        )
161        .await?;
162        (pr_snap, canonical_name, Some(base_snap))
163    } else {
164        let (snap_id, name) =
165            ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
166                .await?;
167        (snap_id, name, None)
168    };
169
170    let snapshot_sha: String = conn.query_row(
171        "SELECT sha FROM snapshots WHERE id = ?1",
172        [snapshot_id],
173        |row| row.get(0),
174    )?;
175
176    let section = db::queries::get_section(&conn, snapshot_id, &anchor)?
177        .or_else(|| {
178            fallback_snapshot_id.and_then(|fb_id| {
179                db::queries::get_section(&conn, fb_id, &anchor)
180                    .ok()
181                    .flatten()
182            })
183        })
184        .ok_or_else(|| anyhow::anyhow!("Section not found: {}#{}", spec_name, anchor))?;
185
186    let children = db::queries::get_children(&conn, snapshot_id, &anchor)?
187        .iter()
188        .map(|(child_anchor, title)| model::NavEntry {
189            anchor: child_anchor.clone(),
190            title: title.clone(),
191        })
192        .collect();
193
194    let navigation = model::Navigation {
195        parent: section.parent_anchor.as_ref().and_then(|p| {
196            db::queries::get_section(&conn, snapshot_id, p)
197                .ok()?
198                .map(|s| model::NavEntry {
199                    anchor: s.anchor,
200                    title: s.title,
201                })
202        }),
203        prev: section.prev_anchor.as_ref().and_then(|p| {
204            db::queries::get_section(&conn, snapshot_id, p)
205                .ok()?
206                .map(|s| model::NavEntry {
207                    anchor: s.anchor,
208                    title: s.title,
209                })
210        }),
211        next: section.next_anchor.as_ref().and_then(|n| {
212            db::queries::get_section(&conn, snapshot_id, n)
213                .ok()?
214                .map(|s| model::NavEntry {
215                    anchor: s.anchor,
216                    title: s.title,
217                })
218        }),
219        children,
220    };
221
222    let out_refs = db::queries::get_outgoing_refs(&conn, snapshot_id, &anchor)?;
223    let outgoing = out_refs
224        .iter()
225        .map(|(to_spec, to_anchor)| model::RefEntry {
226            spec: to_spec.clone(),
227            anchor: to_anchor.clone(),
228        })
229        .collect();
230
231    let in_refs = db::queries::get_incoming_refs(&conn, &spec_name, &anchor)?;
232    let incoming = in_refs
233        .iter()
234        .map(|(from_spec, from_anchor)| model::RefEntry {
235            spec: from_spec.clone(),
236            anchor: from_anchor.clone(),
237        })
238        .collect();
239
240    Ok(model::QueryResult {
241        spec: spec_name.clone(),
242        sha: snapshot_sha,
243        anchor: section.anchor,
244        title: section.title,
245        section_type: section.section_type.as_str().to_string(),
246        content: section.content_text,
247        navigation,
248        outgoing_refs: outgoing,
249        incoming_refs: incoming,
250    })
251}
252
253/// Check if a section exists in the specification
254///
255/// # Arguments
256/// * `spec_anchor` - Format: "SPEC#anchor"
257/// * `pr` - Optional PR options (number + force_update) to query against a WHATWG PR preview
258///
259/// # Returns
260/// `ExistsResult` with existence status and section type if found
261pub async fn check_exists(
262    spec_anchor: &str,
263    pr: Option<&model::PrOpts>,
264) -> Result<model::ExistsResult> {
265    let (spec_name, anchor, base_url_hint) = parse_spec_anchor(spec_anchor)?;
266    let conn = db::open_or_create_db()?;
267    let registry = spec_registry::SpecRegistry::new();
268
269    let (snapshot_id, spec_name, fallback_snapshot_id) = if let Some(pr_opts) = pr {
270        let (canonical_name, base_url, provider) =
271            resolve_spec_metadata(&conn, &registry, &spec_name, base_url_hint.as_deref())?;
272        let _ =
273            ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
274                .await?;
275        let (pr_snap, base_snap) = fetch::whatpr::ensure_pr_indexed(
276            &conn,
277            &canonical_name,
278            &base_url,
279            &provider,
280            pr_opts.pr_number,
281            pr_opts.force_update,
282        )
283        .await?;
284        (pr_snap, canonical_name, Some(base_snap))
285    } else {
286        let (snap_id, name) =
287            ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
288                .await?;
289        (snap_id, name, None)
290    };
291
292    // Check if section exists, with fallback to merge base
293    let section = db::queries::get_section(&conn, snapshot_id, &anchor)?.or_else(|| {
294        fallback_snapshot_id.and_then(|fb_id| {
295            db::queries::get_section(&conn, fb_id, &anchor)
296                .ok()
297                .flatten()
298        })
299    });
300    let exists = section.is_some();
301    let section_type = section
302        .as_ref()
303        .map(|s| s.section_type.as_str().to_string());
304
305    Ok(model::ExistsResult {
306        exists,
307        spec: spec_name,
308        anchor,
309        section_type,
310    })
311}
312
313type AnchorRow = (String, String, Option<String>, String);
314
315fn find_anchors_sql(
316    conn: &Connection,
317    sql_pattern: &str,
318    spec: Option<&str>,
319    snapshot_ids: Option<(i64, i64)>,
320    limit: u32,
321) -> Result<Vec<AnchorRow>> {
322    if let Some((pr_snap, base_snap)) = snapshot_ids {
323        let mut stmt = conn.prepare(
324            "SELECT s.anchor, sp.name, s.title, s.section_type FROM sections s
325             JOIN snapshots sn ON s.snapshot_id = sn.id
326             JOIN specs sp ON sn.spec_id = sp.id
327             WHERE s.anchor LIKE ?1 AND sn.id IN (?2, ?3)
328             LIMIT ?4",
329        )?;
330        let rows = stmt
331            .query_map((sql_pattern, pr_snap, base_snap, limit), |row| {
332                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
333            })?
334            .collect::<Result<Vec<_>, _>>()?;
335        Ok(rows)
336    } else if let Some(spec_name) = spec {
337        let mut stmt = conn.prepare(
338            "SELECT s.anchor, sp.name, s.title, s.section_type FROM sections s
339             JOIN snapshots sn ON s.snapshot_id = sn.id
340             JOIN specs sp ON sn.spec_id = sp.id
341             WHERE s.anchor LIKE ?1 AND sp.name = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
342             LIMIT ?3",
343        )?;
344        let rows = stmt
345            .query_map((sql_pattern, spec_name, limit), |row| {
346                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
347            })?
348            .collect::<Result<Vec<_>, _>>()?;
349        Ok(rows)
350    } else {
351        let mut stmt = conn.prepare(
352            "SELECT s.anchor, sp.name, s.title, s.section_type FROM sections s
353             JOIN snapshots sn ON s.snapshot_id = sn.id
354             JOIN specs sp ON sn.spec_id = sp.id
355             WHERE s.anchor LIKE ?1 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
356             LIMIT ?2",
357        )?;
358        let rows = stmt
359            .query_map((sql_pattern, limit), |row| {
360                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
361            })?
362            .collect::<Result<Vec<_>, _>>()?;
363        Ok(rows)
364    }
365}
366
367fn search_sections_pr(
368    conn: &Connection,
369    query: &str,
370    pr_snap: i64,
371    base_snap: i64,
372    limit: u32,
373) -> Result<Vec<model::SearchEntry>> {
374    let mut stmt = conn.prepare(
375        "SELECT s.anchor, sp.name, s.title, s.section_type,
376                snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
377         FROM sections_fts
378         JOIN sections s ON sections_fts.rowid = s.id
379         JOIN snapshots sn ON s.snapshot_id = sn.id
380         JOIN specs sp ON sn.spec_id = sp.id
381         WHERE sections_fts MATCH ?1 AND sn.id IN (?2, ?3)
382         LIMIT ?4",
383    )?;
384    let mut seen = HashSet::new();
385    let rows = stmt
386        .query_map((query, pr_snap, base_snap, limit), |row| {
387            Ok(model::SearchEntry {
388                anchor: row.get(0)?,
389                spec: row.get(1)?,
390                title: row.get(2)?,
391                section_type: row.get(3)?,
392                snippet: row.get::<_, Option<String>>(4)?.unwrap_or_default(),
393            })
394        })?
395        .collect::<Result<Vec<_>, _>>()?;
396    // Deduplicate: PR snapshot sections override merge base
397    Ok(rows
398        .into_iter()
399        .filter(|e| seen.insert(e.anchor.clone()))
400        .collect())
401}
402
403/// Find anchors matching a glob pattern
404///
405/// # Arguments
406/// * `pattern` - Glob pattern (e.g., "*-tree", "concept-*")
407/// * `spec` - Optional spec name to limit search
408/// * `limit` - Maximum number of results
409///
410/// # Returns
411/// `AnchorsResult` with matching anchors
412pub async fn find_anchors(
413    pattern: &str,
414    spec: Option<&str>,
415    limit: u32,
416    pr: Option<&model::PrOpts>,
417) -> Result<model::AnchorsResult> {
418    let conn = db::open_or_create_db()?;
419    let sql_pattern = pattern.replace('*', "%");
420
421    let snapshot_ids = if let Some(pr_opts) = pr {
422        let spec_name = spec.context("--pr requires --spec for anchor search")?;
423        let registry = spec_registry::SpecRegistry::new();
424        let (canonical_name, base_url, provider) =
425            resolve_spec_metadata(&conn, &registry, spec_name, None)?;
426        let _ = ensure_indexed_for_spec_name(&conn, &registry, spec_name, None).await?;
427        let (pr_snap, base_snap) = fetch::whatpr::ensure_pr_indexed(
428            &conn,
429            &canonical_name,
430            &base_url,
431            &provider,
432            pr_opts.pr_number,
433            pr_opts.force_update,
434        )
435        .await?;
436        Some((pr_snap, base_snap))
437    } else {
438        None
439    };
440
441    let results: Vec<(String, String, Option<String>, String)> =
442        if let Some((pr_snap, base_snap)) = snapshot_ids {
443            find_anchors_sql(&conn, &sql_pattern, None, Some((pr_snap, base_snap)), limit)?
444        } else {
445            find_anchors_sql(&conn, &sql_pattern, spec, None, limit)?
446        };
447
448    // Deduplicate by anchor (PR snapshot may overlap with merge base)
449    let mut seen = HashSet::new();
450    let entries: Vec<model::AnchorEntry> = results
451        .into_iter()
452        .filter(|(anchor, _, _, _)| seen.insert(anchor.clone()))
453        .map(
454            |(anchor, spec_name, title, section_type)| model::AnchorEntry {
455                spec: spec_name,
456                anchor,
457                title,
458                section_type,
459            },
460        )
461        .collect();
462
463    Ok(model::AnchorsResult {
464        pattern: pattern.to_string(),
465        results: entries,
466    })
467}
468
469/// Full-text search across specifications
470///
471/// # Arguments
472/// * `query` - Search query string
473/// * `spec` - Optional spec name to limit search
474/// * `limit` - Maximum number of results
475///
476/// # Returns
477/// `SearchResult` with matching sections and snippets
478pub async fn search_sections(
479    query: &str,
480    spec: Option<&str>,
481    limit: u32,
482    pr: Option<&model::PrOpts>,
483) -> Result<model::SearchResult> {
484    let conn = db::open_or_create_db()?;
485
486    let snapshot_ids = if let Some(pr_opts) = pr {
487        let spec_name = spec.context("--pr requires --spec for search")?;
488        let registry = spec_registry::SpecRegistry::new();
489        let (canonical_name, base_url, provider) =
490            resolve_spec_metadata(&conn, &registry, spec_name, None)?;
491        let _ = ensure_indexed_for_spec_name(&conn, &registry, spec_name, None).await?;
492        let (pr_snap, base_snap) = fetch::whatpr::ensure_pr_indexed(
493            &conn,
494            &canonical_name,
495            &base_url,
496            &provider,
497            pr_opts.pr_number,
498            pr_opts.force_update,
499        )
500        .await?;
501        Some((pr_snap, base_snap))
502    } else {
503        None
504    };
505
506    let entries = if let Some((pr_snap, base_snap)) = snapshot_ids {
507        search_sections_pr(&conn, query, pr_snap, base_snap, limit)?
508    } else {
509        match search_sections_fts(&conn, query, spec, limit) {
510            Ok(entries) => entries,
511            Err(err) if is_fts_syntax_error(&err) => {
512                if let Some(sanitized) = sanitize_for_fts(query) {
513                    search_sections_fts(&conn, &sanitized, spec, limit)?
514                } else {
515                    vec![]
516                }
517            }
518            Err(err) => return Err(err.into()),
519        }
520    };
521
522    Ok(model::SearchResult {
523        query: query.to_string(),
524        results: entries,
525    })
526}
527
528fn search_sections_fts(
529    conn: &Connection,
530    query: &str,
531    spec: Option<&str>,
532    limit: u32,
533) -> rusqlite::Result<Vec<model::SearchEntry>> {
534    let sql = if spec.is_some() {
535        "SELECT s.anchor, sp.name, s.title, s.section_type, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
536         FROM sections_fts
537         JOIN sections s ON sections_fts.rowid = s.id
538         JOIN snapshots sn ON s.snapshot_id = sn.id
539         JOIN specs sp ON sn.spec_id = sp.id
540         WHERE sections_fts MATCH ?1 AND sp.name = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'          LIMIT ?3"
541    } else {
542        "SELECT s.anchor, sp.name, s.title, s.section_type, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
543         FROM sections_fts
544         JOIN sections s ON sections_fts.rowid = s.id
545         JOIN snapshots sn ON s.snapshot_id = sn.id
546         JOIN specs sp ON sn.spec_id = sp.id
547         WHERE sections_fts MATCH ?1 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'          LIMIT ?2"
548    };
549
550    let mut stmt = conn.prepare(sql)?;
551    let map_row = |row: &rusqlite::Row| -> rusqlite::Result<model::SearchEntry> {
552        Ok(model::SearchEntry {
553            anchor: row.get(0)?,
554            spec: row.get(1)?,
555            title: row.get(2)?,
556            section_type: row.get(3)?,
557            snippet: row.get::<_, Option<String>>(4)?.unwrap_or_default(),
558        })
559    };
560    if let Some(spec_name) = spec {
561        stmt.query_map((query, spec_name, limit), map_row)?
562            .collect::<rusqlite::Result<Vec<_>>>()
563    } else {
564        stmt.query_map((query, limit), map_row)?
565            .collect::<rusqlite::Result<Vec<_>>>()
566    }
567}
568
569fn is_fts_syntax_error(err: &rusqlite::Error) -> bool {
570    match err {
571        rusqlite::Error::SqliteFailure(_, Some(message)) => message.contains("fts5: syntax error"),
572        _ => false,
573    }
574}
575
/// Reduce a query to its alphanumeric terms joined by single spaces.
///
/// Every non-alphanumeric character acts as a separator and is dropped.
/// Returns `None` when no terms remain.
fn sanitize_for_fts(query: &str) -> Option<String> {
    let mut cleaned = String::new();
    for term in query.split(|c: char| !c.is_alphanumeric()) {
        if term.is_empty() {
            continue;
        }
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(term);
    }

    // Terms are never empty, so an empty buffer means nothing survived.
    if cleaned.is_empty() {
        None
    } else {
        Some(cleaned)
    }
}
587
588/// List all headings in a specification
589///
590/// # Arguments
591/// * `spec` - Spec name
592/// * `pr` - Optional PR options (number + force_update) to query against a WHATWG PR preview
593///
594/// # Returns
595/// Vector of `ListEntry` with heading hierarchy
596pub async fn list_headings(
597    spec: &str,
598    pr: Option<&model::PrOpts>,
599) -> Result<Vec<model::ListEntry>> {
600    let conn = db::open_or_create_db()?;
601    let registry = spec_registry::SpecRegistry::new();
602
603    let snapshot_id = if let Some(pr_opts) = pr {
604        let (canonical_name, base_url, provider) =
605            resolve_spec_metadata(&conn, &registry, spec, None)?;
606        let _ = ensure_indexed_for_spec_name(&conn, &registry, spec, None).await?;
607        let (pr_snap, _base_snap) = fetch::whatpr::ensure_pr_indexed(
608            &conn,
609            &canonical_name,
610            &base_url,
611            &provider,
612            pr_opts.pr_number,
613            pr_opts.force_update,
614        )
615        .await?;
616        pr_snap
617    } else {
618        let (snap_id, _name) = ensure_indexed_for_spec_name(&conn, &registry, spec, None).await?;
619        snap_id
620    };
621
622    // Get all headings
623    let headings = db::queries::list_headings(&conn, snapshot_id)?;
624
625    // Convert to ListEntry format
626    let entries: Vec<model::ListEntry> = headings
627        .iter()
628        .map(|h| model::ListEntry {
629            anchor: h.anchor.clone(),
630            title: h.title.clone(),
631            depth: h.depth.unwrap_or(0),
632            parent: h.parent_anchor.clone(),
633        })
634        .collect();
635
636    Ok(entries)
637}
638
/// Which cross-reference edges to follow when walking the reference graph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RefDirection {
    /// Follow references that point at the current node.
    Incoming,
    /// Follow references made by the current node.
    Outgoing,
    /// Follow both incoming and outgoing references.
    Both,
}
649
650fn parse_ref_direction(direction: &str) -> Result<RefDirection> {
651    match direction.to_ascii_lowercase().as_str() {
652        "incoming" => Ok(RefDirection::Incoming),
653        "outgoing" => Ok(RefDirection::Outgoing),
654        "both" => Ok(RefDirection::Both),
655        _ => anyhow::bail!(
656            "Invalid direction: {} (expected incoming|outgoing|both)",
657            direction
658        ),
659    }
660}
661
/// Build the graph node identifier `<spec>#<anchor>`.
fn node_id(spec: &str, anchor: &str) -> String {
    let mut id = String::with_capacity(spec.len() + 1 + anchor.len());
    id.push_str(spec);
    id.push('#');
    id.push_str(anchor);
    id
}
665
/// Uncompiled node filters for a reference-graph query.
///
/// Patterns are matched against node ids of the form `<spec>#<anchor>`. A
/// pattern starting with `re:` is a regular expression; any other pattern is a
/// wildcard where `*` matches any run of characters and `?` matches one.
#[derive(Debug, Clone)]
struct GraphFilters {
    /// When non-empty, a non-root node must match at least one of these patterns.
    include: Vec<String>,
    /// A non-root node matching any of these patterns is rejected.
    exclude: Vec<String>,
    /// Skip reference edges unless both endpoints belong to the root spec.
    same_spec_only: bool,
}
672
673fn compile_pattern(pattern: &str) -> Result<Regex> {
674    if let Some(rest) = pattern.strip_prefix("re:") {
675        return Regex::new(rest)
676            .map_err(|e| anyhow::anyhow!("Invalid regex pattern '{}': {}", pattern, e));
677    }
678
679    let mut re = String::from("^");
680    for ch in pattern.chars() {
681        match ch {
682            '*' => re.push_str(".*"),
683            '?' => re.push('.'),
684            _ => re.push_str(&regex::escape(&ch.to_string())),
685        }
686    }
687    re.push('$');
688    Regex::new(&re).map_err(|e| anyhow::anyhow!("Invalid wildcard pattern '{}': {}", pattern, e))
689}
690
691struct CompiledGraphFilters {
692    include: Vec<Regex>,
693    exclude: Vec<Regex>,
694    same_spec_only: bool,
695}
696
697impl CompiledGraphFilters {
698    fn from_filters(filters: &GraphFilters) -> Result<Self> {
699        let include = filters
700            .include
701            .iter()
702            .map(|p| compile_pattern(p))
703            .collect::<Result<Vec<_>>>()?;
704        let exclude = filters
705            .exclude
706            .iter()
707            .map(|p| compile_pattern(p))
708            .collect::<Result<Vec<_>>>()?;
709        Ok(Self {
710            include,
711            exclude,
712            same_spec_only: filters.same_spec_only,
713        })
714    }
715
716    fn matches_node(&self, node_id: &str, root_id: &str) -> bool {
717        if node_id == root_id {
718            return true;
719        }
720
721        if !self.include.is_empty() && !self.include.iter().any(|re| re.is_match(node_id)) {
722            return false;
723        }
724
725        if self.exclude.iter().any(|re| re.is_match(node_id)) {
726            return false;
727        }
728
729        true
730    }
731}
732
733fn section_meta(
734    conn: &Connection,
735    spec: &str,
736    anchor: &str,
737) -> Result<Option<(Option<String>, Option<String>)>> {
738    let mut stmt = conn.prepare(
739        "SELECT s.title, s.section_type FROM sections s
740         JOIN snapshots sn ON s.snapshot_id = sn.id
741         JOIN specs sp ON sn.spec_id = sp.id
742         WHERE sp.name = ?1 AND s.anchor = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
743         LIMIT 1",
744    )?;
745
746    let mut rows = stmt.query((spec, anchor))?;
747    if let Some(row) = rows.next()? {
748        let title: Option<String> = row.get(0)?;
749        let section_type: Option<String> = row.get(1)?;
750        Ok(Some((title, section_type)))
751    } else {
752        Ok(None)
753    }
754}
755
756fn outgoing_refs_for_node(
757    conn: &Connection,
758    spec: &str,
759    anchor: &str,
760) -> Result<Vec<(String, String)>> {
761    let Some(snapshot_id) = db::queries::get_snapshot(conn, spec)? else {
762        return Ok(vec![]);
763    };
764    db::queries::get_outgoing_refs(conn, snapshot_id, anchor)
765}
766
767fn build_graph_from_conn(
768    conn: &Connection,
769    root_spec: &str,
770    root_anchor: &str,
771    direction: &str,
772    max_depth: usize,
773    max_nodes: usize,
774    filters: &GraphFilters,
775) -> Result<model::GraphResult> {
776    if max_nodes == 0 {
777        anyhow::bail!("max_nodes must be greater than 0");
778    }
779
780    let dir = parse_ref_direction(direction)?;
781    let compiled_filters = CompiledGraphFilters::from_filters(filters)?;
782
783    let mut visited: HashSet<(String, String)> = HashSet::new();
784    let mut queue: VecDeque<(String, String, usize)> = VecDeque::new();
785    let mut edges: HashSet<(String, String)> = HashSet::new();
786    let mut truncated = false;
787
788    visited.insert((root_spec.to_string(), root_anchor.to_string()));
789    queue.push_back((root_spec.to_string(), root_anchor.to_string(), 0));
790
791    while let Some((spec, anchor, depth)) = queue.pop_front() {
792        if depth >= max_depth {
793            continue;
794        }
795
796        if dir == RefDirection::Outgoing || dir == RefDirection::Both {
797            for (to_spec, to_anchor) in outgoing_refs_for_node(conn, &spec, &anchor)? {
798                if compiled_filters.same_spec_only && (to_spec != root_spec || spec != root_spec) {
799                    continue;
800                }
801                let from_id = node_id(&spec, &anchor);
802                let to_id = node_id(&to_spec, &to_anchor);
803                if from_id == to_id {
804                    continue;
805                }
806                edges.insert((from_id, to_id));
807
808                if visited.insert((to_spec.clone(), to_anchor.clone())) {
809                    if visited.len() > max_nodes {
810                        visited.remove(&(to_spec, to_anchor));
811                        truncated = true;
812                    } else {
813                        queue.push_back((to_spec, to_anchor, depth + 1));
814                    }
815                }
816            }
817        }
818
819        if dir == RefDirection::Incoming || dir == RefDirection::Both {
820            for (from_spec, from_anchor) in db::queries::get_incoming_refs(conn, &spec, &anchor)? {
821                if compiled_filters.same_spec_only && (from_spec != root_spec || spec != root_spec)
822                {
823                    continue;
824                }
825                let from_id = node_id(&from_spec, &from_anchor);
826                let to_id = node_id(&spec, &anchor);
827                if from_id == to_id {
828                    continue;
829                }
830                edges.insert((from_id, to_id));
831
832                if visited.insert((from_spec.clone(), from_anchor.clone())) {
833                    if visited.len() > max_nodes {
834                        visited.remove(&(from_spec, from_anchor));
835                        truncated = true;
836                    } else {
837                        queue.push_back((from_spec, from_anchor, depth + 1));
838                    }
839                }
840            }
841        }
842    }
843
844    let mut nodes: Vec<model::GraphNode> = visited
845        .into_iter()
846        .map(|(spec, anchor)| {
847            let id = node_id(&spec, &anchor);
848            let (title, section_type) = section_meta(conn, &spec, &anchor)?.unwrap_or((None, None));
849            Ok(model::GraphNode {
850                id,
851                spec,
852                anchor,
853                title,
854                section_type,
855                filter_role: None,
856            })
857        })
858        .collect::<Result<Vec<_>>>()?;
859
860    let mut edge_list: Vec<model::GraphEdge> = edges
861        .into_iter()
862        .map(|(from, to)| model::GraphEdge {
863            from,
864            to,
865            kind: "reference".to_string(),
866        })
867        .collect();
868
869    let root_id = node_id(root_spec, root_anchor);
870    let filter_active =
871        !compiled_filters.include.is_empty() || !compiled_filters.exclude.is_empty();
872    let all_ids: HashSet<String> = nodes.iter().map(|n| n.id.clone()).collect();
873
874    let mut matched_ids: HashSet<String> = if filter_active {
875        nodes
876            .iter()
877            .filter_map(|n| {
878                if compiled_filters.matches_node(&n.id, &root_id) {
879                    Some(n.id.clone())
880                } else {
881                    None
882                }
883            })
884            .collect()
885    } else {
886        all_ids.clone()
887    };
888    matched_ids.insert(root_id.clone());
889
890    // Build undirected adjacency from all currently known edges.
891    let mut adjacency: HashMap<String, Vec<String>> = HashMap::new();
892    for edge in &edge_list {
893        adjacency
894            .entry(edge.from.clone())
895            .or_default()
896            .push(edge.to.clone());
897        adjacency
898            .entry(edge.to.clone())
899            .or_default()
900            .push(edge.from.clone());
901    }
902
903    // BFS tree for shortest paths from root (undirected).
904    let mut parent: HashMap<String, String> = HashMap::new();
905    let mut seen: HashSet<String> = HashSet::new();
906    let mut bfs: VecDeque<String> = VecDeque::new();
907    seen.insert(root_id.clone());
908    bfs.push_back(root_id.clone());
909    while let Some(current) = bfs.pop_front() {
910        if let Some(neighbors) = adjacency.get(&current) {
911            for neighbor in neighbors {
912                if seen.insert(neighbor.clone()) {
913                    parent.insert(neighbor.clone(), current.clone());
914                    bfs.push_back(neighbor.clone());
915                }
916            }
917        }
918    }
919
920    // Keep matched nodes and include bridge nodes on shortest root paths.
921    let mut kept_ids: HashSet<String> = HashSet::new();
922    kept_ids.insert(root_id.clone());
923    for id in &matched_ids {
924        if !seen.contains(id) {
925            continue;
926        }
927        let mut cur = id.clone();
928        kept_ids.insert(cur.clone());
929        while let Some(p) = parent.get(&cur) {
930            kept_ids.insert(p.clone());
931            if *p == root_id {
932                break;
933            }
934            cur = p.clone();
935        }
936    }
937
938    nodes.retain(|n| kept_ids.contains(&n.id));
939    edge_list.retain(|e| kept_ids.contains(&e.from) && kept_ids.contains(&e.to));
940
941    // Final prune for accidental disconnected remnants in kept graph.
942    let mut kept_adj: HashMap<String, Vec<String>> = HashMap::new();
943    for edge in &edge_list {
944        kept_adj
945            .entry(edge.from.clone())
946            .or_default()
947            .push(edge.to.clone());
948        kept_adj
949            .entry(edge.to.clone())
950            .or_default()
951            .push(edge.from.clone());
952    }
953    let mut connected: HashSet<String> = HashSet::new();
954    let mut connected_q: VecDeque<String> = VecDeque::new();
955    connected.insert(root_id.clone());
956    connected_q.push_back(root_id.clone());
957    while let Some(current) = connected_q.pop_front() {
958        if let Some(neighbors) = kept_adj.get(&current) {
959            for neighbor in neighbors {
960                if connected.insert(neighbor.clone()) {
961                    connected_q.push_back(neighbor.clone());
962                }
963            }
964        }
965    }
966
967    nodes.retain(|n| connected.contains(&n.id));
968    edge_list.retain(|e| connected.contains(&e.from) && connected.contains(&e.to));
969
970    if filter_active {
971        for node in &mut nodes {
972            if node.id == root_id {
973                node.filter_role = Some("root".to_string());
974            } else if matched_ids.contains(&node.id) {
975                node.filter_role = Some("matched".to_string());
976            } else {
977                node.filter_role = Some("bridge".to_string());
978            }
979        }
980    }
981
982    nodes.sort_by(|a, b| a.id.cmp(&b.id));
983
984    edge_list.sort_by(|a, b| a.from.cmp(&b.from).then(a.to.cmp(&b.to)));
985
986    Ok(model::GraphResult {
987        root: model::GraphRoot {
988            spec: root_spec.to_string(),
989            anchor: root_anchor.to_string(),
990        },
991        direction: direction.to_ascii_lowercase(),
992        max_depth,
993        max_nodes,
994        truncated,
995        nodes,
996        edges: edge_list,
997    })
998}
999
/// A section that a reference lookup may be pointing at.
///
/// Produced either by heuristic scoring of a free-form query or directly from
/// an exact `SPEC#anchor` target.
#[derive(Clone, Debug)]
struct Candidate {
    /// Name of the spec the section belongs to.
    spec: String,
    /// Anchor of the section within `spec`.
    anchor: String,
    /// Section title, when one is recorded.
    title: Option<String>,
    /// Section type label such as `"idl"` or `"definition"`; `"unknown"` when
    /// no metadata was found for an exact target.
    section_type: String,
    /// Ranking score; higher sorts first. Exact targets use `i32::MAX`.
    score: i32,
}
1008
1009fn resolve_find_references_candidates(
1010    conn: &Connection,
1011    query: &str,
1012    limit: u32,
1013) -> Result<Vec<Candidate>> {
1014    let q = query.trim().to_ascii_lowercase();
1015    if q.is_empty() {
1016        return Ok(vec![]);
1017    }
1018
1019    let mut stmt = conn.prepare(
1020        "SELECT sp.name, s.anchor, s.title, s.section_type
1021         FROM sections s
1022         JOIN snapshots sn ON s.snapshot_id = sn.id
1023         JOIN specs sp ON sn.spec_id = sp.id
1024         WHERE (LOWER(s.anchor) LIKE ?1 OR LOWER(COALESCE(s.title, '')) LIKE ?2) AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
1025         LIMIT 1000",
1026    )?;
1027
1028    let (anchor_like, title_like) = if let Some((_owner, member)) = q.split_once('.') {
1029        (format!("%{}%", member), format!("%{}%", member))
1030    } else {
1031        (format!("%{}%", q), format!("%{}%", q))
1032    };
1033
1034    let mut rows = stmt.query((anchor_like, title_like))?;
1035    let mut candidates = Vec::new();
1036
1037    if let Some((owner, member)) = q.split_once('.') {
1038        let owner = owner.trim();
1039        let member = member.trim();
1040
1041        while let Some(row) = rows.next()? {
1042            let spec: String = row.get(0)?;
1043            let anchor: String = row.get(1)?;
1044            let title: Option<String> = row.get(2)?;
1045            let section_type: String = row.get(3)?;
1046            let anchor_l = anchor.to_ascii_lowercase();
1047            let title_l = title.as_deref().unwrap_or("").to_ascii_lowercase();
1048
1049            let mut score = 0;
1050            if anchor_l.contains(member) {
1051                score += 40;
1052            }
1053            if anchor_l.contains(owner) {
1054                score += 35;
1055            }
1056            if anchor_l.contains(&format!("-{}-{}", owner, member))
1057                || anchor_l.contains(&format!("{}-{}", owner, member))
1058            {
1059                score += 50;
1060            }
1061            if anchor_l.ends_with(&format!("-{}", member)) {
1062                score += 10;
1063            }
1064            if title_l == member {
1065                score += 50;
1066            } else if title_l.contains(member) {
1067                score += 20;
1068            }
1069            if section_type == "idl" {
1070                score += 10;
1071            } else if section_type == "definition" {
1072                score += 5;
1073            }
1074
1075            // Deprioritize candidates that don't mention owner at all.
1076            if !anchor_l.contains(owner) {
1077                score -= 20;
1078            }
1079
1080            if score > 0 {
1081                candidates.push(Candidate {
1082                    spec,
1083                    anchor,
1084                    title,
1085                    section_type,
1086                    score,
1087                });
1088            }
1089        }
1090    } else {
1091        while let Some(row) = rows.next()? {
1092            let spec: String = row.get(0)?;
1093            let anchor: String = row.get(1)?;
1094            let title: Option<String> = row.get(2)?;
1095            let section_type: String = row.get(3)?;
1096            let anchor_l = anchor.to_ascii_lowercase();
1097            let title_l = title.as_deref().unwrap_or("").to_ascii_lowercase();
1098
1099            let mut score = 0;
1100            if anchor_l == q {
1101                score += 100;
1102            }
1103            if title_l == q {
1104                score += 90;
1105            }
1106            if anchor_l.contains(&q) {
1107                score += 40;
1108            }
1109            if title_l.contains(&q) {
1110                score += 30;
1111            }
1112            if section_type == "idl" || section_type == "definition" {
1113                score += 5;
1114            }
1115
1116            if score > 0 {
1117                candidates.push(Candidate {
1118                    spec,
1119                    anchor,
1120                    title,
1121                    section_type,
1122                    score,
1123                });
1124            }
1125        }
1126    }
1127
1128    candidates.sort_by(|a, b| {
1129        b.score
1130            .cmp(&a.score)
1131            .then(a.spec.cmp(&b.spec))
1132            .then(a.anchor.cmp(&b.anchor))
1133    });
1134    let mut seen: HashSet<(String, String)> = HashSet::new();
1135    candidates.retain(|c| seen.insert((c.spec.clone(), c.anchor.clone())));
1136    candidates.truncate(limit as usize);
1137
1138    Ok(candidates)
1139}
1140
1141fn find_references_from_conn(
1142    conn: &Connection,
1143    exact_target: Option<(String, String)>,
1144    query: &str,
1145    direction: &str,
1146    limit: u32,
1147) -> Result<model::RefsResult> {
1148    let dir = parse_ref_direction(direction)?;
1149    let mut matches = Vec::new();
1150    let exact_mode = exact_target.is_some();
1151
1152    let candidates = if let Some((spec, anchor)) = exact_target {
1153        let (title, section_type) =
1154            section_meta(conn, &spec, &anchor)?.unwrap_or((None, Some("unknown".to_string())));
1155        vec![Candidate {
1156            spec,
1157            anchor,
1158            title,
1159            section_type: section_type.unwrap_or_else(|| "unknown".to_string()),
1160            score: i32::MAX,
1161        }]
1162    } else {
1163        resolve_find_references_candidates(conn, query, limit)?
1164    };
1165
1166    for candidate in candidates {
1167        let outgoing = if dir == RefDirection::Outgoing || dir == RefDirection::Both {
1168            Some(
1169                outgoing_refs_for_node(conn, &candidate.spec, &candidate.anchor)?
1170                    .into_iter()
1171                    .map(|(to_spec, to_anchor)| model::RefEntry {
1172                        spec: to_spec,
1173                        anchor: to_anchor,
1174                    })
1175                    .collect(),
1176            )
1177        } else {
1178            None
1179        };
1180
1181        let incoming = if dir == RefDirection::Incoming || dir == RefDirection::Both {
1182            Some(
1183                db::queries::get_incoming_refs(conn, &candidate.spec, &candidate.anchor)?
1184                    .into_iter()
1185                    .map(|(from_spec, from_anchor)| model::RefEntry {
1186                        spec: from_spec,
1187                        anchor: from_anchor,
1188                    })
1189                    .collect(),
1190            )
1191        } else {
1192            None
1193        };
1194
1195        matches.push(model::RefsMatch {
1196            spec: candidate.spec,
1197            anchor: candidate.anchor,
1198            title: candidate.title,
1199            section_type: candidate.section_type,
1200            resolution: if exact_mode {
1201                "exact".to_string()
1202            } else {
1203                "heuristic".to_string()
1204            },
1205            outgoing,
1206            incoming,
1207        });
1208    }
1209
1210    Ok(model::RefsResult {
1211        query: query.to_string(),
1212        direction: direction.to_ascii_lowercase(),
1213        matches,
1214    })
1215}
1216
/// Normalize an IDL name query such as `Interface.method()`.
///
/// Surrounding whitespace is dropped and trailing `()` call suffixes are removed
/// from the member (or from the whole query when it contains no `.`). The query is
/// split at the last `.`; when the owner part is blank only the member is returned,
/// otherwise the result is `owner.member`.
fn normalize_idl_query(query: &str) -> String {
    const CALL_SUFFIX: &str = "()";

    let cleaned = query.trim();
    match cleaned.rsplit_once('.') {
        None => cleaned.trim_end_matches(CALL_SUFFIX).to_string(),
        Some((lhs, rhs)) => {
            let member = rhs.trim().trim_end_matches(CALL_SUFFIX);
            match lhs.trim() {
                "" => member.to_string(),
                owner => format!("{owner}.{member}"),
            }
        }
    }
}
1229
1230fn query_idl_from_conn(
1231    conn: &Connection,
1232    query: &str,
1233    spec_filter: Option<&str>,
1234    limit: u32,
1235) -> Result<model::IdlResult> {
1236    let mut entries = Vec::new();
1237
1238    // Exact anchor lookup
1239    if let Ok((spec_name, anchor, _)) = parse_spec_anchor(query) {
1240        if spec_filter.is_some() && spec_filter != Some(spec_name.as_str()) {
1241            return Ok(model::IdlResult {
1242                query: query.to_string(),
1243                matches: vec![],
1244            });
1245        }
1246
1247        let mut stmt = conn.prepare(
1248            "SELECT sp.name, d.anchor, d.kind, d.name, d.owner, d.canonical_name, d.idl_text, s.title
1249             FROM idl_defs d
1250             JOIN snapshots sn ON d.snapshot_id = sn.id
1251             JOIN specs sp ON sn.spec_id = sp.id
1252             LEFT JOIN sections s ON s.snapshot_id = d.snapshot_id AND s.anchor = d.anchor
1253             WHERE sp.name = ?1 AND d.anchor = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
1254             ORDER BY d.kind
1255             LIMIT ?3",
1256        )?;
1257
1258        let rows = stmt
1259            .query_map((spec_name, anchor, limit), |row| {
1260                Ok(model::IdlEntry {
1261                    spec: row.get(0)?,
1262                    anchor: row.get(1)?,
1263                    kind: row.get(2)?,
1264                    name: row.get(3)?,
1265                    owner: row.get(4)?,
1266                    canonical_name: row.get(5)?,
1267                    idl_text: row.get(6)?,
1268                    title: row.get(7)?,
1269                })
1270            })?
1271            .collect::<Result<Vec<_>, _>>()?;
1272        entries.extend(rows);
1273    } else {
1274        let normalized = normalize_idl_query(query).to_ascii_lowercase();
1275        if normalized.is_empty() {
1276            return Ok(model::IdlResult {
1277                query: query.to_string(),
1278                matches: vec![],
1279            });
1280        }
1281        let like = format!("%{}%", normalized);
1282
1283        let sql_with_spec = "SELECT sp.name, d.anchor, d.kind, d.name, d.owner, d.canonical_name, d.idl_text, s.title,
1284                CASE
1285                    WHEN LOWER(d.canonical_name) = ?1 THEN 100
1286                    WHEN LOWER(d.name) = ?1 THEN 95
1287                    WHEN LOWER(d.anchor) = ?1 THEN 90
1288                    WHEN LOWER(d.canonical_name) LIKE ?2 THEN 80
1289                    WHEN LOWER(d.name) LIKE ?2 THEN 70
1290                    ELSE 0
1291                END AS score
1292             FROM idl_defs d
1293             JOIN snapshots sn ON d.snapshot_id = sn.id
1294             JOIN specs sp ON sn.spec_id = sp.id
1295             LEFT JOIN sections s ON s.snapshot_id = d.snapshot_id AND s.anchor = d.anchor
1296             WHERE sp.name = ?3
1297               AND (LOWER(d.canonical_name) = ?1 OR LOWER(d.name) = ?1 OR LOWER(d.anchor) = ?1
1298                    OR LOWER(d.canonical_name) LIKE ?2 OR LOWER(d.name) LIKE ?2)
1299               AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
1300             ORDER BY score DESC, sp.name, d.canonical_name
1301             LIMIT ?4";
1302
1303        let sql_without_spec = "SELECT sp.name, d.anchor, d.kind, d.name, d.owner, d.canonical_name, d.idl_text, s.title,
1304                CASE
1305                    WHEN LOWER(d.canonical_name) = ?1 THEN 100
1306                    WHEN LOWER(d.name) = ?1 THEN 95
1307                    WHEN LOWER(d.anchor) = ?1 THEN 90
1308                    WHEN LOWER(d.canonical_name) LIKE ?2 THEN 80
1309                    WHEN LOWER(d.name) LIKE ?2 THEN 70
1310                    ELSE 0
1311                END AS score
1312             FROM idl_defs d
1313             JOIN snapshots sn ON d.snapshot_id = sn.id
1314             JOIN specs sp ON sn.spec_id = sp.id
1315             LEFT JOIN sections s ON s.snapshot_id = d.snapshot_id AND s.anchor = d.anchor
1316             WHERE (LOWER(d.canonical_name) = ?1 OR LOWER(d.name) = ?1 OR LOWER(d.anchor) = ?1
1317                    OR LOWER(d.canonical_name) LIKE ?2 OR LOWER(d.name) LIKE ?2)
1318               AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'
1319             ORDER BY score DESC, sp.name, d.canonical_name
1320             LIMIT ?3";
1321
1322        let mut stmt = conn.prepare(if spec_filter.is_some() {
1323            sql_with_spec
1324        } else {
1325            sql_without_spec
1326        })?;
1327
1328        if let Some(spec_name) = spec_filter {
1329            let rows = stmt
1330                .query_map((normalized, like, spec_name, limit), |row| {
1331                    Ok(model::IdlEntry {
1332                        spec: row.get(0)?,
1333                        anchor: row.get(1)?,
1334                        kind: row.get(2)?,
1335                        name: row.get(3)?,
1336                        owner: row.get(4)?,
1337                        canonical_name: row.get(5)?,
1338                        idl_text: row.get(6)?,
1339                        title: row.get(7)?,
1340                    })
1341                })?
1342                .collect::<Result<Vec<_>, _>>()?;
1343            entries.extend(rows);
1344        } else {
1345            let rows = stmt
1346                .query_map((normalized, like, limit), |row| {
1347                    Ok(model::IdlEntry {
1348                        spec: row.get(0)?,
1349                        anchor: row.get(1)?,
1350                        kind: row.get(2)?,
1351                        name: row.get(3)?,
1352                        owner: row.get(4)?,
1353                        canonical_name: row.get(5)?,
1354                        idl_text: row.get(6)?,
1355                        title: row.get(7)?,
1356                    })
1357                })?
1358                .collect::<Result<Vec<_>, _>>()?;
1359            entries.extend(rows);
1360        }
1361    }
1362
1363    Ok(model::IdlResult {
1364        query: query.to_string(),
1365        matches: entries,
1366    })
1367}
1368
1369/// Build a cross-reference graph rooted at SPEC#anchor from currently indexed specs.
1370pub async fn graph_section(
1371    spec_anchor: &str,
1372    direction: &str,
1373    max_depth: usize,
1374    max_nodes: usize,
1375    include: &[String],
1376    exclude: &[String],
1377    same_spec_only: bool,
1378) -> Result<model::GraphResult> {
1379    let (spec_name, anchor, base_url_hint) = parse_spec_anchor(spec_anchor)?;
1380    let conn = db::open_or_create_db()?;
1381    let registry = spec_registry::SpecRegistry::new();
1382
1383    let (_snapshot_id, spec_name) =
1384        ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
1385            .await?;
1386
1387    let filters = GraphFilters {
1388        include: include.to_vec(),
1389        exclude: exclude.to_vec(),
1390        same_spec_only,
1391    };
1392
1393    build_graph_from_conn(
1394        &conn, &spec_name, &anchor, direction, max_depth, max_nodes, &filters,
1395    )
1396}
1397
1398/// Query dedicated WebIDL definitions.
1399///
1400/// `query` supports:
1401/// - exact anchor: `SPEC#anchor` or full URL
1402/// - canonical name: `Interface.member`, `Interface.method()`, `Interface`
1403pub async fn query_idl(
1404    query: &str,
1405    spec_filter: Option<&str>,
1406    limit: u32,
1407    pr: Option<&model::PrOpts>,
1408) -> Result<model::IdlResult> {
1409    let conn = db::open_or_create_db()?;
1410    let registry = spec_registry::SpecRegistry::new();
1411
1412    if let Some(spec_name) = spec_filter {
1413        let _ = ensure_indexed_for_spec_name(&conn, &registry, spec_name, None).await?;
1414        if let Some(pr_opts) = pr {
1415            let (canonical_name, base_url, provider) =
1416                resolve_spec_metadata(&conn, &registry, spec_name, None)?;
1417            let _ = fetch::whatpr::ensure_pr_indexed(
1418                &conn,
1419                &canonical_name,
1420                &base_url,
1421                &provider,
1422                pr_opts.pr_number,
1423                pr_opts.force_update,
1424            )
1425            .await?;
1426        }
1427    } else if let Ok((spec_name, _, base_url_hint)) = parse_spec_anchor(query) {
1428        let _ =
1429            ensure_indexed_for_spec_name(&conn, &registry, &spec_name, base_url_hint.as_deref())
1430                .await?;
1431        if let Some(pr_opts) = pr {
1432            let (canonical_name, base_url, provider) =
1433                resolve_spec_metadata(&conn, &registry, &spec_name, base_url_hint.as_deref())?;
1434            let _ = fetch::whatpr::ensure_pr_indexed(
1435                &conn,
1436                &canonical_name,
1437                &base_url,
1438                &provider,
1439                pr_opts.pr_number,
1440                pr_opts.force_update,
1441            )
1442            .await?;
1443        }
1444    }
1445
1446    query_idl_from_conn(&conn, query, spec_filter, limit)
1447}
1448
1449/// Find incoming/outgoing references for SPEC#anchor or a shorthand query (e.g. Window.navigation).
1450pub async fn find_references(
1451    target: &str,
1452    direction: &str,
1453    limit: u32,
1454    pr: Option<&model::PrOpts>,
1455) -> Result<model::RefsResult> {
1456    let conn = db::open_or_create_db()?;
1457    let registry = spec_registry::SpecRegistry::new();
1458
1459    let exact_target = match parse_spec_anchor(target) {
1460        Ok((spec_name, anchor, base_url_hint)) => {
1461            let (_snapshot_id, canonical_spec_name) = ensure_indexed_for_spec_name(
1462                &conn,
1463                &registry,
1464                &spec_name,
1465                base_url_hint.as_deref(),
1466            )
1467            .await?;
1468            if let Some(pr_opts) = pr {
1469                let (canonical_name, base_url, provider) =
1470                    resolve_spec_metadata(&conn, &registry, &canonical_spec_name, None)?;
1471                let _ = fetch::whatpr::ensure_pr_indexed(
1472                    &conn,
1473                    &canonical_name,
1474                    &base_url,
1475                    &provider,
1476                    pr_opts.pr_number,
1477                    pr_opts.force_update,
1478                )
1479                .await?;
1480            }
1481            Some((canonical_spec_name, anchor))
1482        }
1483        Err(_) => None,
1484    };
1485
1486    find_references_from_conn(&conn, exact_target, target, direction, limit)
1487}
1488
1489/// Compute diff between a PR preview and its merge base for a spec.
1490pub async fn pr_diff(spec: &str, pr_opts: &model::PrOpts) -> Result<model::PrDiffResult> {
1491    let conn = db::open_or_create_db()?;
1492    let registry = spec_registry::SpecRegistry::new();
1493    let (canonical_name, base_url, provider) = resolve_spec_metadata(&conn, &registry, spec, None)?;
1494    let _ = ensure_indexed_for_spec_name(&conn, &registry, spec, None).await?;
1495    let (pr_snap_id, base_snap_id) = fetch::whatpr::ensure_pr_indexed(
1496        &conn,
1497        &canonical_name,
1498        &base_url,
1499        &provider,
1500        pr_opts.pr_number,
1501        pr_opts.force_update,
1502    )
1503    .await?;
1504
1505    let pr_sha: String = conn.query_row(
1506        "SELECT sha FROM snapshots WHERE id = ?1",
1507        [pr_snap_id],
1508        |row| row.get(0),
1509    )?;
1510    let base_sha: String = conn.query_row(
1511        "SELECT sha FROM snapshots WHERE id = ?1",
1512        [base_snap_id],
1513        |row| row.get(0),
1514    )?;
1515
1516    let changes = db::queries::compute_pr_diff(&conn, pr_snap_id, base_snap_id)?;
1517    let added = changes.iter().filter(|c| c.change_type == "added").count();
1518    let removed = changes
1519        .iter()
1520        .filter(|c| c.change_type == "removed")
1521        .count();
1522    let modified = changes
1523        .iter()
1524        .filter(|c| c.change_type == "modified")
1525        .count();
1526
1527    Ok(model::PrDiffResult {
1528        spec: canonical_name,
1529        pr_number: pr_opts.pr_number,
1530        head_sha: pr_sha,
1531        merge_base_sha: base_sha,
1532        summary: model::PrDiffSummary {
1533            added,
1534            removed,
1535            modified,
1536        },
1537        changes,
1538    })
1539}
1540
1541/// Clear cached PR preview data.
1542///
1543/// With no arguments, lists all cached PR snapshots.
1544/// With `--all`, clears PR data for all specs.
1545/// With `--spec`, clears PR data for a specific spec (optionally a single PR with `pr_number`).
1546pub fn clear_pr_data(
1547    spec: Option<&str>,
1548    pr_number: Option<i64>,
1549    all: bool,
1550) -> Result<serde_json::Value> {
1551    let conn = db::open_or_create_db()?;
1552
1553    if !all && spec.is_none() && pr_number.is_none() {
1554        let snapshots = db::queries::list_pr_snapshots(&conn)?;
1555        let entries: Vec<serde_json::Value> = snapshots
1556            .iter()
1557            .map(|(spec, pr, sha, indexed, sections)| {
1558                serde_json::json!({
1559                    "spec": spec, "pr": pr, "sha": sha,
1560                    "indexed_at": indexed, "sections": sections
1561                })
1562            })
1563            .collect();
1564        return Ok(serde_json::json!({ "cached_prs": entries }));
1565    }
1566
1567    if all {
1568        let specs = db::queries::list_specs(&conn)?;
1569        let mut total = 0;
1570        for (name, base_url, provider) in &specs {
1571            let spec_id = db::write::insert_or_get_spec(&conn, name, base_url, provider)?;
1572            total += db::write::delete_all_pr_data_for_spec(&conn, spec_id)?;
1573        }
1574        return Ok(serde_json::json!({ "cleared": total }));
1575    }
1576
1577    let spec_name = spec.context("--spec required when clearing specific PR data")?;
1578    let (name, base_url, provider) = db::queries::get_spec_meta(&conn, spec_name)?
1579        .with_context(|| format!("Unknown spec: {spec_name}"))?;
1580    let spec_id = db::write::insert_or_get_spec(&conn, &name, &base_url, &provider)?;
1581
1582    if let Some(pr) = pr_number {
1583        db::write::delete_pr_data(&conn, spec_id, pr)?;
1584        Ok(serde_json::json!({ "cleared_pr": pr, "spec": name }))
1585    } else {
1586        let count = db::write::delete_all_pr_data_for_spec(&conn, spec_id)?;
1587        Ok(serde_json::json!({ "cleared": count, "spec": name }))
1588    }
1589}
1590
1591/// Update specifications to latest versions
1592///
1593/// # Arguments
1594/// * `spec` - Optional spec name (updates all if None)
1595/// * `force` - Force update even if recently checked
1596///
1597/// # Returns
1598/// Vector of tuples (spec_name, Option<snapshot_id>)
1599/// - None indicates spec was already up to date
1600pub async fn update_specs(spec: Option<&str>, force: bool) -> Result<Vec<(String, Option<i64>)>> {
1601    let conn = db::open_or_create_db()?;
1602    let registry = spec_registry::SpecRegistry::new();
1603
1604    let mut results = Vec::new();
1605
1606    if let Some(spec_name) = spec {
1607        // Update single spec
1608        let (canonical_name, base_url, provider) =
1609            resolve_spec_metadata(&conn, &registry, spec_name, None)?;
1610        let snapshot_id =
1611            fetch::update_if_needed(&conn, &canonical_name, &base_url, &provider, force).await?;
1612        results.push((canonical_name, snapshot_id));
1613    } else {
1614        // Update all indexed/discovered specs.
1615        let specs = db::queries::list_specs(&conn)?;
1616        let all_results = fetch::update_all_specs(&conn, &specs, force).await;
1617
1618        for (spec_name, result) in all_results {
1619            match result {
1620                Ok(snapshot_id) => results.push((spec_name, snapshot_id)),
1621                Err(e) => {
1622                    eprintln!("Failed to update {}: {}", spec_name, e);
1623                    results.push((spec_name, None));
1624                }
1625            }
1626        }
1627    }
1628
1629    Ok(results)
1630}
1631
1632/// Clear the database (remove all indexed data)
1633///
1634/// # Returns
1635/// Path to the deleted database file
1636pub fn clear_database() -> Result<String> {
1637    let db_path = db::get_db_path();
1638
1639    if !db_path.exists() {
1640        anyhow::bail!("Database does not exist: {}", db_path.display());
1641    }
1642
1643    std::fs::remove_file(&db_path)?;
1644    Ok(db_path.display().to_string())
1645}
1646
1647#[cfg(test)]
1648mod tests {
1649    use super::*;
1650    use crate::db;
1651    use crate::db::write;
1652    use crate::model::{ParsedReference, ParsedSection, SectionType};
1653    use rusqlite::Connection;
1654
1655    fn default_graph_filters() -> GraphFilters {
1656        GraphFilters {
1657            include: vec![],
1658            exclude: vec![],
1659            same_spec_only: false,
1660        }
1661    }
1662
1663    fn setup_reference_graph_db() -> Connection {
1664        let conn = db::open_test_db().unwrap();
1665
1666        let html_spec_id =
1667            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
1668                .unwrap();
1669        let dom_spec_id =
1670            write::insert_or_get_spec(&conn, "DOM", "https://dom.spec.whatwg.org", "whatwg")
1671                .unwrap();
1672        let url_spec_id =
1673            write::insert_or_get_spec(&conn, "URL", "https://url.spec.whatwg.org", "whatwg")
1674                .unwrap();
1675
1676        let html_snapshot =
1677            write::insert_snapshot(&conn, html_spec_id, "hash:sha-html", "2026-01-01T00:00:00Z")
1678                .unwrap();
1679        let dom_snapshot =
1680            write::insert_snapshot(&conn, dom_spec_id, "hash:sha-dom", "2026-01-01T00:00:00Z")
1681                .unwrap();
1682        let url_snapshot =
1683            write::insert_snapshot(&conn, url_spec_id, "hash:sha-url", "2026-01-01T00:00:00Z")
1684                .unwrap();
1685
1686        let html_sections = vec![
1687            ParsedSection {
1688                anchor: "navigate".to_string(),
1689                title: Some("navigate".to_string()),
1690                content_text: None,
1691                section_type: SectionType::Algorithm,
1692                parent_anchor: None,
1693                prev_anchor: None,
1694                next_anchor: None,
1695                depth: None,
1696            },
1697            ParsedSection {
1698                anchor: "dom-window-navigation".to_string(),
1699                title: Some("navigation".to_string()),
1700                content_text: None,
1701                section_type: SectionType::Idl,
1702                parent_anchor: None,
1703                prev_anchor: None,
1704                next_anchor: None,
1705                depth: None,
1706            },
1707            ParsedSection {
1708                anchor: "dom-worker-navigation".to_string(),
1709                title: Some("navigation".to_string()),
1710                content_text: None,
1711                section_type: SectionType::Idl,
1712                parent_anchor: None,
1713                prev_anchor: None,
1714                next_anchor: None,
1715                depth: None,
1716            },
1717            ParsedSection {
1718                anchor: "some-consumer".to_string(),
1719                title: Some("consumer".to_string()),
1720                content_text: None,
1721                section_type: SectionType::Algorithm,
1722                parent_anchor: None,
1723                prev_anchor: None,
1724                next_anchor: None,
1725                depth: None,
1726            },
1727            ParsedSection {
1728                anchor: "dom-window-navigation-helper".to_string(),
1729                title: Some("navigation helper".to_string()),
1730                content_text: None,
1731                section_type: SectionType::Algorithm,
1732                parent_anchor: None,
1733                prev_anchor: None,
1734                next_anchor: None,
1735                depth: None,
1736            },
1737        ];
1738        write::insert_sections_bulk(&conn, html_snapshot, &html_sections).unwrap();
1739
1740        let dom_sections = vec![ParsedSection {
1741            anchor: "concept-tree".to_string(),
1742            title: Some("tree".to_string()),
1743            content_text: None,
1744            section_type: SectionType::Definition,
1745            parent_anchor: None,
1746            prev_anchor: None,
1747            next_anchor: None,
1748            depth: None,
1749        }];
1750        write::insert_sections_bulk(&conn, dom_snapshot, &dom_sections).unwrap();
1751
1752        let url_sections = vec![
1753            ParsedSection {
1754                anchor: "concept-url".to_string(),
1755                title: Some("URL".to_string()),
1756                content_text: None,
1757                section_type: SectionType::Definition,
1758                parent_anchor: None,
1759                prev_anchor: None,
1760                next_anchor: None,
1761                depth: None,
1762            },
1763            ParsedSection {
1764                anchor: "concept-relevant-global".to_string(),
1765                title: Some("relevant global object".to_string()),
1766                content_text: None,
1767                section_type: SectionType::Definition,
1768                parent_anchor: None,
1769                prev_anchor: None,
1770                next_anchor: None,
1771                depth: None,
1772            },
1773        ];
1774        write::insert_sections_bulk(&conn, url_snapshot, &url_sections).unwrap();
1775
1776        let html_refs = vec![
1777            ParsedReference {
1778                from_anchor: "navigate".to_string(),
1779                to_spec: "DOM".to_string(),
1780                to_anchor: "concept-tree".to_string(),
1781            },
1782            ParsedReference {
1783                from_anchor: "navigate".to_string(),
1784                to_spec: "URL".to_string(),
1785                to_anchor: "concept-url".to_string(),
1786            },
1787            ParsedReference {
1788                from_anchor: "some-consumer".to_string(),
1789                to_spec: "HTML".to_string(),
1790                to_anchor: "dom-window-navigation".to_string(),
1791            },
1792            ParsedReference {
1793                from_anchor: "dom-window-navigation".to_string(),
1794                to_spec: "URL".to_string(),
1795                to_anchor: "concept-url".to_string(),
1796            },
1797            ParsedReference {
1798                from_anchor: "dom-worker-navigation".to_string(),
1799                to_spec: "DOM".to_string(),
1800                to_anchor: "concept-tree".to_string(),
1801            },
1802            ParsedReference {
1803                from_anchor: "dom-window-navigation-helper".to_string(),
1804                to_spec: "HTML".to_string(),
1805                to_anchor: "navigate".to_string(),
1806            },
1807        ];
1808        write::insert_refs_bulk(&conn, html_snapshot, &html_refs).unwrap();
1809
1810        let dom_refs = vec![ParsedReference {
1811            from_anchor: "concept-tree".to_string(),
1812            to_spec: "URL".to_string(),
1813            to_anchor: "concept-url".to_string(),
1814        }];
1815        write::insert_refs_bulk(&conn, dom_snapshot, &dom_refs).unwrap();
1816
1817        let url_refs = vec![ParsedReference {
1818            from_anchor: "concept-relevant-global".to_string(),
1819            to_spec: "HTML".to_string(),
1820            to_anchor: "dom-window-navigation-helper".to_string(),
1821        }];
1822        write::insert_refs_bulk(&conn, url_snapshot, &url_refs).unwrap();
1823
1824        let idl_defs = vec![
1825            crate::model::ParsedIdlDefinition {
1826                anchor: "dom-window".to_string(),
1827                name: "Window".to_string(),
1828                owner: None,
1829                kind: "interface".to_string(),
1830                canonical_name: "Window".to_string(),
1831                idl_text: Some("interface Window { ... };".to_string()),
1832            },
1833            crate::model::ParsedIdlDefinition {
1834                anchor: "dom-window-navigation".to_string(),
1835                name: "navigation".to_string(),
1836                owner: Some("Window".to_string()),
1837                kind: "attribute".to_string(),
1838                canonical_name: "Window.navigation".to_string(),
1839                idl_text: Some(
1840                    "interface Window { attribute Navigation navigation; };".to_string(),
1841                ),
1842            },
1843            crate::model::ParsedIdlDefinition {
1844                anchor: "dom-window-open".to_string(),
1845                name: "open(url)".to_string(),
1846                owner: Some("Window".to_string()),
1847                kind: "method".to_string(),
1848                canonical_name: "Window.open".to_string(),
1849                idl_text: Some("interface Window { undefined open(DOMString url); };".to_string()),
1850            },
1851        ];
1852        write::insert_idl_defs_bulk(&conn, html_snapshot, &idl_defs).unwrap();
1853
1854        conn
1855    }
1856
1857    #[test]
1858    fn parse_spec_anchor_classic_format() {
1859        let (spec, anchor, _) = parse_spec_anchor("HTML#navigate").unwrap();
1860        assert_eq!(spec, "HTML");
1861        assert_eq!(anchor, "navigate");
1862    }
1863
1864    #[test]
1865    fn parse_spec_anchor_url_format() {
1866        let (spec, anchor, _) =
1867            parse_spec_anchor("https://html.spec.whatwg.org/#navigate").unwrap();
1868        assert_eq!(spec, "HTML");
1869        assert_eq!(anchor, "navigate");
1870    }
1871
1872    #[test]
1873    fn parse_spec_anchor_url_dom() {
1874        let (spec, anchor, _) =
1875            parse_spec_anchor("https://dom.spec.whatwg.org/#concept-tree").unwrap();
1876        assert_eq!(spec, "DOM");
1877        assert_eq!(anchor, "concept-tree");
1878    }
1879
1880    #[test]
1881    fn parse_spec_anchor_url_without_scheme() {
1882        let (spec, anchor, _) = parse_spec_anchor("html.spec.whatwg.org/#navigate").unwrap();
1883        assert_eq!(spec, "HTML");
1884        assert_eq!(anchor, "navigate");
1885    }
1886
1887    #[test]
1888    fn parse_spec_anchor_ietf_rfc_url() {
1889        let (spec, anchor, base_url) =
1890            parse_spec_anchor("https://www.rfc-editor.org/rfc/rfc9110.html#section-5").unwrap();
1891        assert_eq!(spec, "RFC9110");
1892        assert_eq!(anchor, "section-5");
1893        assert!(base_url.is_some());
1894    }
1895
1896    #[test]
1897    fn parse_spec_anchor_ietf_datatracker_url() {
1898        let (spec, anchor, base_url) =
1899            parse_spec_anchor("https://datatracker.ietf.org/doc/html/rfc9110#section-5").unwrap();
1900        assert_eq!(spec, "RFC9110");
1901        assert_eq!(anchor, "section-5");
1902        assert!(base_url.is_some());
1903    }
1904
1905    #[test]
1906    fn parse_spec_anchor_ietf_draft() {
1907        let (spec, anchor, _) = parse_spec_anchor("RFC9110#section-5").unwrap();
1908        assert_eq!(spec, "RFC9110");
1909        assert_eq!(anchor, "section-5");
1910    }
1911
1912    #[test]
1913    fn parse_spec_anchor_auto_whitelisted_url() {
1914        let (spec, anchor, base_url) = parse_spec_anchor(
1915            "https://w3c.github.io/webappsec-permissions-policy/#permissions-policy-header",
1916        )
1917        .unwrap();
1918        assert_eq!(spec, "WEBAPPSEC-PERMISSIONS-POLICY");
1919        assert_eq!(anchor, "permissions-policy-header");
1920        assert_eq!(
1921            base_url.as_deref(),
1922            Some("https://w3c.github.io/webappsec-permissions-policy")
1923        );
1924    }
1925
1926    #[test]
1927    fn parse_spec_anchor_unknown_url() {
1928        let result = parse_spec_anchor("https://example.com/#foo");
1929        assert!(result.is_err());
1930    }
1931
1932    #[test]
1933    fn parse_spec_anchor_invalid() {
1934        let result = parse_spec_anchor("no-hash");
1935        assert!(result.is_err());
1936    }
1937
1938    #[test]
1939    fn spec_urls_returns_without_panicking() {
1940        let urls = spec_urls();
1941        assert!(urls.iter().all(|entry| !entry.spec.is_empty()));
1942        assert!(urls.iter().all(|entry| entry.base_url.starts_with("http")));
1943    }
1944
1945    #[test]
1946    fn sanitize_for_fts_handles_punctuation() {
1947        let sanitized = sanitize_for_fts("Where is attribute reflection defined?");
1948        assert_eq!(
1949            sanitized.as_deref(),
1950            Some("Where is attribute reflection defined")
1951        );
1952    }
1953
1954    #[test]
1955    fn sanitize_for_fts_returns_none_when_no_terms() {
1956        assert_eq!(sanitize_for_fts("???"), None);
1957    }
1958
1959    #[test]
1960    fn detects_fts_syntax_error_message() {
1961        let err = rusqlite::Error::SqliteFailure(
1962            rusqlite::ffi::Error {
1963                code: rusqlite::ErrorCode::Unknown,
1964                extended_code: 1,
1965            },
1966            Some("fts5: syntax error near \"?\"".to_string()),
1967        );
1968        assert!(is_fts_syntax_error(&err));
1969    }
1970
1971    #[test]
1972    fn graph_outgoing_depth_one() {
1973        let conn = setup_reference_graph_db();
1974        let graph = build_graph_from_conn(
1975            &conn,
1976            "HTML",
1977            "navigate",
1978            "outgoing",
1979            1,
1980            50,
1981            &default_graph_filters(),
1982        )
1983        .unwrap();
1984
1985        assert_eq!(graph.root.spec, "HTML");
1986        assert_eq!(graph.root.anchor, "navigate");
1987        assert_eq!(graph.edges.len(), 2);
1988        assert!(graph
1989            .edges
1990            .iter()
1991            .any(|e| e.from == "HTML#navigate" && e.to == "DOM#concept-tree"));
1992        assert!(graph
1993            .edges
1994            .iter()
1995            .any(|e| e.from == "HTML#navigate" && e.to == "URL#concept-url"));
1996    }
1997
1998    #[test]
1999    fn graph_outgoing_depth_two_follows_transitive_edges() {
2000        let conn = setup_reference_graph_db();
2001        let graph = build_graph_from_conn(
2002            &conn,
2003            "HTML",
2004            "navigate",
2005            "outgoing",
2006            2,
2007            50,
2008            &default_graph_filters(),
2009        )
2010        .unwrap();
2011
2012        assert!(graph
2013            .edges
2014            .iter()
2015            .any(|e| { e.from == "DOM#concept-tree" && e.to == "URL#concept-url" }));
2016    }
2017
2018    #[test]
2019    fn find_references_exact_anchor_incoming() {
2020        let conn = setup_reference_graph_db();
2021        let result = find_references_from_conn(
2022            &conn,
2023            Some(("HTML".to_string(), "dom-window-navigation".to_string())),
2024            "HTML#dom-window-navigation",
2025            "incoming",
2026            10,
2027        )
2028        .unwrap();
2029
2030        assert_eq!(result.matches.len(), 1);
2031        let m = &result.matches[0];
2032        assert_eq!(m.resolution, "exact");
2033        assert!(m
2034            .incoming
2035            .as_ref()
2036            .unwrap()
2037            .iter()
2038            .any(|r| r.spec == "HTML" && r.anchor == "some-consumer"));
2039    }
2040
2041    #[test]
2042    fn find_references_property_shorthand_prefers_window_navigation() {
2043        let conn = setup_reference_graph_db();
2044        let result =
2045            find_references_from_conn(&conn, None, "Window.navigation", "incoming", 10).unwrap();
2046
2047        assert!(!result.matches.is_empty());
2048        let first = &result.matches[0];
2049        assert_eq!(first.spec, "HTML");
2050        assert_eq!(first.anchor, "dom-window-navigation");
2051        assert_eq!(first.resolution, "heuristic");
2052    }
2053
2054    #[test]
2055    fn graph_mermaid_render_contains_nodes_and_edges() {
2056        let conn = setup_reference_graph_db();
2057        let graph = build_graph_from_conn(
2058            &conn,
2059            "HTML",
2060            "navigate",
2061            "outgoing",
2062            1,
2063            50,
2064            &default_graph_filters(),
2065        )
2066        .unwrap();
2067        let mermaid = crate::format::graph_mermaid(&graph);
2068
2069        assert!(mermaid.contains("graph TD"));
2070        assert!(mermaid.contains("HTML#navigate"));
2071        assert!(mermaid.contains("-->"));
2072        assert!(mermaid.contains("<br>"));
2073        assert!(!mermaid.contains("\\n"));
2074    }
2075
2076    #[test]
2077    fn graph_dot_render_contains_nodes_and_edges() {
2078        let conn = setup_reference_graph_db();
2079        let graph = build_graph_from_conn(
2080            &conn,
2081            "HTML",
2082            "navigate",
2083            "outgoing",
2084            1,
2085            50,
2086            &default_graph_filters(),
2087        )
2088        .unwrap();
2089        let dot = crate::format::graph_dot(&graph);
2090
2091        assert!(dot.contains("digraph"));
2092        assert!(dot.contains("\"HTML#navigate\""));
2093        assert!(dot.contains("->"));
2094    }
2095
2096    #[test]
2097    fn graph_same_spec_only_keeps_only_root_spec_nodes() {
2098        let conn = setup_reference_graph_db();
2099        let mut filters = default_graph_filters();
2100        filters.same_spec_only = true;
2101
2102        let graph =
2103            build_graph_from_conn(&conn, "HTML", "navigate", "outgoing", 2, 50, &filters).unwrap();
2104
2105        assert!(graph.nodes.iter().all(|n| n.spec == "HTML"));
2106        assert!(graph.edges.is_empty());
2107    }
2108
2109    #[test]
2110    fn graph_wildcard_include_filters_nodes() {
2111        let conn = setup_reference_graph_db();
2112        let mut filters = default_graph_filters();
2113        filters.include = vec!["*concept-*".to_string()];
2114
2115        let graph =
2116            build_graph_from_conn(&conn, "HTML", "navigate", "outgoing", 1, 50, &filters).unwrap();
2117
2118        assert!(graph.nodes.iter().any(|n| n.id == "HTML#navigate"));
2119        assert!(graph.nodes.iter().any(|n| n.id == "DOM#concept-tree"));
2120        assert!(graph.nodes.iter().any(|n| n.id == "URL#concept-url"));
2121        assert!(!graph
2122            .nodes
2123            .iter()
2124            .any(|n| n.id == "HTML#dom-window-navigation"));
2125    }
2126
2127    #[test]
2128    fn graph_regex_exclude_filters_nodes() {
2129        let conn = setup_reference_graph_db();
2130        let mut filters = default_graph_filters();
2131        filters.exclude = vec!["re:^URL#".to_string()];
2132
2133        let graph =
2134            build_graph_from_conn(&conn, "HTML", "navigate", "outgoing", 1, 50, &filters).unwrap();
2135
2136        assert!(!graph.nodes.iter().any(|n| n.id == "URL#concept-url"));
2137        assert!(graph.nodes.iter().any(|n| n.id == "DOM#concept-tree"));
2138        assert!(!graph.edges.iter().any(|e| e.to == "URL#concept-url"));
2139    }
2140
2141    #[test]
2142    fn graph_filters_prune_disconnected_components() {
2143        let conn = setup_reference_graph_db();
2144        let mut filters = default_graph_filters();
2145        filters.include = vec!["*concept-*".to_string()];
2146
2147        let graph =
2148            build_graph_from_conn(&conn, "HTML", "navigate", "incoming", 2, 50, &filters).unwrap();
2149
2150        // concept-relevant-global exists via a non-matching intermediary.
2151        // The intermediary should be kept as a bridge node.
2152        assert!(graph.nodes.iter().any(|n| n.id == "HTML#navigate"));
2153        assert!(graph
2154            .nodes
2155            .iter()
2156            .any(|n| n.id == "URL#concept-relevant-global"));
2157        assert!(graph
2158            .nodes
2159            .iter()
2160            .any(|n| n.id == "HTML#dom-window-navigation-helper"));
2161        let bridge = graph
2162            .nodes
2163            .iter()
2164            .find(|n| n.id == "HTML#dom-window-navigation-helper")
2165            .unwrap();
2166        assert_eq!(bridge.filter_role.as_deref(), Some("bridge"));
2167        assert!(graph.edges.iter().any(|e| {
2168            e.from == "URL#concept-relevant-global" && e.to == "HTML#dom-window-navigation-helper"
2169        }));
2170        assert!(graph
2171            .edges
2172            .iter()
2173            .any(|e| { e.from == "HTML#dom-window-navigation-helper" && e.to == "HTML#navigate" }));
2174    }
2175
2176    #[test]
2177    fn graph_drops_self_referencing_edges() {
2178        let conn = setup_reference_graph_db();
2179        let dom_snapshot = db::queries::get_snapshot(&conn, "DOM").unwrap().unwrap();
2180        write::insert_refs_bulk(
2181            &conn,
2182            dom_snapshot,
2183            &[ParsedReference {
2184                from_anchor: "concept-tree".to_string(),
2185                to_spec: "DOM".to_string(),
2186                to_anchor: "concept-tree".to_string(),
2187            }],
2188        )
2189        .unwrap();
2190
2191        let graph = build_graph_from_conn(
2192            &conn,
2193            "DOM",
2194            "concept-tree",
2195            "outgoing",
2196            1,
2197            50,
2198            &default_graph_filters(),
2199        )
2200        .unwrap();
2201
2202        assert!(
2203            !graph
2204                .edges
2205                .iter()
2206                .any(|e| e.from == "DOM#concept-tree" && e.to == "DOM#concept-tree"),
2207            "Self-loop edges should be removed from graph output"
2208        );
2209    }
2210
2211    #[test]
2212    fn graph_mermaid_styles_bridge_nodes() {
2213        let conn = setup_reference_graph_db();
2214        let mut filters = default_graph_filters();
2215        filters.include = vec!["*concept-*".to_string()];
2216
2217        let graph =
2218            build_graph_from_conn(&conn, "HTML", "navigate", "incoming", 2, 50, &filters).unwrap();
2219        let mermaid = crate::format::graph_mermaid(&graph);
2220
2221        assert!(mermaid.contains("classDef bridge"));
2222        assert!(mermaid.contains("classDef root"));
2223        assert!(mermaid.contains("class "));
2224        assert!(
2225            !mermaid.contains("classDef bridge stroke-dasharray: 5 5;"),
2226            "Mermaid classDef must not end with semicolon"
2227        );
2228    }
2229
2230    #[test]
2231    fn graph_dot_label_newline_not_double_escaped() {
2232        let conn = setup_reference_graph_db();
2233        let graph = build_graph_from_conn(
2234            &conn,
2235            "HTML",
2236            "navigate",
2237            "outgoing",
2238            1,
2239            50,
2240            &default_graph_filters(),
2241        )
2242        .unwrap();
2243        let dot = crate::format::graph_dot(&graph);
2244
2245        // navigate has title "navigate" → DOT label must use a single \n (backslash-n),
2246        // not the double-escaped \\n that would appear if escape runs on the combined string.
2247        assert!(
2248            dot.contains("[label=\"HTML#navigate\\nnavigate\"]"),
2249            "DOT label should use single \\n as line separator, got:\n{}",
2250            dot
2251        );
2252    }
2253
2254    #[test]
2255    fn graph_max_nodes_truncation() {
2256        let conn = setup_reference_graph_db();
2257        // navigate has 2 outgoing refs; with max_nodes=2 one neighbour must be dropped
2258        let graph = build_graph_from_conn(
2259            &conn,
2260            "HTML",
2261            "navigate",
2262            "outgoing",
2263            2,
2264            2,
2265            &default_graph_filters(),
2266        )
2267        .unwrap();
2268        assert!(
2269            graph.truncated,
2270            "graph should be truncated when max_nodes is hit"
2271        );
2272        assert!(graph.nodes.len() <= 2);
2273    }
2274
2275    #[test]
2276    fn graph_incoming_depth_one() {
2277        let conn = setup_reference_graph_db();
2278        let graph = build_graph_from_conn(
2279            &conn,
2280            "HTML",
2281            "dom-window-navigation",
2282            "incoming",
2283            1,
2284            50,
2285            &default_graph_filters(),
2286        )
2287        .unwrap();
2288        assert!(
2289            graph
2290                .edges
2291                .iter()
2292                .any(|e| e.from == "HTML#some-consumer" && e.to == "HTML#dom-window-navigation"),
2293            "incoming edge from some-consumer should be present"
2294        );
2295    }
2296
2297    #[test]
2298    fn find_references_outgoing_direction() {
2299        let conn = setup_reference_graph_db();
2300        let result = find_references_from_conn(
2301            &conn,
2302            Some(("HTML".to_string(), "navigate".to_string())),
2303            "HTML#navigate",
2304            "outgoing",
2305            10,
2306        )
2307        .unwrap();
2308        assert_eq!(result.matches.len(), 1);
2309        assert_eq!(result.direction, "outgoing");
2310        let m = &result.matches[0];
2311        let outgoing = m.outgoing.as_ref().unwrap();
2312        assert!(outgoing
2313            .iter()
2314            .any(|r| r.spec == "DOM" && r.anchor == "concept-tree"));
2315        assert!(outgoing
2316            .iter()
2317            .any(|r| r.spec == "URL" && r.anchor == "concept-url"));
2318        assert!(m.incoming.is_none());
2319    }
2320
2321    #[test]
2322    fn query_idl_exact_anchor() {
2323        let conn = setup_reference_graph_db();
2324        let result = query_idl_from_conn(&conn, "HTML#dom-window-navigation", None, 10).unwrap();
2325
2326        assert_eq!(result.matches.len(), 1);
2327        let m = &result.matches[0];
2328        assert_eq!(m.spec, "HTML");
2329        assert_eq!(m.kind, "attribute");
2330        assert_eq!(m.canonical_name, "Window.navigation");
2331    }
2332
2333    #[test]
2334    fn query_idl_by_canonical_member() {
2335        let conn = setup_reference_graph_db();
2336        let result = query_idl_from_conn(&conn, "Window.navigation", None, 10).unwrap();
2337
2338        assert!(!result.matches.is_empty());
2339        assert_eq!(result.matches[0].canonical_name, "Window.navigation");
2340    }
2341
2342    #[test]
2343    fn query_idl_method_parentheses_normalized() {
2344        let conn = setup_reference_graph_db();
2345        let result = query_idl_from_conn(&conn, "Window.open()", None, 10).unwrap();
2346
2347        assert!(!result.matches.is_empty());
2348        assert_eq!(result.matches[0].canonical_name, "Window.open");
2349    }
2350}