Skip to main content

webspec_index/db/
queries.rs

1// Query operations on the database
2use crate::model::{ParsedSection, PrDiffEntry, SectionType};
3use anyhow::Result;
4use chrono::{DateTime, Utc};
5use rusqlite::Connection;
6use std::collections::HashMap;
7
8#[derive(Debug, Clone)]
9pub struct UpdateCheckState {
10    pub last_checked: DateTime<Utc>,
11    pub last_indexed: Option<DateTime<Utc>>,
12    pub content_hash: Option<String>,
13}
14
15pub type PrSnapshotRow = (String, i64, String, String, i64);
16
17/// List all cached PR snapshots with their section counts.
18pub fn list_pr_snapshots(conn: &Connection) -> Result<Vec<PrSnapshotRow>> {
19    let mut stmt = conn.prepare(
20        "SELECT sp.name, s.pr_number, s.sha, s.indexed_at,
21                (SELECT COUNT(*) FROM sections sec WHERE sec.snapshot_id = s.id)
22         FROM snapshots s
23         JOIN specs sp ON s.spec_id = sp.id
24         WHERE s.pr_number IS NOT NULL
25         ORDER BY sp.name, s.pr_number",
26    )?;
27    let rows = stmt
28        .query_map([], |row| {
29            Ok((
30                row.get(0)?,
31                row.get(1)?,
32                row.get(2)?,
33                row.get(3)?,
34                row.get(4)?,
35            ))
36        })?
37        .collect::<Result<Vec<_>, _>>()?;
38    Ok(rows)
39}
40
41/// Get a PR snapshot for a spec by name and PR number.
42/// Returns (snapshot_id, merge_base_sha) if found.
43pub fn get_pr_snapshot(
44    conn: &Connection,
45    spec_name: &str,
46    pr_number: i64,
47) -> Result<Option<(i64, String)>> {
48    let result = conn.query_row(
49        "SELECT s.id, s.merge_base_sha FROM snapshots s
50         JOIN specs sp ON s.spec_id = sp.id
51         WHERE sp.name = ?1 AND s.pr_number = ?2",
52        (spec_name, pr_number),
53        |row| Ok((row.get(0)?, row.get::<_, String>(1)?)),
54    );
55    match result {
56        Ok(r) => Ok(Some(r)),
57        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
58        Err(e) => Err(e.into()),
59    }
60}
61
62/// Get a commit snapshot by spec_id and exact SHA.
63/// Used to find cached merge base snapshots.
64pub fn get_commit_snapshot(conn: &Connection, spec_id: i64, sha: &str) -> Result<Option<i64>> {
65    let result = conn.query_row(
66        "SELECT id FROM snapshots
67         WHERE spec_id = ?1 AND sha = ?2 AND pr_number IS NULL",
68        (spec_id, sha),
69        |row| row.get(0),
70    );
71    match result {
72        Ok(id) => Ok(Some(id)),
73        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
74        Err(e) => Err(e.into()),
75    }
76}
77
78/// Get the list of page paths stored for a PR snapshot.
79pub fn get_pr_pages(conn: &Connection, snapshot_id: i64) -> Result<Vec<String>> {
80    let pages: Option<String> = conn.query_row(
81        "SELECT pr_pages FROM snapshots WHERE id = ?1",
82        [snapshot_id],
83        |row| row.get(0),
84    )?;
85    Ok(pages
86        .map(|s| {
87            s.split(',')
88                .filter(|p| !p.is_empty())
89                .map(|p| p.to_string())
90                .collect()
91        })
92        .unwrap_or_default())
93}
94
95/// Get the snapshot for a spec by name (each spec has at most one snapshot)
96pub fn get_snapshot(conn: &Connection, spec_name: &str) -> Result<Option<i64>> {
97    let result = conn.query_row(
98        "SELECT s.id FROM snapshots s
99         JOIN specs sp ON s.spec_id = sp.id
100         WHERE sp.name = ?1 AND s.pr_number IS NULL AND s.sha LIKE 'hash:%'",
101        [spec_name],
102        |row| row.get(0),
103    );
104
105    match result {
106        Ok(id) => Ok(Some(id)),
107        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
108        Err(e) => Err(e.into()),
109    }
110}
111
112/// Get canonical spec metadata by name (case-insensitive).
113/// Returns (name, base_url, provider).
114pub fn get_spec_meta(
115    conn: &Connection,
116    spec_name: &str,
117) -> Result<Option<(String, String, String)>> {
118    let row = conn.query_row(
119        "SELECT name, base_url, provider
120         FROM specs
121         WHERE LOWER(name) = LOWER(?1)
122         LIMIT 1",
123        [spec_name],
124        |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
125    );
126
127    match row {
128        Ok(meta) => Ok(Some(meta)),
129        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
130        Err(e) => Err(e.into()),
131    }
132}
133
134/// List all indexed/discovered specs as (name, base_url, provider).
135pub fn list_specs(conn: &Connection) -> Result<Vec<(String, String, String)>> {
136    let mut stmt = conn.prepare(
137        "SELECT name, base_url, provider
138         FROM specs
139         ORDER BY name",
140    )?;
141
142    let rows = stmt
143        .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?
144        .collect::<Result<Vec<_>, _>>()?;
145    Ok(rows)
146}
147
148/// Get sync metadata for a spec from update_checks.
149pub fn get_update_check(conn: &Connection, spec_id: i64) -> Result<Option<UpdateCheckState>> {
150    let row = conn.query_row(
151        "SELECT last_checked, last_indexed, content_hash
152         FROM update_checks
153         WHERE spec_id = ?1",
154        [spec_id],
155        |row| {
156            Ok((
157                row.get::<_, String>(0)?,
158                row.get::<_, Option<String>>(1)?,
159                row.get::<_, Option<String>>(2)?,
160            ))
161        },
162    );
163
164    match row {
165        Ok((checked, indexed, content_hash)) => {
166            let last_checked = DateTime::parse_from_rfc3339(&checked)
167                .map(|d| d.with_timezone(&Utc))
168                .map_err(|e| {
169                    rusqlite::Error::InvalidColumnType(
170                        0,
171                        e.to_string(),
172                        rusqlite::types::Type::Text,
173                    )
174                })?;
175
176            let last_indexed = match indexed {
177                Some(value) => Some(
178                    DateTime::parse_from_rfc3339(&value)
179                        .map(|d| d.with_timezone(&Utc))
180                        .map_err(|e| {
181                            rusqlite::Error::InvalidColumnType(
182                                1,
183                                e.to_string(),
184                                rusqlite::types::Type::Text,
185                            )
186                        })?,
187                ),
188                None => None,
189            };
190
191            Ok(Some(UpdateCheckState {
192                last_checked,
193                last_indexed,
194                content_hash,
195            }))
196        }
197        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
198        Err(e) => Err(e.into()),
199    }
200}
201
202/// Get a section by snapshot ID and anchor
203pub fn get_section(
204    conn: &Connection,
205    snapshot_id: i64,
206    anchor: &str,
207) -> Result<Option<ParsedSection>> {
208    let result = conn.query_row(
209        "SELECT anchor, title, content_text, section_type, parent_anchor, prev_anchor, next_anchor, depth
210         FROM sections
211         WHERE snapshot_id = ?1 AND anchor = ?2",
212        (snapshot_id, anchor),
213        |row| {
214            Ok(ParsedSection {
215                anchor: row.get(0)?,
216                title: row.get(1)?,
217                content_text: row.get(2)?,
218                section_type: row.get::<_, String>(3)?.parse::<SectionType>()
219                    .map_err(|_| rusqlite::Error::InvalidColumnType(3, "section_type".to_string(), rusqlite::types::Type::Text))?,
220                parent_anchor: row.get(4)?,
221                prev_anchor: row.get(5)?,
222                next_anchor: row.get(6)?,
223                depth: row.get(7)?,
224            })
225        },
226    );
227
228    match result {
229        Ok(section) => Ok(Some(section)),
230        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
231        Err(e) => Err(e.into()),
232    }
233}
234
235/// Get child sections (sections with this as parent)
236pub fn get_children(
237    conn: &Connection,
238    snapshot_id: i64,
239    parent_anchor: &str,
240) -> Result<Vec<(String, Option<String>)>> {
241    let mut stmt = conn.prepare(
242        "SELECT anchor, title FROM sections
243         WHERE snapshot_id = ?1 AND parent_anchor = ?2
244         ORDER BY rowid",
245    )?;
246
247    let children = stmt
248        .query_map((snapshot_id, parent_anchor), |row| {
249            Ok((row.get(0)?, row.get(1)?))
250        })?
251        .collect::<Result<Vec<_>, _>>()?;
252
253    Ok(children)
254}
255
256/// Get outgoing references from a section
257pub fn get_outgoing_refs(
258    conn: &Connection,
259    snapshot_id: i64,
260    from_anchor: &str,
261) -> Result<Vec<(String, String)>> {
262    let mut stmt = conn.prepare(
263        "SELECT to_spec, to_anchor FROM refs
264         WHERE snapshot_id = ?1 AND from_anchor = ?2",
265    )?;
266
267    let refs = stmt
268        .query_map((snapshot_id, from_anchor), |row| {
269            Ok((row.get(0)?, row.get(1)?))
270        })?
271        .collect::<Result<Vec<_>, _>>()?;
272
273    Ok(refs)
274}
275
276/// Get incoming references to a section
277/// Returns (from_spec, from_anchor) tuples
278/// Searches across all indexed specs to find cross-spec refs
279pub fn get_incoming_refs(
280    conn: &Connection,
281    to_spec: &str,
282    to_anchor: &str,
283) -> Result<Vec<(String, String)>> {
284    let mut stmt = conn.prepare(
285        "SELECT sp.name, r.from_anchor FROM refs r
286         JOIN snapshots sn ON r.snapshot_id = sn.id
287         JOIN specs sp ON sn.spec_id = sp.id
288         WHERE r.to_spec = ?1 AND r.to_anchor = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'",
289    )?;
290
291    let refs = stmt
292        .query_map((to_spec, to_anchor), |row| Ok((row.get(0)?, row.get(1)?)))?
293        .collect::<Result<Vec<_>, _>>()?;
294
295    Ok(refs)
296}
297
298/// Search sections using FTS5
299#[cfg(test)]
300pub fn search_sections(
301    conn: &Connection,
302    query: &str,
303    spec_filter: Option<&str>,
304    limit: u32,
305) -> Result<Vec<(String, String, Option<String>)>> {
306    let sql = if let Some(_spec) = spec_filter {
307        "SELECT s.anchor, sp.name, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
308         FROM sections_fts
309         JOIN sections s ON sections_fts.rowid = s.id
310         JOIN snapshots sn ON s.snapshot_id = sn.id
311         JOIN specs sp ON sn.spec_id = sp.id
312         WHERE sections_fts MATCH ?1 AND sp.name = ?2          LIMIT ?3"
313    } else {
314        "SELECT s.anchor, sp.name, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
315         FROM sections_fts
316         JOIN sections s ON sections_fts.rowid = s.id
317         JOIN snapshots sn ON s.snapshot_id = sn.id
318         JOIN specs sp ON sn.spec_id = sp.id
319         WHERE sections_fts MATCH ?1          LIMIT ?2"
320    };
321
322    let mut stmt = conn.prepare(sql)?;
323
324    let results = if let Some(spec) = spec_filter {
325        stmt.query_map((query, spec, limit), |row| {
326            Ok((row.get(0)?, row.get(1)?, row.get(2)?))
327        })?
328        .collect::<Result<Vec<_>, _>>()?
329    } else {
330        stmt.query_map((query, limit), |row| {
331            Ok((row.get(0)?, row.get(1)?, row.get(2)?))
332        })?
333        .collect::<Result<Vec<_>, _>>()?
334    };
335
336    Ok(results)
337}
338
339/// Find anchors matching a pattern
340#[cfg(test)]
341pub fn find_anchors(
342    conn: &Connection,
343    pattern: &str,
344    spec_filter: Option<&str>,
345    limit: u32,
346) -> Result<Vec<(String, String)>> {
347    let sql = if let Some(_spec) = spec_filter {
348        "SELECT s.anchor, sp.name FROM sections s
349         JOIN snapshots sn ON s.snapshot_id = sn.id
350         JOIN specs sp ON sn.spec_id = sp.id
351         WHERE s.anchor LIKE ?1 AND sp.name = ?2          LIMIT ?3"
352    } else {
353        "SELECT s.anchor, sp.name FROM sections s
354         JOIN snapshots sn ON s.snapshot_id = sn.id
355         JOIN specs sp ON sn.spec_id = sp.id
356         WHERE s.anchor LIKE ?1          LIMIT ?2"
357    };
358
359    let mut stmt = conn.prepare(sql)?;
360
361    let results = if let Some(spec) = spec_filter {
362        stmt.query_map((pattern, spec, limit), |row| Ok((row.get(0)?, row.get(1)?)))?
363            .collect::<Result<Vec<_>, _>>()?
364    } else {
365        stmt.query_map((pattern, limit), |row| Ok((row.get(0)?, row.get(1)?)))?
366            .collect::<Result<Vec<_>, _>>()?
367    };
368
369    Ok(results)
370}
371
372/// List all headings in a spec
373pub fn list_headings(conn: &Connection, snapshot_id: i64) -> Result<Vec<ParsedSection>> {
374    let mut stmt = conn.prepare(
375        "SELECT anchor, title, content_text, section_type, parent_anchor, prev_anchor, next_anchor, depth
376         FROM sections
377         WHERE snapshot_id = ?1 AND section_type = 'heading'
378         ORDER BY rowid",
379    )?;
380
381    let sections = stmt
382        .query_map([snapshot_id], |row| {
383            Ok(ParsedSection {
384                anchor: row.get(0)?,
385                title: row.get(1)?,
386                content_text: row.get(2)?,
387                section_type: row
388                    .get::<_, String>(3)?
389                    .parse::<SectionType>()
390                    .map_err(|_| {
391                        rusqlite::Error::InvalidColumnType(
392                            3,
393                            "section_type".to_string(),
394                            rusqlite::types::Type::Text,
395                        )
396                    })?,
397                parent_anchor: row.get(4)?,
398                prev_anchor: row.get(5)?,
399                next_anchor: row.get(6)?,
400                depth: row.get(7)?,
401            })
402        })?
403        .collect::<Result<Vec<_>, _>>()?;
404
405    Ok(sections)
406}
407
408fn normalize_content(s: &str) -> String {
409    let collapsed: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
410    // Normalize intra-spec link targets. Single-page specs use absolute self-refs
411    // like "(https://html.spec.whatwg.org#anchor)" while multipage PR previews
412    // use relative refs like "(dom.html#anchor)". Strip everything between "]("
413    // and "#" in markdown links so both compare as "](#anchor)".
414    normalize_link_targets(&collapsed)
415}
416
417fn normalize_link_targets(s: &str) -> String {
418    let mut result = String::with_capacity(s.len());
419    let bytes = s.as_bytes();
420    let mut i = 0;
421    while i < bytes.len() {
422        // Look for "](" pattern — start of a markdown link target
423        if i + 1 < bytes.len() && bytes[i] == b']' && bytes[i + 1] == b'(' {
424            result.push_str("](");
425            i += 2;
426            // Find the next '#' or ')' — whichever comes first
427            let link_start = i;
428            let mut found_hash = false;
429            while i < bytes.len() && bytes[i] != b')' {
430                if bytes[i] == b'#' {
431                    found_hash = true;
432                    break;
433                }
434                i += 1;
435            }
436            if found_hash {
437                // Skip everything between "](" and "#", keep from "#" onwards
438                // This normalizes both "https://html.spec.whatwg.org#x" and "dom.html#x" to "#x"
439            } else {
440                // No fragment — keep the URL as-is
441                let url = std::str::from_utf8(&bytes[link_start..i]).unwrap();
442                result.push_str(url);
443            }
444        } else {
445            result.push(bytes[i] as char);
446            i += 1;
447        }
448    }
449    result
450}
451
452fn content_eq(a: Option<&str>, b: Option<&str>) -> bool {
453    match (a, b) {
454        (None, None) => true,
455        (Some(a), Some(b)) => normalize_content(a) == normalize_content(b),
456        _ => false,
457    }
458}
459
460/// Compute a diff between a PR snapshot and its merge base snapshot.
461/// Returns entries for sections that were added or modified in the PR.
462pub fn compute_pr_diff(
463    conn: &Connection,
464    pr_snapshot_id: i64,
465    base_snapshot_id: i64,
466) -> Result<Vec<PrDiffEntry>> {
467    let mut pr_stmt =
468        conn.prepare("SELECT anchor, title, content_text FROM sections WHERE snapshot_id = ?1")?;
469    let pr_sections: HashMap<String, (Option<String>, Option<String>)> = pr_stmt
470        .query_map([pr_snapshot_id], |row| {
471            Ok((row.get::<_, String>(0)?, (row.get(1)?, row.get(2)?)))
472        })?
473        .collect::<Result<HashMap<_, _>, _>>()?;
474
475    let mut diffs = Vec::new();
476
477    for (anchor, (title, new_content)) in &pr_sections {
478        match get_section(conn, base_snapshot_id, anchor)? {
479            Some(base_section) => {
480                if !content_eq(base_section.content_text.as_deref(), new_content.as_deref()) {
481                    diffs.push(PrDiffEntry {
482                        anchor: anchor.clone(),
483                        title: title.clone(),
484                        change_type: "modified".to_string(),
485                        old_content: base_section.content_text,
486                        new_content: new_content.clone(),
487                    });
488                }
489            }
490            None => {
491                diffs.push(PrDiffEntry {
492                    anchor: anchor.clone(),
493                    title: title.clone(),
494                    change_type: "added".to_string(),
495                    old_content: None,
496                    new_content: new_content.clone(),
497                });
498            }
499        }
500    }
501
502    diffs.sort_by(|a, b| a.anchor.cmp(&b.anchor));
503    Ok(diffs)
504}
505
506#[cfg(test)]
507mod tests {
508    use super::*;
509    use crate::db::{self, write};
510
511    fn setup_test_data(conn: &Connection) -> Result<i64> {
512        let spec_id =
513            write::insert_or_get_spec(conn, "HTML", "https://html.spec.whatwg.org", "whatwg")?;
514        let snapshot_id =
515            write::insert_snapshot(conn, spec_id, "hash:abc123", "2026-01-01T00:00:00Z")?;
516
517        let sections = vec![
518            ParsedSection {
519                anchor: "intro".to_string(),
520                title: Some("Introduction".to_string()),
521                content_text: Some("This is an introduction to HTML".to_string()),
522                section_type: SectionType::Heading,
523                parent_anchor: None,
524                prev_anchor: None,
525                next_anchor: Some("details".to_string()),
526                depth: Some(2),
527            },
528            ParsedSection {
529                anchor: "details".to_string(),
530                title: Some("Details".to_string()),
531                content_text: Some("More details about the specification".to_string()),
532                section_type: SectionType::Heading,
533                parent_anchor: Some("intro".to_string()),
534                prev_anchor: Some("intro".to_string()),
535                next_anchor: None,
536                depth: Some(3),
537            },
538        ];
539
540        write::insert_sections_bulk(conn, snapshot_id, &sections)?;
541
542        Ok(snapshot_id)
543    }
544
545    #[test]
546    fn test_get_snapshot() {
547        let conn = db::open_test_db().unwrap();
548        let snapshot_id = setup_test_data(&conn).unwrap();
549
550        let result = get_snapshot(&conn, "HTML").unwrap();
551        assert_eq!(result, Some(snapshot_id));
552
553        let result = get_snapshot(&conn, "NonExistent").unwrap();
554        assert_eq!(result, None);
555    }
556
557    #[test]
558    fn test_get_spec_meta_and_list_specs() {
559        let conn = db::open_test_db().unwrap();
560        setup_test_data(&conn).unwrap();
561
562        let meta = get_spec_meta(&conn, "html").unwrap().unwrap();
563        assert_eq!(meta.0, "HTML");
564        assert_eq!(meta.1, "https://html.spec.whatwg.org");
565        assert_eq!(meta.2, "whatwg");
566
567        let specs = list_specs(&conn).unwrap();
568        assert_eq!(specs.len(), 1);
569        assert_eq!(specs[0].0, "HTML");
570    }
571
572    #[test]
573    fn test_get_section() {
574        let conn = db::open_test_db().unwrap();
575        let snapshot_id = setup_test_data(&conn).unwrap();
576
577        let section = get_section(&conn, snapshot_id, "intro").unwrap();
578        assert!(section.is_some());
579        let section = section.unwrap();
580        assert_eq!(section.anchor, "intro");
581        assert_eq!(section.title, Some("Introduction".to_string()));
582
583        let section = get_section(&conn, snapshot_id, "nonexistent").unwrap();
584        assert!(section.is_none());
585    }
586
587    #[test]
588    fn test_get_children() {
589        let conn = db::open_test_db().unwrap();
590        let snapshot_id = setup_test_data(&conn).unwrap();
591
592        let children = get_children(&conn, snapshot_id, "intro").unwrap();
593        assert_eq!(children.len(), 1);
594        assert_eq!(children[0].0, "details");
595    }
596
597    #[test]
598    fn test_search_sections() {
599        let conn = db::open_test_db().unwrap();
600        setup_test_data(&conn).unwrap();
601
602        let results = search_sections(&conn, "introduction", None, 10).unwrap();
603        assert!(!results.is_empty());
604        assert_eq!(results[0].0, "intro");
605    }
606
607    #[test]
608    fn test_find_anchors() {
609        let conn = db::open_test_db().unwrap();
610        setup_test_data(&conn).unwrap();
611
612        let results = find_anchors(&conn, "intro%", None, 10).unwrap();
613        assert!(!results.is_empty());
614        assert_eq!(results[0].0, "intro");
615    }
616
617    #[test]
618    fn test_list_headings() {
619        let conn = db::open_test_db().unwrap();
620        let snapshot_id = setup_test_data(&conn).unwrap();
621
622        let headings = list_headings(&conn, snapshot_id).unwrap();
623        assert_eq!(headings.len(), 2);
624        assert_eq!(headings[0].anchor, "intro");
625        assert_eq!(headings[1].anchor, "details");
626    }
627
628    #[test]
629    fn test_get_pr_snapshot() {
630        let conn = db::open_test_db().unwrap();
631        let spec_id =
632            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
633                .unwrap();
634
635        // No PR snapshot yet
636        let result = get_pr_snapshot(&conn, "HTML", 12345).unwrap();
637        assert!(result.is_none());
638
639        // Insert PR snapshot
640        let snap_id = write::insert_pr_snapshot(
641            &conn,
642            spec_id,
643            "pr-sha",
644            "2026-01-01T00:00:00Z",
645            12345,
646            "base-sha",
647            &[],
648        )
649        .unwrap();
650
651        let result = get_pr_snapshot(&conn, "HTML", 12345).unwrap();
652        assert_eq!(result, Some((snap_id, "base-sha".to_string())));
653    }
654
655    #[test]
656    fn test_get_commit_snapshot() {
657        let conn = db::open_test_db().unwrap();
658        let spec_id =
659            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
660                .unwrap();
661
662        // Insert a commit snapshot (regular snapshot with a real SHA)
663        let snap_id =
664            write::insert_snapshot(&conn, spec_id, "abc123full", "2026-01-01T00:00:00Z").unwrap();
665
666        let result = get_commit_snapshot(&conn, spec_id, "abc123full").unwrap();
667        assert_eq!(result, Some(snap_id));
668
669        let result = get_commit_snapshot(&conn, spec_id, "nonexistent").unwrap();
670        assert!(result.is_none());
671    }
672
673    #[test]
674    fn test_get_update_check() {
675        let conn = db::open_test_db().unwrap();
676        let spec_id =
677            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
678                .unwrap();
679
680        write::record_update_check(
681            &conn,
682            spec_id,
683            "2026-01-01T00:00:00Z",
684            Some("2026-01-01T00:00:00Z"),
685            Some("abc123"),
686        )
687        .unwrap();
688
689        let state = get_update_check(&conn, spec_id).unwrap().unwrap();
690        assert_eq!(state.last_checked.to_rfc3339(), "2026-01-01T00:00:00+00:00");
691        assert_eq!(
692            state.last_indexed.as_ref().unwrap().to_rfc3339(),
693            "2026-01-01T00:00:00+00:00"
694        );
695        assert_eq!(state.content_hash.as_deref(), Some("abc123"));
696    }
697
698    #[test]
699    fn test_compute_pr_diff() {
700        let conn = db::open_test_db().unwrap();
701        let spec_id =
702            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
703                .unwrap();
704
705        let base_id =
706            write::insert_snapshot(&conn, spec_id, "base", "2026-01-01T00:00:00Z").unwrap();
707        write::insert_sections_bulk(
708            &conn,
709            base_id,
710            &[
711                ParsedSection {
712                    anchor: "sec-a".into(),
713                    title: Some("A".into()),
714                    content_text: Some("Original A".into()),
715                    section_type: SectionType::Heading,
716                    parent_anchor: None,
717                    prev_anchor: None,
718                    next_anchor: None,
719                    depth: Some(2),
720                },
721                ParsedSection {
722                    anchor: "sec-b".into(),
723                    title: Some("B".into()),
724                    content_text: Some("Original B".into()),
725                    section_type: SectionType::Heading,
726                    parent_anchor: None,
727                    prev_anchor: None,
728                    next_anchor: None,
729                    depth: Some(2),
730                },
731            ],
732        )
733        .unwrap();
734
735        let pr_id = write::insert_pr_snapshot(
736            &conn,
737            spec_id,
738            "pr-sha",
739            "2026-01-01T00:00:00Z",
740            99,
741            "base",
742            &[],
743        )
744        .unwrap();
745        write::insert_sections_bulk(
746            &conn,
747            pr_id,
748            &[
749                ParsedSection {
750                    anchor: "sec-a".into(),
751                    title: Some("A".into()),
752                    content_text: Some("Modified A".into()),
753                    section_type: SectionType::Heading,
754                    parent_anchor: None,
755                    prev_anchor: None,
756                    next_anchor: None,
757                    depth: Some(2),
758                },
759                ParsedSection {
760                    anchor: "sec-d".into(),
761                    title: Some("D".into()),
762                    content_text: Some("New D".into()),
763                    section_type: SectionType::Heading,
764                    parent_anchor: None,
765                    prev_anchor: None,
766                    next_anchor: None,
767                    depth: Some(2),
768                },
769            ],
770        )
771        .unwrap();
772
773        let diff = compute_pr_diff(&conn, pr_id, base_id).unwrap();
774        assert_eq!(diff.len(), 2);
775
776        let modified: Vec<_> = diff
777            .iter()
778            .filter(|d| d.change_type == "modified")
779            .collect();
780        assert_eq!(modified.len(), 1);
781        assert_eq!(modified[0].anchor, "sec-a");
782
783        let added: Vec<_> = diff.iter().filter(|d| d.change_type == "added").collect();
784        assert_eq!(added.len(), 1);
785        assert_eq!(added[0].anchor, "sec-d");
786    }
787
788    #[test]
789    fn test_compute_pr_diff_ignores_whitespace_only_changes() {
790        let conn = db::open_test_db().unwrap();
791        let spec_id =
792            write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
793                .unwrap();
794
795        let base_id =
796            write::insert_snapshot(&conn, spec_id, "hash:base2", "2026-01-01T00:00:00Z").unwrap();
797        write::insert_sections_bulk(
798            &conn,
799            base_id,
800            &[
801                ParsedSection {
802                    anchor: "sec-a".into(),
803                    title: Some("A".into()),
804                    content_text: Some("Hello  world\n\nfoo".into()),
805                    section_type: SectionType::Heading,
806                    parent_anchor: None,
807                    prev_anchor: None,
808                    next_anchor: None,
809                    depth: Some(2),
810                },
811                ParsedSection {
812                    anchor: "sec-b".into(),
813                    title: Some("B".into()),
814                    content_text: Some("Real content B".into()),
815                    section_type: SectionType::Heading,
816                    parent_anchor: None,
817                    prev_anchor: None,
818                    next_anchor: None,
819                    depth: Some(2),
820                },
821            ],
822        )
823        .unwrap();
824
825        let pr_id = write::insert_pr_snapshot(
826            &conn,
827            spec_id,
828            "pr-sha2",
829            "2026-01-01T00:00:00Z",
830            98,
831            "hash:base2",
832            &[],
833        )
834        .unwrap();
835        write::insert_sections_bulk(
836            &conn,
837            pr_id,
838            &[
839                ParsedSection {
840                    anchor: "sec-a".into(),
841                    title: Some("A".into()),
842                    content_text: Some("Hello world\n\nfoo".into()),
843                    section_type: SectionType::Heading,
844                    parent_anchor: None,
845                    prev_anchor: None,
846                    next_anchor: None,
847                    depth: Some(2),
848                },
849                ParsedSection {
850                    anchor: "sec-b".into(),
851                    title: Some("B".into()),
852                    content_text: Some("Modified content B".into()),
853                    section_type: SectionType::Heading,
854                    parent_anchor: None,
855                    prev_anchor: None,
856                    next_anchor: None,
857                    depth: Some(2),
858                },
859            ],
860        )
861        .unwrap();
862
863        let diff = compute_pr_diff(&conn, pr_id, base_id).unwrap();
864        assert_eq!(diff.len(), 1);
865        assert_eq!(diff[0].anchor, "sec-b");
866        assert_eq!(diff[0].change_type, "modified");
867    }
868}