1use crate::model::{ParsedSection, PrDiffEntry, SectionType};
3use anyhow::Result;
4use chrono::{DateTime, Utc};
5use rusqlite::Connection;
6use std::collections::HashMap;
7
8#[derive(Debug, Clone)]
9pub struct UpdateCheckState {
10 pub last_checked: DateTime<Utc>,
11 pub last_indexed: Option<DateTime<Utc>>,
12 pub content_hash: Option<String>,
13}
14
15pub type PrSnapshotRow = (String, i64, String, String, i64);
16
17pub fn list_pr_snapshots(conn: &Connection) -> Result<Vec<PrSnapshotRow>> {
19 let mut stmt = conn.prepare(
20 "SELECT sp.name, s.pr_number, s.sha, s.indexed_at,
21 (SELECT COUNT(*) FROM sections sec WHERE sec.snapshot_id = s.id)
22 FROM snapshots s
23 JOIN specs sp ON s.spec_id = sp.id
24 WHERE s.pr_number IS NOT NULL
25 ORDER BY sp.name, s.pr_number",
26 )?;
27 let rows = stmt
28 .query_map([], |row| {
29 Ok((
30 row.get(0)?,
31 row.get(1)?,
32 row.get(2)?,
33 row.get(3)?,
34 row.get(4)?,
35 ))
36 })?
37 .collect::<Result<Vec<_>, _>>()?;
38 Ok(rows)
39}
40
41pub fn get_pr_snapshot(
44 conn: &Connection,
45 spec_name: &str,
46 pr_number: i64,
47) -> Result<Option<(i64, String)>> {
48 let result = conn.query_row(
49 "SELECT s.id, s.merge_base_sha FROM snapshots s
50 JOIN specs sp ON s.spec_id = sp.id
51 WHERE sp.name = ?1 AND s.pr_number = ?2",
52 (spec_name, pr_number),
53 |row| Ok((row.get(0)?, row.get::<_, String>(1)?)),
54 );
55 match result {
56 Ok(r) => Ok(Some(r)),
57 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
58 Err(e) => Err(e.into()),
59 }
60}
61
62pub fn get_commit_snapshot(conn: &Connection, spec_id: i64, sha: &str) -> Result<Option<i64>> {
65 let result = conn.query_row(
66 "SELECT id FROM snapshots
67 WHERE spec_id = ?1 AND sha = ?2 AND pr_number IS NULL",
68 (spec_id, sha),
69 |row| row.get(0),
70 );
71 match result {
72 Ok(id) => Ok(Some(id)),
73 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
74 Err(e) => Err(e.into()),
75 }
76}
77
78pub fn get_pr_pages(conn: &Connection, snapshot_id: i64) -> Result<Vec<String>> {
80 let pages: Option<String> = conn.query_row(
81 "SELECT pr_pages FROM snapshots WHERE id = ?1",
82 [snapshot_id],
83 |row| row.get(0),
84 )?;
85 Ok(pages
86 .map(|s| {
87 s.split(',')
88 .filter(|p| !p.is_empty())
89 .map(|p| p.to_string())
90 .collect()
91 })
92 .unwrap_or_default())
93}
94
95pub fn get_snapshot(conn: &Connection, spec_name: &str) -> Result<Option<i64>> {
97 let result = conn.query_row(
98 "SELECT s.id FROM snapshots s
99 JOIN specs sp ON s.spec_id = sp.id
100 WHERE sp.name = ?1 AND s.pr_number IS NULL AND s.sha LIKE 'hash:%'",
101 [spec_name],
102 |row| row.get(0),
103 );
104
105 match result {
106 Ok(id) => Ok(Some(id)),
107 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
108 Err(e) => Err(e.into()),
109 }
110}
111
112pub fn get_spec_meta(
115 conn: &Connection,
116 spec_name: &str,
117) -> Result<Option<(String, String, String)>> {
118 let row = conn.query_row(
119 "SELECT name, base_url, provider
120 FROM specs
121 WHERE LOWER(name) = LOWER(?1)
122 LIMIT 1",
123 [spec_name],
124 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
125 );
126
127 match row {
128 Ok(meta) => Ok(Some(meta)),
129 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
130 Err(e) => Err(e.into()),
131 }
132}
133
134pub fn list_specs(conn: &Connection) -> Result<Vec<(String, String, String)>> {
136 let mut stmt = conn.prepare(
137 "SELECT name, base_url, provider
138 FROM specs
139 ORDER BY name",
140 )?;
141
142 let rows = stmt
143 .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?
144 .collect::<Result<Vec<_>, _>>()?;
145 Ok(rows)
146}
147
148pub fn get_update_check(conn: &Connection, spec_id: i64) -> Result<Option<UpdateCheckState>> {
150 let row = conn.query_row(
151 "SELECT last_checked, last_indexed, content_hash
152 FROM update_checks
153 WHERE spec_id = ?1",
154 [spec_id],
155 |row| {
156 Ok((
157 row.get::<_, String>(0)?,
158 row.get::<_, Option<String>>(1)?,
159 row.get::<_, Option<String>>(2)?,
160 ))
161 },
162 );
163
164 match row {
165 Ok((checked, indexed, content_hash)) => {
166 let last_checked = DateTime::parse_from_rfc3339(&checked)
167 .map(|d| d.with_timezone(&Utc))
168 .map_err(|e| {
169 rusqlite::Error::InvalidColumnType(
170 0,
171 e.to_string(),
172 rusqlite::types::Type::Text,
173 )
174 })?;
175
176 let last_indexed = match indexed {
177 Some(value) => Some(
178 DateTime::parse_from_rfc3339(&value)
179 .map(|d| d.with_timezone(&Utc))
180 .map_err(|e| {
181 rusqlite::Error::InvalidColumnType(
182 1,
183 e.to_string(),
184 rusqlite::types::Type::Text,
185 )
186 })?,
187 ),
188 None => None,
189 };
190
191 Ok(Some(UpdateCheckState {
192 last_checked,
193 last_indexed,
194 content_hash,
195 }))
196 }
197 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
198 Err(e) => Err(e.into()),
199 }
200}
201
202pub fn get_section(
204 conn: &Connection,
205 snapshot_id: i64,
206 anchor: &str,
207) -> Result<Option<ParsedSection>> {
208 let result = conn.query_row(
209 "SELECT anchor, title, content_text, section_type, parent_anchor, prev_anchor, next_anchor, depth
210 FROM sections
211 WHERE snapshot_id = ?1 AND anchor = ?2",
212 (snapshot_id, anchor),
213 |row| {
214 Ok(ParsedSection {
215 anchor: row.get(0)?,
216 title: row.get(1)?,
217 content_text: row.get(2)?,
218 section_type: row.get::<_, String>(3)?.parse::<SectionType>()
219 .map_err(|_| rusqlite::Error::InvalidColumnType(3, "section_type".to_string(), rusqlite::types::Type::Text))?,
220 parent_anchor: row.get(4)?,
221 prev_anchor: row.get(5)?,
222 next_anchor: row.get(6)?,
223 depth: row.get(7)?,
224 })
225 },
226 );
227
228 match result {
229 Ok(section) => Ok(Some(section)),
230 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
231 Err(e) => Err(e.into()),
232 }
233}
234
235pub fn get_children(
237 conn: &Connection,
238 snapshot_id: i64,
239 parent_anchor: &str,
240) -> Result<Vec<(String, Option<String>)>> {
241 let mut stmt = conn.prepare(
242 "SELECT anchor, title FROM sections
243 WHERE snapshot_id = ?1 AND parent_anchor = ?2
244 ORDER BY rowid",
245 )?;
246
247 let children = stmt
248 .query_map((snapshot_id, parent_anchor), |row| {
249 Ok((row.get(0)?, row.get(1)?))
250 })?
251 .collect::<Result<Vec<_>, _>>()?;
252
253 Ok(children)
254}
255
256pub fn get_outgoing_refs(
258 conn: &Connection,
259 snapshot_id: i64,
260 from_anchor: &str,
261) -> Result<Vec<(String, String)>> {
262 let mut stmt = conn.prepare(
263 "SELECT to_spec, to_anchor FROM refs
264 WHERE snapshot_id = ?1 AND from_anchor = ?2",
265 )?;
266
267 let refs = stmt
268 .query_map((snapshot_id, from_anchor), |row| {
269 Ok((row.get(0)?, row.get(1)?))
270 })?
271 .collect::<Result<Vec<_>, _>>()?;
272
273 Ok(refs)
274}
275
276pub fn get_incoming_refs(
280 conn: &Connection,
281 to_spec: &str,
282 to_anchor: &str,
283) -> Result<Vec<(String, String)>> {
284 let mut stmt = conn.prepare(
285 "SELECT sp.name, r.from_anchor FROM refs r
286 JOIN snapshots sn ON r.snapshot_id = sn.id
287 JOIN specs sp ON sn.spec_id = sp.id
288 WHERE r.to_spec = ?1 AND r.to_anchor = ?2 AND sn.pr_number IS NULL AND sn.sha LIKE 'hash:%'",
289 )?;
290
291 let refs = stmt
292 .query_map((to_spec, to_anchor), |row| Ok((row.get(0)?, row.get(1)?)))?
293 .collect::<Result<Vec<_>, _>>()?;
294
295 Ok(refs)
296}
297
298#[cfg(test)]
300pub fn search_sections(
301 conn: &Connection,
302 query: &str,
303 spec_filter: Option<&str>,
304 limit: u32,
305) -> Result<Vec<(String, String, Option<String>)>> {
306 let sql = if let Some(_spec) = spec_filter {
307 "SELECT s.anchor, sp.name, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
308 FROM sections_fts
309 JOIN sections s ON sections_fts.rowid = s.id
310 JOIN snapshots sn ON s.snapshot_id = sn.id
311 JOIN specs sp ON sn.spec_id = sp.id
312 WHERE sections_fts MATCH ?1 AND sp.name = ?2 LIMIT ?3"
313 } else {
314 "SELECT s.anchor, sp.name, snippet(sections_fts, 2, '<mark>', '</mark>', '...', 64)
315 FROM sections_fts
316 JOIN sections s ON sections_fts.rowid = s.id
317 JOIN snapshots sn ON s.snapshot_id = sn.id
318 JOIN specs sp ON sn.spec_id = sp.id
319 WHERE sections_fts MATCH ?1 LIMIT ?2"
320 };
321
322 let mut stmt = conn.prepare(sql)?;
323
324 let results = if let Some(spec) = spec_filter {
325 stmt.query_map((query, spec, limit), |row| {
326 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
327 })?
328 .collect::<Result<Vec<_>, _>>()?
329 } else {
330 stmt.query_map((query, limit), |row| {
331 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
332 })?
333 .collect::<Result<Vec<_>, _>>()?
334 };
335
336 Ok(results)
337}
338
339#[cfg(test)]
341pub fn find_anchors(
342 conn: &Connection,
343 pattern: &str,
344 spec_filter: Option<&str>,
345 limit: u32,
346) -> Result<Vec<(String, String)>> {
347 let sql = if let Some(_spec) = spec_filter {
348 "SELECT s.anchor, sp.name FROM sections s
349 JOIN snapshots sn ON s.snapshot_id = sn.id
350 JOIN specs sp ON sn.spec_id = sp.id
351 WHERE s.anchor LIKE ?1 AND sp.name = ?2 LIMIT ?3"
352 } else {
353 "SELECT s.anchor, sp.name FROM sections s
354 JOIN snapshots sn ON s.snapshot_id = sn.id
355 JOIN specs sp ON sn.spec_id = sp.id
356 WHERE s.anchor LIKE ?1 LIMIT ?2"
357 };
358
359 let mut stmt = conn.prepare(sql)?;
360
361 let results = if let Some(spec) = spec_filter {
362 stmt.query_map((pattern, spec, limit), |row| Ok((row.get(0)?, row.get(1)?)))?
363 .collect::<Result<Vec<_>, _>>()?
364 } else {
365 stmt.query_map((pattern, limit), |row| Ok((row.get(0)?, row.get(1)?)))?
366 .collect::<Result<Vec<_>, _>>()?
367 };
368
369 Ok(results)
370}
371
372pub fn list_headings(conn: &Connection, snapshot_id: i64) -> Result<Vec<ParsedSection>> {
374 let mut stmt = conn.prepare(
375 "SELECT anchor, title, content_text, section_type, parent_anchor, prev_anchor, next_anchor, depth
376 FROM sections
377 WHERE snapshot_id = ?1 AND section_type = 'heading'
378 ORDER BY rowid",
379 )?;
380
381 let sections = stmt
382 .query_map([snapshot_id], |row| {
383 Ok(ParsedSection {
384 anchor: row.get(0)?,
385 title: row.get(1)?,
386 content_text: row.get(2)?,
387 section_type: row
388 .get::<_, String>(3)?
389 .parse::<SectionType>()
390 .map_err(|_| {
391 rusqlite::Error::InvalidColumnType(
392 3,
393 "section_type".to_string(),
394 rusqlite::types::Type::Text,
395 )
396 })?,
397 parent_anchor: row.get(4)?,
398 prev_anchor: row.get(5)?,
399 next_anchor: row.get(6)?,
400 depth: row.get(7)?,
401 })
402 })?
403 .collect::<Result<Vec<_>, _>>()?;
404
405 Ok(sections)
406}
407
408fn normalize_content(s: &str) -> String {
409 let collapsed: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
410 normalize_link_targets(&collapsed)
415}
416
417fn normalize_link_targets(s: &str) -> String {
418 let mut result = String::with_capacity(s.len());
419 let bytes = s.as_bytes();
420 let mut i = 0;
421 while i < bytes.len() {
422 if i + 1 < bytes.len() && bytes[i] == b']' && bytes[i + 1] == b'(' {
424 result.push_str("](");
425 i += 2;
426 let link_start = i;
428 let mut found_hash = false;
429 while i < bytes.len() && bytes[i] != b')' {
430 if bytes[i] == b'#' {
431 found_hash = true;
432 break;
433 }
434 i += 1;
435 }
436 if found_hash {
437 } else {
440 let url = std::str::from_utf8(&bytes[link_start..i]).unwrap();
442 result.push_str(url);
443 }
444 } else {
445 result.push(bytes[i] as char);
446 i += 1;
447 }
448 }
449 result
450}
451
452fn content_eq(a: Option<&str>, b: Option<&str>) -> bool {
453 match (a, b) {
454 (None, None) => true,
455 (Some(a), Some(b)) => normalize_content(a) == normalize_content(b),
456 _ => false,
457 }
458}
459
460pub fn compute_pr_diff(
463 conn: &Connection,
464 pr_snapshot_id: i64,
465 base_snapshot_id: i64,
466) -> Result<Vec<PrDiffEntry>> {
467 let mut pr_stmt =
468 conn.prepare("SELECT anchor, title, content_text FROM sections WHERE snapshot_id = ?1")?;
469 let pr_sections: HashMap<String, (Option<String>, Option<String>)> = pr_stmt
470 .query_map([pr_snapshot_id], |row| {
471 Ok((row.get::<_, String>(0)?, (row.get(1)?, row.get(2)?)))
472 })?
473 .collect::<Result<HashMap<_, _>, _>>()?;
474
475 let mut diffs = Vec::new();
476
477 for (anchor, (title, new_content)) in &pr_sections {
478 match get_section(conn, base_snapshot_id, anchor)? {
479 Some(base_section) => {
480 if !content_eq(base_section.content_text.as_deref(), new_content.as_deref()) {
481 diffs.push(PrDiffEntry {
482 anchor: anchor.clone(),
483 title: title.clone(),
484 change_type: "modified".to_string(),
485 old_content: base_section.content_text,
486 new_content: new_content.clone(),
487 });
488 }
489 }
490 None => {
491 diffs.push(PrDiffEntry {
492 anchor: anchor.clone(),
493 title: title.clone(),
494 change_type: "added".to_string(),
495 old_content: None,
496 new_content: new_content.clone(),
497 });
498 }
499 }
500 }
501
502 diffs.sort_by(|a, b| a.anchor.cmp(&b.anchor));
503 Ok(diffs)
504}
505
506#[cfg(test)]
507mod tests {
508 use super::*;
509 use crate::db::{self, write};
510
511 fn setup_test_data(conn: &Connection) -> Result<i64> {
512 let spec_id =
513 write::insert_or_get_spec(conn, "HTML", "https://html.spec.whatwg.org", "whatwg")?;
514 let snapshot_id =
515 write::insert_snapshot(conn, spec_id, "hash:abc123", "2026-01-01T00:00:00Z")?;
516
517 let sections = vec![
518 ParsedSection {
519 anchor: "intro".to_string(),
520 title: Some("Introduction".to_string()),
521 content_text: Some("This is an introduction to HTML".to_string()),
522 section_type: SectionType::Heading,
523 parent_anchor: None,
524 prev_anchor: None,
525 next_anchor: Some("details".to_string()),
526 depth: Some(2),
527 },
528 ParsedSection {
529 anchor: "details".to_string(),
530 title: Some("Details".to_string()),
531 content_text: Some("More details about the specification".to_string()),
532 section_type: SectionType::Heading,
533 parent_anchor: Some("intro".to_string()),
534 prev_anchor: Some("intro".to_string()),
535 next_anchor: None,
536 depth: Some(3),
537 },
538 ];
539
540 write::insert_sections_bulk(conn, snapshot_id, §ions)?;
541
542 Ok(snapshot_id)
543 }
544
545 #[test]
546 fn test_get_snapshot() {
547 let conn = db::open_test_db().unwrap();
548 let snapshot_id = setup_test_data(&conn).unwrap();
549
550 let result = get_snapshot(&conn, "HTML").unwrap();
551 assert_eq!(result, Some(snapshot_id));
552
553 let result = get_snapshot(&conn, "NonExistent").unwrap();
554 assert_eq!(result, None);
555 }
556
557 #[test]
558 fn test_get_spec_meta_and_list_specs() {
559 let conn = db::open_test_db().unwrap();
560 setup_test_data(&conn).unwrap();
561
562 let meta = get_spec_meta(&conn, "html").unwrap().unwrap();
563 assert_eq!(meta.0, "HTML");
564 assert_eq!(meta.1, "https://html.spec.whatwg.org");
565 assert_eq!(meta.2, "whatwg");
566
567 let specs = list_specs(&conn).unwrap();
568 assert_eq!(specs.len(), 1);
569 assert_eq!(specs[0].0, "HTML");
570 }
571
572 #[test]
573 fn test_get_section() {
574 let conn = db::open_test_db().unwrap();
575 let snapshot_id = setup_test_data(&conn).unwrap();
576
577 let section = get_section(&conn, snapshot_id, "intro").unwrap();
578 assert!(section.is_some());
579 let section = section.unwrap();
580 assert_eq!(section.anchor, "intro");
581 assert_eq!(section.title, Some("Introduction".to_string()));
582
583 let section = get_section(&conn, snapshot_id, "nonexistent").unwrap();
584 assert!(section.is_none());
585 }
586
587 #[test]
588 fn test_get_children() {
589 let conn = db::open_test_db().unwrap();
590 let snapshot_id = setup_test_data(&conn).unwrap();
591
592 let children = get_children(&conn, snapshot_id, "intro").unwrap();
593 assert_eq!(children.len(), 1);
594 assert_eq!(children[0].0, "details");
595 }
596
597 #[test]
598 fn test_search_sections() {
599 let conn = db::open_test_db().unwrap();
600 setup_test_data(&conn).unwrap();
601
602 let results = search_sections(&conn, "introduction", None, 10).unwrap();
603 assert!(!results.is_empty());
604 assert_eq!(results[0].0, "intro");
605 }
606
607 #[test]
608 fn test_find_anchors() {
609 let conn = db::open_test_db().unwrap();
610 setup_test_data(&conn).unwrap();
611
612 let results = find_anchors(&conn, "intro%", None, 10).unwrap();
613 assert!(!results.is_empty());
614 assert_eq!(results[0].0, "intro");
615 }
616
617 #[test]
618 fn test_list_headings() {
619 let conn = db::open_test_db().unwrap();
620 let snapshot_id = setup_test_data(&conn).unwrap();
621
622 let headings = list_headings(&conn, snapshot_id).unwrap();
623 assert_eq!(headings.len(), 2);
624 assert_eq!(headings[0].anchor, "intro");
625 assert_eq!(headings[1].anchor, "details");
626 }
627
628 #[test]
629 fn test_get_pr_snapshot() {
630 let conn = db::open_test_db().unwrap();
631 let spec_id =
632 write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
633 .unwrap();
634
635 let result = get_pr_snapshot(&conn, "HTML", 12345).unwrap();
637 assert!(result.is_none());
638
639 let snap_id = write::insert_pr_snapshot(
641 &conn,
642 spec_id,
643 "pr-sha",
644 "2026-01-01T00:00:00Z",
645 12345,
646 "base-sha",
647 &[],
648 )
649 .unwrap();
650
651 let result = get_pr_snapshot(&conn, "HTML", 12345).unwrap();
652 assert_eq!(result, Some((snap_id, "base-sha".to_string())));
653 }
654
655 #[test]
656 fn test_get_commit_snapshot() {
657 let conn = db::open_test_db().unwrap();
658 let spec_id =
659 write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
660 .unwrap();
661
662 let snap_id =
664 write::insert_snapshot(&conn, spec_id, "abc123full", "2026-01-01T00:00:00Z").unwrap();
665
666 let result = get_commit_snapshot(&conn, spec_id, "abc123full").unwrap();
667 assert_eq!(result, Some(snap_id));
668
669 let result = get_commit_snapshot(&conn, spec_id, "nonexistent").unwrap();
670 assert!(result.is_none());
671 }
672
673 #[test]
674 fn test_get_update_check() {
675 let conn = db::open_test_db().unwrap();
676 let spec_id =
677 write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
678 .unwrap();
679
680 write::record_update_check(
681 &conn,
682 spec_id,
683 "2026-01-01T00:00:00Z",
684 Some("2026-01-01T00:00:00Z"),
685 Some("abc123"),
686 )
687 .unwrap();
688
689 let state = get_update_check(&conn, spec_id).unwrap().unwrap();
690 assert_eq!(state.last_checked.to_rfc3339(), "2026-01-01T00:00:00+00:00");
691 assert_eq!(
692 state.last_indexed.as_ref().unwrap().to_rfc3339(),
693 "2026-01-01T00:00:00+00:00"
694 );
695 assert_eq!(state.content_hash.as_deref(), Some("abc123"));
696 }
697
698 #[test]
699 fn test_compute_pr_diff() {
700 let conn = db::open_test_db().unwrap();
701 let spec_id =
702 write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
703 .unwrap();
704
705 let base_id =
706 write::insert_snapshot(&conn, spec_id, "base", "2026-01-01T00:00:00Z").unwrap();
707 write::insert_sections_bulk(
708 &conn,
709 base_id,
710 &[
711 ParsedSection {
712 anchor: "sec-a".into(),
713 title: Some("A".into()),
714 content_text: Some("Original A".into()),
715 section_type: SectionType::Heading,
716 parent_anchor: None,
717 prev_anchor: None,
718 next_anchor: None,
719 depth: Some(2),
720 },
721 ParsedSection {
722 anchor: "sec-b".into(),
723 title: Some("B".into()),
724 content_text: Some("Original B".into()),
725 section_type: SectionType::Heading,
726 parent_anchor: None,
727 prev_anchor: None,
728 next_anchor: None,
729 depth: Some(2),
730 },
731 ],
732 )
733 .unwrap();
734
735 let pr_id = write::insert_pr_snapshot(
736 &conn,
737 spec_id,
738 "pr-sha",
739 "2026-01-01T00:00:00Z",
740 99,
741 "base",
742 &[],
743 )
744 .unwrap();
745 write::insert_sections_bulk(
746 &conn,
747 pr_id,
748 &[
749 ParsedSection {
750 anchor: "sec-a".into(),
751 title: Some("A".into()),
752 content_text: Some("Modified A".into()),
753 section_type: SectionType::Heading,
754 parent_anchor: None,
755 prev_anchor: None,
756 next_anchor: None,
757 depth: Some(2),
758 },
759 ParsedSection {
760 anchor: "sec-d".into(),
761 title: Some("D".into()),
762 content_text: Some("New D".into()),
763 section_type: SectionType::Heading,
764 parent_anchor: None,
765 prev_anchor: None,
766 next_anchor: None,
767 depth: Some(2),
768 },
769 ],
770 )
771 .unwrap();
772
773 let diff = compute_pr_diff(&conn, pr_id, base_id).unwrap();
774 assert_eq!(diff.len(), 2);
775
776 let modified: Vec<_> = diff
777 .iter()
778 .filter(|d| d.change_type == "modified")
779 .collect();
780 assert_eq!(modified.len(), 1);
781 assert_eq!(modified[0].anchor, "sec-a");
782
783 let added: Vec<_> = diff.iter().filter(|d| d.change_type == "added").collect();
784 assert_eq!(added.len(), 1);
785 assert_eq!(added[0].anchor, "sec-d");
786 }
787
788 #[test]
789 fn test_compute_pr_diff_ignores_whitespace_only_changes() {
790 let conn = db::open_test_db().unwrap();
791 let spec_id =
792 write::insert_or_get_spec(&conn, "HTML", "https://html.spec.whatwg.org", "whatwg")
793 .unwrap();
794
795 let base_id =
796 write::insert_snapshot(&conn, spec_id, "hash:base2", "2026-01-01T00:00:00Z").unwrap();
797 write::insert_sections_bulk(
798 &conn,
799 base_id,
800 &[
801 ParsedSection {
802 anchor: "sec-a".into(),
803 title: Some("A".into()),
804 content_text: Some("Hello world\n\nfoo".into()),
805 section_type: SectionType::Heading,
806 parent_anchor: None,
807 prev_anchor: None,
808 next_anchor: None,
809 depth: Some(2),
810 },
811 ParsedSection {
812 anchor: "sec-b".into(),
813 title: Some("B".into()),
814 content_text: Some("Real content B".into()),
815 section_type: SectionType::Heading,
816 parent_anchor: None,
817 prev_anchor: None,
818 next_anchor: None,
819 depth: Some(2),
820 },
821 ],
822 )
823 .unwrap();
824
825 let pr_id = write::insert_pr_snapshot(
826 &conn,
827 spec_id,
828 "pr-sha2",
829 "2026-01-01T00:00:00Z",
830 98,
831 "hash:base2",
832 &[],
833 )
834 .unwrap();
835 write::insert_sections_bulk(
836 &conn,
837 pr_id,
838 &[
839 ParsedSection {
840 anchor: "sec-a".into(),
841 title: Some("A".into()),
842 content_text: Some("Hello world\n\nfoo".into()),
843 section_type: SectionType::Heading,
844 parent_anchor: None,
845 prev_anchor: None,
846 next_anchor: None,
847 depth: Some(2),
848 },
849 ParsedSection {
850 anchor: "sec-b".into(),
851 title: Some("B".into()),
852 content_text: Some("Modified content B".into()),
853 section_type: SectionType::Heading,
854 parent_anchor: None,
855 prev_anchor: None,
856 next_anchor: None,
857 depth: Some(2),
858 },
859 ],
860 )
861 .unwrap();
862
863 let diff = compute_pr_diff(&conn, pr_id, base_id).unwrap();
864 assert_eq!(diff.len(), 1);
865 assert_eq!(diff[0].anchor, "sec-b");
866 assert_eq!(diff[0].change_type, "modified");
867 }
868}