1use std::collections::HashMap;
4use std::fmt;
5use std::sync::Arc;
6use std::time::Duration;
7
8use serde_json::Value;
9use servo::accesskit::{Node, NodeId};
10
11use crate::error::Error;
12use crate::net::sanitize_user_agent;
13
/// A fetched page together with the artefacts captured while loading it.
///
/// Fields tagged `#[serde(skip)]` are never serialized; optional fields tagged
/// `skip_serializing_if` are left out of the serialized output when they are `None`.
#[derive(Debug, Clone, Default, serde::Serialize)]
#[non_exhaustive]
pub struct Page {
    /// HTML of the page as handed over by the bridge; empty for PDF-derived pages.
    pub html: String,
    /// Plain text of the page. Empty when the bridge reported none; for PDF-derived
    /// pages this holds the text extracted from the PDF bytes.
    pub inner_text: String,
    /// Document title taken from the HTML's `<title>`; `None` when no title text is found.
    pub title: Option<String>,
    /// Layout data as a JSON string, passed through from the bridge and forwarded to
    /// the extractor. The schema is defined by the bridge and not visible here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layout_json: Option<String>,
    /// Visibility data as a JSON string, passed through from the bridge and forwarded
    /// to the extractor. Internal only: never serialized.
    #[serde(skip)]
    visibility_json: Option<String>,
    /// JavaScript result string reported by the bridge, if any
    /// (presumably only set for JavaScript fetches; TODO confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub js_result: Option<String>,
    /// Console messages reported by the bridge, in the order the bridge returned them.
    pub console_messages: Vec<ConsoleMessage>,
    /// Accessibility tree as a string, passed through from the bridge
    /// (format not visible here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub accessibility_tree: Option<String>,
    /// Value produced by the fetch's extraction schema; `None` when no schema was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted: Option<Value>,
    /// PNG-encoded screenshot, when one was captured and could be encoded.
    /// Read it through [`Page::screenshot_png`].
    #[serde(skip)]
    screenshot_png: Option<Vec<u8>>,
    /// AccessKit nodes keyed by node id, shared behind an `Arc`; forwarded to the extractor.
    #[serde(skip)]
    a11y: Option<Arc<HashMap<NodeId, Node>>>,
    /// Visibility policy handed to the extractor; set from the fetch options after a fetch.
    #[serde(skip)]
    visibility_policy: crate::visibility::VisibilityPolicy,
}
51
52impl Page {
53 pub fn markdown(&self) -> crate::error::Result<String> {
55 self.markdown_with_url("")
56 }
57
58 pub fn markdown_with_url(&self, url: &str) -> crate::error::Result<String> {
60 Ok(crate::extract::extract_text(&self.extract_input(url, None))?)
61 }
62
63 pub fn extract_json(&self) -> crate::error::Result<String> {
65 self.extract_json_with_url("")
66 }
67
68 pub fn extract_json_with_url(&self, url: &str) -> crate::error::Result<String> {
70 Ok(crate::extract::extract_json(&self.extract_input(url, None))?)
71 }
72
73 pub fn markdown_with_selector(&self, url: &str, selector: &str) -> crate::error::Result<String> {
75 Ok(crate::extract::extract_text(&self.extract_input(url, Some(selector)))?)
76 }
77
78 pub fn extract_json_with_selector(&self, url: &str, selector: &str) -> crate::error::Result<String> {
80 Ok(crate::extract::extract_json(&self.extract_input(url, Some(selector)))?)
81 }
82
83 #[must_use]
85 pub fn screenshot_png(&self) -> Option<&[u8]> {
86 self.screenshot_png.as_deref()
87 }
88
89 fn extract_input<'a>(&'a self, url: &'a str, selector: Option<&'a str>) -> crate::extract::ExtractInput<'a> {
90 crate::extract::ExtractInput::new(&self.html, url)
91 .with_layout_json(self.layout_json.as_deref())
92 .with_visibility_json(self.visibility_json.as_deref())
93 .with_a11y(self.a11y.as_deref())
94 .with_inner_text(Some(&self.inner_text))
95 .with_selector(selector)
96 .with_visibility(self.visibility_policy)
97 }
98
99 pub(crate) fn from_servo(page: crate::bridge::ServoPage) -> Self {
100 let title = {
101 let doc = dom_query::Document::from(page.html.as_str());
102 let t = doc.select("title").text().to_string();
103 if t.is_empty() { None } else { Some(t) }
104 };
105 let screenshot_png = page.screenshot.and_then(|img| {
106 let mut buf = std::io::Cursor::new(Vec::new());
107 img.write_to(&mut buf, image::ImageFormat::Png).ok()?;
108 Some(buf.into_inner())
109 });
110 Self {
111 html: page.html,
112 inner_text: page.inner_text.unwrap_or_default(),
113 title,
114 layout_json: page.layout_json,
115 visibility_json: page.visibility_json,
116 js_result: page.js_result,
117 console_messages: page
118 .console_messages
119 .into_iter()
120 .map(|m| ConsoleMessage {
121 level: match m.level {
122 crate::bridge::ConsoleLevel::Log => ConsoleLevel::Log,
123 crate::bridge::ConsoleLevel::Debug => ConsoleLevel::Debug,
124 crate::bridge::ConsoleLevel::Info => ConsoleLevel::Info,
125 crate::bridge::ConsoleLevel::Warn => ConsoleLevel::Warn,
126 crate::bridge::ConsoleLevel::Error => ConsoleLevel::Error,
127 crate::bridge::ConsoleLevel::Trace => ConsoleLevel::Trace,
128 },
129 message: m.message,
130 })
131 .collect(),
132 screenshot_png,
133 accessibility_tree: page.accessibility_tree,
134 a11y: page.a11y.map(Arc::new),
135 extracted: None,
136 visibility_policy: crate::visibility::VisibilityPolicy::default(),
137 }
138 }
139}
140
/// One console message reported by the bridge while the page was loaded.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
#[non_exhaustive]
pub struct ConsoleMessage {
    /// Level the message was reported at.
    pub level: ConsoleLevel,
    /// Message text exactly as reported by the bridge.
    pub message: String,
}
150
/// Level of a [`ConsoleMessage`], mirroring the bridge's console levels one to one.
///
/// Serialized as the lowercase variant name (`"log"`, `"debug"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ConsoleLevel {
    /// Counterpart of the bridge's `Log` level.
    Log,
    /// Counterpart of the bridge's `Debug` level.
    Debug,
    /// Counterpart of the bridge's `Info` level.
    Info,
    /// Counterpart of the bridge's `Warn` level.
    Warn,
    /// Counterpart of the bridge's `Error` level.
    Error,
    /// Counterpart of the bridge's `Trace` level.
    Trace,
}
169
170impl ConsoleLevel {
171 #[must_use]
173 pub fn as_str(&self) -> &'static str {
174 match self {
175 Self::Log => "log",
176 Self::Debug => "debug",
177 Self::Info => "info",
178 Self::Warn => "warn",
179 Self::Error => "error",
180 Self::Trace => "trace",
181 }
182 }
183}
184
185impl fmt::Display for ConsoleLevel {
186 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187 f.pad(self.as_str())
188 }
189}
190
/// What a fetch should produce; translated into the bridge's own mode when the
/// bridge options are built.
#[derive(Debug, Clone, Default)]
pub(crate) enum FetchMode {
    /// Plain content fetch (the default). This is the only mode in which the PDF
    /// probe runs before the engine is started.
    #[default]
    Content,
    /// Screenshot fetch.
    Screenshot {
        /// Forwarded unchanged to the bridge's screenshot mode
        /// (presumably whole page vs. viewport only; TODO confirm).
        full_page: bool,
    },
    /// JavaScript fetch; the string is the expression handed to the bridge's
    /// `ExecuteJs` mode.
    JavaScript(String),
}
200
/// Builder-style description of a single fetch.
///
/// Create one with [`FetchOptions::new`], [`FetchOptions::screenshot`] or
/// [`FetchOptions::javascript`], refine it with the chained setters, then pass it to
/// [`fetch`] or [`fetch_blocking`].
#[must_use = "options do nothing until passed to fetch()"]
#[derive(Debug, Clone)]
pub struct FetchOptions {
    /// Target URL; validated at the start of every fetch.
    pub(crate) url: String,
    /// Caller-chosen timeout; `None` means `DEFAULT_TIMEOUT` applies.
    pub(crate) timeout: Option<Duration>,
    /// Caller-chosen settle duration; `None` means `DEFAULT_SETTLE` applies.
    /// Forwarded to the bridge in milliseconds.
    pub(crate) settle: Option<Duration>,
    /// What the fetch should produce.
    pub(crate) mode: FetchMode,
    /// Sanitized User-Agent override; `None` passes no override to the bridge.
    pub(crate) user_agent: Option<String>,
    /// Extraction schema applied to the page HTML once the fetch has finished.
    pub(crate) extract_schema: Option<crate::schema::ExtractSchema>,
    /// Caller-chosen visibility policy; `None` means the policy's `Default` applies.
    pub(crate) visibility: Option<crate::visibility::VisibilityPolicy>,
}
213
214impl FetchOptions {
215 pub(crate) const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
218
219 pub(crate) const DEFAULT_SETTLE: Duration = Duration::ZERO;
222
223 pub fn new(url: &str) -> Self {
225 Self {
226 url: url.into(),
227 timeout: None,
228 settle: None,
229 mode: FetchMode::Content,
230 user_agent: None,
231 extract_schema: None,
232 visibility: None,
233 }
234 }
235
236 pub fn screenshot(url: &str, full_page: bool) -> Self {
238 Self {
239 mode: FetchMode::Screenshot { full_page },
240 ..Self::new(url)
241 }
242 }
243
244 pub fn javascript(url: &str, expression: impl Into<String>) -> Self {
246 Self {
247 mode: FetchMode::JavaScript(expression.into()),
248 ..Self::new(url)
249 }
250 }
251
252 pub fn timeout(mut self, timeout: Duration) -> Self {
254 self.timeout = Some(timeout);
255 self
256 }
257
258 pub fn settle(mut self, settle: Duration) -> Self {
260 self.settle = Some(settle);
261 self
262 }
263
264 pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
266 self.user_agent = Some(sanitize_user_agent(ua.into()));
267 self
268 }
269
270 pub fn schema(mut self, schema: crate::schema::ExtractSchema) -> Self {
272 self.extract_schema = Some(schema);
273 self
274 }
275
276 pub fn visibility(mut self, policy: crate::visibility::VisibilityPolicy) -> Self {
278 self.visibility = Some(policy);
279 self
280 }
281
282 pub(crate) fn effective_timeout(&self) -> Duration {
284 self.timeout.unwrap_or(Self::DEFAULT_TIMEOUT)
285 }
286
287 pub(crate) fn effective_settle(&self) -> Duration {
289 self.settle.unwrap_or(Self::DEFAULT_SETTLE)
290 }
291
292 pub(crate) fn effective_visibility(&self) -> crate::visibility::VisibilityPolicy {
294 self.visibility.unwrap_or_default()
295 }
296}
297
298pub fn fetch_blocking(opts: &FetchOptions) -> crate::error::Result<Page> {
300 if let Some(pdf_page) = pre_fetch(opts)? {
301 return Ok(pdf_page);
302 }
303 let bridge_opts = build_bridge_options(opts);
304 let servo_page = crate::bridge::fetch_page(bridge_opts).map_err(|e| map_engine_error(e, opts))?;
305 Ok(finalize_page(servo_page, opts))
306}
307
308pub async fn fetch(opts: &FetchOptions) -> crate::error::Result<Page> {
310 if let Some(pdf_page) = pre_fetch_async(opts).await? {
311 return Ok(pdf_page);
312 }
313 let bridge_opts = build_bridge_options(opts);
314 let servo_page = crate::bridge::fetch_page_async(bridge_opts)
315 .await
316 .map_err(|e| map_engine_error(e, opts))?;
317 Ok(finalize_page(servo_page, opts))
318}
319
320pub fn markdown_blocking(url: &str) -> crate::error::Result<String> {
322 fetch_blocking(&FetchOptions::new(url))?.markdown_with_url(url)
323}
324
325pub async fn markdown(url: &str) -> crate::error::Result<String> {
327 fetch(&FetchOptions::new(url)).await?.markdown_with_url(url)
328}
329
330pub fn extract_json_blocking(url: &str) -> crate::error::Result<String> {
332 fetch_blocking(&FetchOptions::new(url))?.extract_json_with_url(url)
333}
334
335pub async fn extract_json(url: &str) -> crate::error::Result<String> {
337 fetch(&FetchOptions::new(url)).await?.extract_json_with_url(url)
338}
339
340pub fn text_blocking(url: &str) -> crate::error::Result<String> {
342 Ok(fetch_blocking(&FetchOptions::new(url))?.inner_text)
343}
344
345pub async fn text(url: &str) -> crate::error::Result<String> {
347 Ok(fetch(&FetchOptions::new(url)).await?.inner_text)
348}
349
350fn pre_fetch(opts: &FetchOptions) -> crate::error::Result<Option<Page>> {
351 crate::net::ensure_crypto_provider();
352 crate::net::validate_url(&opts.url)?;
353
354 if matches!(opts.mode, FetchMode::Content)
355 && let Some(bytes) = crate::pdf::probe(&opts.url, opts.effective_timeout().as_secs().max(1))
356 {
357 return Ok(Some(pdf_page(&bytes)));
358 }
359
360 Ok(None)
361}
362
363async fn pre_fetch_async(opts: &FetchOptions) -> crate::error::Result<Option<Page>> {
364 crate::net::ensure_crypto_provider();
365 crate::net::validate_url(&opts.url)?;
366
367 if matches!(opts.mode, FetchMode::Content) {
368 let url = opts.url.clone();
369 let timeout_secs = opts.effective_timeout().as_secs().max(1);
370 let probe = tokio::task::spawn_blocking(move || crate::pdf::probe(&url, timeout_secs))
371 .await
372 .map_err(|e| Error::engine(anyhow::anyhow!("pdf probe task panicked: {e}"), Some(opts.url.clone())))?;
373 if let Some(bytes) = probe {
374 return Ok(Some(pdf_page(&bytes)));
375 }
376 }
377
378 Ok(None)
379}
380
381fn pdf_page(bytes: &[u8]) -> Page {
382 let text = crate::extract::extract_pdf(bytes);
383 Page {
384 html: String::new(),
385 inner_text: text,
386 ..Page::default()
387 }
388}
389
390fn build_bridge_options(opts: &FetchOptions) -> crate::bridge::FetchOptions<'_> {
391 crate::bridge::FetchOptions {
392 url: &opts.url,
393 timeout_secs: opts.effective_timeout().as_secs().max(1),
394 settle_ms: u64::try_from(opts.effective_settle().as_millis()).unwrap_or(u64::MAX),
395 user_agent: opts.user_agent.as_deref(),
396 mode: match opts.mode {
397 FetchMode::Content => crate::bridge::FetchMode::Content { include_a11y: false },
398 FetchMode::Screenshot { full_page } => crate::bridge::FetchMode::Screenshot { full_page },
399 FetchMode::JavaScript(ref expr) => crate::bridge::FetchMode::ExecuteJs {
400 expression: expr.clone(),
401 },
402 },
403 }
404}
405
406fn finalize_page(servo_page: crate::bridge::ServoPage, opts: &FetchOptions) -> Page {
407 let mut page = Page::from_servo(servo_page);
408 page.visibility_policy = opts.effective_visibility();
409 if let Some(schema) = opts.extract_schema.as_ref() {
410 page.extracted = Some(schema.extract_from(&page.html));
411 }
412 page
413}
414
415fn map_engine_error(e: anyhow::Error, opts: &FetchOptions) -> Error {
416 if format!("{e:#}").contains("timed out") {
417 Error::Timeout {
418 url: opts.url.clone(),
419 timeout: opts.effective_timeout(),
420 }
421 } else {
422 Error::engine(e, Some(opts.url.clone()))
423 }
424}
425
/// Unit tests for the option builder, the `Page` extraction helpers, URL rejection
/// and the bridge-to-`Page` conversion. None of them needs network access: fetches
/// are only attempted with URLs the tests expect to be rejected.
#[cfg(test)]
mod tests {
    use super::*;

    // --- FetchOptions -----------------------------------------------------------

    #[test]
    fn fetch_options_defaults() {
        let opts = FetchOptions::new("https://example.com");
        assert_eq!(opts.url, "https://example.com");
        assert_eq!(opts.timeout, None);
        assert_eq!(opts.settle, None);
        assert_eq!(opts.visibility, None);
        assert!(matches!(opts.mode, FetchMode::Content));
    }

    #[test]
    fn fetch_options_effective_defaults() {
        let opts = FetchOptions::new("https://example.com");
        assert_eq!(opts.effective_timeout(), Duration::from_secs(30));
        assert_eq!(opts.effective_settle(), Duration::ZERO);
    }

    #[test]
    fn fetch_options_caller_value_preserved() {
        let opts = FetchOptions::new("https://example.com")
            .timeout(Duration::from_secs(45))
            .settle(Duration::from_millis(250));
        assert_eq!(opts.timeout, Some(Duration::from_secs(45)));
        assert_eq!(opts.settle, Some(Duration::from_millis(250)));
        assert_eq!(opts.effective_timeout(), Duration::from_secs(45));
        assert_eq!(opts.effective_settle(), Duration::from_millis(250));
    }

    #[test]
    fn fetch_options_screenshot() {
        let opts = FetchOptions::screenshot("https://example.com", true);
        assert!(matches!(opts.mode, FetchMode::Screenshot { full_page: true }));
    }

    #[test]
    fn fetch_options_javascript() {
        let opts = FetchOptions::javascript("https://example.com", "document.title");
        assert!(matches!(opts.mode, FetchMode::JavaScript(ref e) if e == "document.title"));
    }

    #[test]
    fn fetch_options_chaining() {
        let opts = FetchOptions::new("https://example.com")
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(500));
        assert_eq!(opts.timeout, Some(Duration::from_secs(60)));
        assert_eq!(opts.settle, Some(Duration::from_millis(500)));
    }

    // --- User-Agent handling ----------------------------------------------------

    #[test]
    fn fetch_user_agent_set() {
        let opts = FetchOptions::new("https://example.com").user_agent("MyBot/1.0");
        assert_eq!(opts.user_agent.as_deref(), Some("MyBot/1.0"));
    }

    #[test]
    fn fetch_user_agent_default_is_none() {
        let opts = FetchOptions::new("https://example.com");
        assert!(opts.user_agent.is_none());
    }

    #[test]
    fn fetch_user_agent_sanitizes_crlf() {
        let opts = FetchOptions::new("https://example.com").user_agent("Bot\r\nX-Evil: yes");
        assert_eq!(opts.user_agent.as_deref(), Some("Bot X-Evil: yes"));
    }

    #[test]
    fn fetch_user_agent_sanitizes_null() {
        let opts = FetchOptions::new("https://example.com").user_agent("Bot\0/1.0");
        assert_eq!(opts.user_agent.as_deref(), Some("Bot /1.0"));
    }

    #[test]
    fn fetch_user_agent_empty_string() {
        let opts = FetchOptions::new("https://example.com").user_agent("");
        assert_eq!(opts.user_agent.as_deref(), Some(""));
    }

    // --- Page extraction helpers ------------------------------------------------

    #[test]
    fn page_markdown_from_html() {
        let page = Page {
            html: "<html><head><title>Test</title></head><body><p>hello world</p></body></html>".into(),
            inner_text: "hello world".into(),
            ..Page::default()
        };
        let md = page.markdown().unwrap();
        assert!(md.contains("hello world"));
    }

    #[test]
    fn page_extract_json_produces_valid_json() {
        let page = Page {
            html: "<html><head><title>Test</title></head><body><p>content</p></body></html>".into(),
            inner_text: "content".into(),
            ..Page::default()
        };
        let json = page.extract_json().unwrap();
        let _: Value = serde_json::from_str(&json).expect("valid JSON");
    }

    #[test]
    fn page_screenshot_png_none_by_default() {
        let page = Page::default();
        assert!(page.screenshot_png().is_none());
    }

    #[test]
    fn page_markdown_with_selector_scopes_to_subtree() {
        let page = Page {
            html: "<html><body><article>keep</article><aside>drop</aside></body></html>".into(),
            ..Page::default()
        };
        let md = page.markdown_with_selector("https://example.com", "article").unwrap();
        assert!(md.contains("keep"));
        assert!(!md.contains("drop"));
    }

    #[test]
    fn page_extract_json_with_selector_includes_url() {
        let page = Page {
            html: "<html><body><article>scoped</article></body></html>".into(),
            ..Page::default()
        };
        let json = page
            .extract_json_with_selector("https://example.com/page", "article")
            .unwrap();
        let parsed: Value = serde_json::from_str(&json).expect("valid JSON");
        assert_eq!(parsed["url"].as_str(), Some("https://example.com/page"));
        assert!(parsed["text_content"].as_str().unwrap().contains("scoped"));
    }

    #[test]
    fn page_markdown_with_selector_no_match_returns_empty() {
        let page = Page {
            html: "<html><body><article>x</article></body></html>".into(),
            ..Page::default()
        };
        let md = page.markdown_with_selector("", ".nonexistent").unwrap();
        assert!(md.is_empty());
    }

    #[test]
    fn page_markdown_with_invalid_selector_returns_error() {
        let page = Page {
            html: "<html><body><p>x</p></body></html>".into(),
            ..Page::default()
        };
        let err = page.markdown_with_selector("", "###invalid[[[").unwrap_err();
        assert!(err.to_string().contains("invalid CSS selector"));
    }

    #[test]
    fn page_markdown_with_empty_selector_returns_error() {
        let page = Page {
            html: "<html><body><p>x</p></body></html>".into(),
            ..Page::default()
        };
        assert!(page.markdown_with_selector("", "").is_err());
    }

    // --- URL rejection (expected to fail before any request is made) -------------

    #[test]
    fn fetch_rejects_invalid_url() {
        let result = fetch_blocking(&FetchOptions::new("not a url"));
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }));
    }

    #[test]
    fn fetch_rejects_private_ip() {
        let result = fetch_blocking(&FetchOptions::new("http://127.0.0.1/"));
        assert!(result.is_err());
    }

    #[test]
    fn fetch_rejects_file_scheme() {
        let result = fetch_blocking(&FetchOptions::new("file:///etc/passwd"));
        assert!(result.is_err());
    }

    /// Tests for `Page::from_servo`, driven by hand-built `bridge::ServoPage` values.
    mod page_from_servo {
        use crate::bridge;
        use crate::fetch::{ConsoleLevel, Page};

        /// Solid red `w` x `h` image standing in for a captured screenshot.
        fn synthetic_image(w: u32, h: u32) -> image::RgbaImage {
            image::RgbaImage::from_pixel(w, h, image::Rgba([255, 0, 0, 255]))
        }

        /// A `ServoPage` with every field at its default value.
        fn empty_servo_page() -> bridge::ServoPage {
            bridge::ServoPage::default()
        }

        #[test]
        fn extracts_title_from_html() {
            let mut sp = empty_servo_page();
            sp.html = "<html><head><title>Hello World</title></head></html>".into();
            let page = Page::from_servo(sp);
            assert_eq!(page.title.as_deref(), Some("Hello World"));
        }

        #[test]
        fn title_is_none_when_tag_missing() {
            let mut sp = empty_servo_page();
            sp.html = "<html><body>no title here</body></html>".into();
            let page = Page::from_servo(sp);
            assert!(page.title.is_none());
        }

        #[test]
        fn title_is_none_when_tag_empty() {
            let mut sp = empty_servo_page();
            sp.html = "<html><head><title></title></head></html>".into();
            let page = Page::from_servo(sp);
            assert!(page.title.is_none());
        }

        #[test]
        fn title_is_none_for_empty_html() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.title.is_none());
        }

        #[test]
        fn inner_text_none_becomes_empty_string() {
            let sp = empty_servo_page();
            assert!(sp.inner_text.is_none());
            let page = Page::from_servo(sp);
            assert_eq!(page.inner_text, "");
        }

        #[test]
        fn screenshot_is_encoded_as_png() {
            let mut sp = empty_servo_page();
            sp.screenshot = Some(synthetic_image(8, 8));
            let page = Page::from_servo(sp);
            let bytes = page.screenshot_png().expect("screenshot encoded");
            assert_eq!(&bytes[..8], b"\x89PNG\r\n\x1a\n", "PNG magic bytes");
        }

        #[test]
        fn console_messages_empty_by_default() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.console_messages.is_empty());
        }

        #[test]
        fn console_messages_preserve_all_six_levels() {
            let cases = [
                (bridge::ConsoleLevel::Log, ConsoleLevel::Log),
                (bridge::ConsoleLevel::Debug, ConsoleLevel::Debug),
                (bridge::ConsoleLevel::Info, ConsoleLevel::Info),
                (bridge::ConsoleLevel::Warn, ConsoleLevel::Warn),
                (bridge::ConsoleLevel::Error, ConsoleLevel::Error),
                (bridge::ConsoleLevel::Trace, ConsoleLevel::Trace),
            ];
            for (src, expected) in cases {
                let mut sp = empty_servo_page();
                sp.console_messages = vec![bridge::ConsoleMessage {
                    level: src,
                    message: "msg".into(),
                }];
                let page = Page::from_servo(sp);
                assert_eq!(
                    page.console_messages.len(),
                    1,
                    "console message lost for source level {src:?}",
                );
                assert_eq!(
                    page.console_messages[0].level, expected,
                    "level mapping wrong for source {src:?}",
                );
            }
        }

        #[test]
        fn console_messages_preserve_ordering_across_levels() {
            let mut sp = empty_servo_page();
            sp.console_messages = vec![
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Info,
                    message: "first".into(),
                },
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Error,
                    message: "second".into(),
                },
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Warn,
                    message: "third".into(),
                },
            ];
            let page = Page::from_servo(sp);
            assert_eq!(page.console_messages.len(), 3);
            assert_eq!(page.console_messages[0].message, "first");
            assert_eq!(page.console_messages[1].message, "second");
            assert_eq!(page.console_messages[2].message, "third");
            assert_eq!(page.console_messages[0].level, ConsoleLevel::Info);
            assert_eq!(page.console_messages[1].level, ConsoleLevel::Error);
            assert_eq!(page.console_messages[2].level, ConsoleLevel::Warn);
        }

        #[test]
        fn extracted_starts_as_none_until_schema_applied() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.extracted.is_none());
        }

        #[test]
        fn full_round_trip_preserves_every_field() {
            let sp = bridge::ServoPage {
                html: "<html><head><title>T</title></head><body>B</body></html>".into(),
                inner_text: Some("B".into()),
                layout_json: Some("[]".into()),
                visibility_json: Some("[]".into()),
                screenshot: Some(synthetic_image(2, 2)),
                js_result: Some("42".into()),
                accessibility_tree: Some("{}".into()),
                a11y: None,
                console_messages: vec![bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Log,
                    message: "x".into(),
                }],
            };
            let page = Page::from_servo(sp);
            assert_eq!(page.html, "<html><head><title>T</title></head><body>B</body></html>");
            assert_eq!(page.inner_text, "B");
            assert_eq!(page.title.as_deref(), Some("T"));
            assert_eq!(page.layout_json.as_deref(), Some("[]"));
            // Private fields are reachable from this descendant module; without these
            // two checks a dropped `visibility_json` or `a11y` would go unnoticed.
            assert_eq!(page.visibility_json.as_deref(), Some("[]"));
            assert!(page.a11y.is_none());
            assert_eq!(page.js_result.as_deref(), Some("42"));
            assert_eq!(page.accessibility_tree.as_deref(), Some("{}"));
            assert_eq!(page.console_messages.len(), 1);
            assert!(page.screenshot_png().is_some());
            assert!(page.extracted.is_none());
        }
    }
}