mod common;
use chromewright::tools::{GetMarkdownParams, Tool, ToolContext, markdown::GetMarkdownTool};
use log::info;
/// Checks that a minimal `<article>` page is converted to markdown on a single page.
///
/// The test loads a small HTML document, runs `GetMarkdownTool` with default
/// parameters, and asserts that the heading and both paragraphs show up in the
/// `markdown` field and that the pagination fields report page 1 of 1.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested. It
/// returns early without asserting when `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_basic_markdown_extraction() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let html = r#"
<!DOCTYPE html>
<html>
<head>
<title>Test Article</title>
</head>
<body>
<article>
<h1>Main Article Title</h1>
<p>This is the first paragraph of the article.</p>
<p>This is the second paragraph with <strong>bold text</strong> and <em>italic text</em>.</p>
</article>
</body>
</html>
"#;
    common::navigate_encoded_html(session, html).expect("Failed to navigate");

    let tool = GetMarkdownTool;
    let mut context = ToolContext::new(session);
    let result = tool
        .execute_typed(GetMarkdownParams::default(), &mut context)
        .expect("Failed to execute markdown tool");
    assert!(result.success, "Tool execution should succeed");

    // A single `expect` replaces the former `is_some()` assertion followed by
    // `unwrap()`, so a missing payload fails once with a descriptive message.
    let data = result.data.expect("Tool should return data");
    info!(
        "Markdown result: {}",
        serde_json::to_string_pretty(&data).unwrap()
    );

    let markdown = data["markdown"].as_str().expect("Should have markdown");
    for (needle, message) in [
        ("Main Article Title", "Should contain title"),
        ("first paragraph", "Should contain first paragraph"),
        ("second paragraph", "Should contain second paragraph"),
    ] {
        assert!(markdown.contains(needle), "{}", message);
    }

    // The document is short, so everything is expected on one page.
    assert_eq!(data["currentPage"].as_u64(), Some(1));
    assert_eq!(data["totalPages"].as_u64(), Some(1));
    assert_eq!(data["hasMorePages"].as_bool(), Some(false));
}
/// Runs the markdown tool on a page that mixes an `<article>` with a `<nav>`,
/// a sidebar `<aside>` and a `<footer>`, and checks that the article's title
/// and main text are present in the extracted markdown.
///
/// NOTE(review): despite the name, nothing here asserts that the navigation,
/// sidebar or footer text is absent from the output; only the presence of the
/// article content is verified.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_readability_filtering() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Article with Navigation</title>
</head>
<body>
<nav>
<ul>
<li>Home</li>
<li>About</li>
<li>Contact</li>
</ul>
</nav>
<aside class="sidebar">
<h3>Advertisement</h3>
<p>Buy our product!</p>
</aside>
<article>
<h1>Important Article</h1>
<p>This is the main content that should be extracted by Readability.</p>
<p>It contains valuable information for the reader.</p>
<p>Navigation and ads should be filtered out.</p>
</article>
<footer>
<p>Copyright 2025</p>
</footer>
</body>
</html>
"#;
    common::navigate_encoded_html(session, page).expect("Failed to navigate");

    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);
    let result = markdown_tool
        .execute_typed(GetMarkdownParams::default(), &mut ctx)
        .expect("Failed to execute markdown tool");
    assert!(result.success);

    let data = result.data.unwrap();
    let markdown = data["markdown"].as_str().expect("Should have markdown");
    info!("Extracted markdown:\n{}", markdown);

    // Each pair is (text that must appear, failure message if it does not).
    let expectations = [
        ("Important Article", "Should contain article title"),
        ("main content", "Should contain main content"),
    ];
    for (needle, message) in expectations {
        assert!(markdown.contains(needle), "{}", message);
    }
}
/// Checks that a long article is split across several pages.
///
/// The test builds a page with 200 generated paragraphs, requests page 1 with
/// a `page_size` of 5000, and asserts that the tool reports more than one
/// page, that `hasMorePages` is true, and that the first page carries the
/// title plus the textual pagination markers.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_markdown_pagination() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();

    // Generate enough body text that the output cannot fit in one page.
    let paragraphs: String = (1..=200)
        .map(|i| {
            format!(
                "<p>This is paragraph number {}. It contains some text to make the content longer. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>\n",
                i
            )
        })
        .collect();
    let html = format!(
        r#"
<!DOCTYPE html>
<html>
<head>
<title>Long Article</title>
</head>
<body>
<article>
<h1>Very Long Article</h1>
{}
</article>
</body>
</html>
"#,
        paragraphs
    );
    common::navigate_encoded_html(session, &html).expect("Failed to navigate");

    let tool = GetMarkdownTool;
    let mut context = ToolContext::new(session);
    let result = tool
        .execute_typed(
            GetMarkdownParams {
                page: 1,
                page_size: 5000,
            },
            &mut context,
        )
        .expect("Failed to execute markdown tool");
    assert!(result.success);

    let data = result.data.unwrap();
    info!(
        "Pagination result: {}",
        serde_json::to_string_pretty(&data).unwrap()
    );

    let markdown = data["markdown"].as_str().expect("Should have markdown");
    let current_page = data["currentPage"]
        .as_u64()
        .expect("Should have currentPage");
    let total_pages = data["totalPages"].as_u64().expect("Should have totalPages");
    let has_more = data["hasMorePages"]
        .as_bool()
        .expect("Should have hasMorePages");

    assert_eq!(current_page, 1);
    assert!(
        total_pages > 1,
        "Should have multiple pages, got total_pages={}",
        total_pages
    );
    assert!(has_more, "Should have more pages");

    // Either the `<h1>` text or the `<title>` text is accepted as the title.
    let title_present = markdown.contains("Very Long Article") || markdown.contains("Long Article");
    assert!(
        title_present,
        "First page should have title. Markdown: {}",
        // Take the preview by characters rather than by byte offset: slicing
        // `&markdown[..200]` panics when byte 200 is not a UTF-8 char
        // boundary, which would hide the real assertion failure.
        markdown.chars().take(200).collect::<String>()
    );
    assert!(
        markdown.contains("Page 1 of"),
        "Should have pagination info"
    );
    assert!(markdown.contains("more page"), "Should indicate more pages");
}
/// Runs the markdown tool on a page whose `<body>` is empty and logs whatever
/// comes back.
///
/// Both outcomes are accepted: an `Ok` result and an `Err` are each only
/// logged, so this test fails only if something panics along the way (for
/// example, if navigation fails).
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_empty_page() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let blank_page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Empty Page</title>
</head>
<body>
</body>
</html>
"#;
    common::navigate_encoded_html(session, blank_page).expect("Failed to navigate");

    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);
    let outcome = markdown_tool.execute_typed(GetMarkdownParams::default(), &mut ctx);

    // No assertion on purpose: success and failure are both tolerated here.
    match outcome {
        Ok(res) => info!("Empty page result: {:?}", res),
        Err(e) => info!("Empty page error (expected): {:?}", e),
    }
}
/// Runs the markdown tool on an article containing a three-column table and
/// checks that header and cell text survive into the extracted markdown.
///
/// Only the presence of the cell text is asserted; the exact markdown table
/// syntax is not checked.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_table_conversion() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Table Test</title>
</head>
<body>
<article>
<h1>Data Table</h1>
<table>
<thead>
<tr>
<th>Name</th>
<th>Age</th>
<th>City</th>
</tr>
</thead>
<tbody>
<tr>
<td>Alice</td>
<td>30</td>
<td>New York</td>
</tr>
<tr>
<td>Bob</td>
<td>25</td>
<td>London</td>
</tr>
</tbody>
</table>
</article>
</body>
</html>
"#;
    common::navigate_encoded_html(session, page).expect("Failed to navigate");

    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);
    let result = markdown_tool
        .execute_typed(GetMarkdownParams::default(), &mut ctx)
        .expect("Failed to execute markdown tool");
    assert!(result.success);

    let data = result.data.unwrap();
    let markdown = data["markdown"].as_str().expect("Should have markdown");
    info!("Table markdown:\n{}", markdown);

    // Each pair is (cell text that must appear, failure message if it does not).
    let expected_cells = [
        ("Name", "Should contain table header"),
        ("Alice", "Should contain table data"),
        ("Bob", "Should contain table data"),
        ("30", "Should contain age data"),
        ("London", "Should contain city data"),
    ];
    for (cell, message) in expected_cells {
        assert!(markdown.contains(cell), "{}", message);
    }
}
/// Runs the markdown tool twice against the same loaded page, reusing one
/// `ToolContext`, and checks that both calls succeed and return identical
/// markdown.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_double_execution_same_page() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Double Execution Test</title>
</head>
<body>
<article>
<h1>Test Article</h1>
<p>This is paragraph one with some content.</p>
<p>This is paragraph two with more content.</p>
<p>This is paragraph three with even more content.</p>
</article>
</body>
</html>
"#;
    common::navigate_encoded_html(session, page).expect("Failed to navigate");

    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);

    // First extraction.
    info!("Executing get_markdown (first call)...");
    let first = markdown_tool
        .execute_typed(GetMarkdownParams::default(), &mut ctx)
        .expect("First call to get_markdown should succeed");
    assert!(first.success, "First execution should succeed");
    let first_data = first.data.expect("First call should return data");
    let first_markdown = first_data["markdown"]
        .as_str()
        .expect("Should have markdown");
    info!(
        "First call succeeded, markdown length: {}",
        first_markdown.len()
    );
    for (needle, message) in [
        ("Test Article", "First call should contain title"),
        ("paragraph one", "First call should contain content"),
    ] {
        assert!(first_markdown.contains(needle), "{}", message);
    }

    // Second extraction on the same page, with the same context.
    info!("Executing get_markdown (second call on same page)...");
    let second = markdown_tool
        .execute_typed(GetMarkdownParams::default(), &mut ctx)
        .expect("Second call to get_markdown should also succeed");
    assert!(second.success, "Second execution should succeed");
    let second_data = second.data.expect("Second call should return data");
    let second_markdown = second_data["markdown"]
        .as_str()
        .expect("Should have markdown");
    info!(
        "Second call succeeded, markdown length: {}",
        second_markdown.len()
    );
    for (needle, message) in [
        ("Test Article", "Second call should contain title"),
        ("paragraph one", "Second call should contain content"),
    ] {
        assert!(second_markdown.contains(needle), "{}", message);
    }

    // Repeating the call must not change the output.
    assert_eq!(
        first_markdown, second_markdown,
        "Both calls should return the same content"
    );
    info!("Double execution test passed!");
}
/// Requests page 999 of a very short article and expects the tool to report
/// page 1 of 1 with no further pages, i.e. an out-of-range page number does
/// not produce an error or an out-of-range `currentPage`.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_page_clamping() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Short Article</title>
</head>
<body>
<article>
<h1>Short Content</h1>
<p>This is a very short article.</p>
</article>
</body>
</html>
"#;
    common::navigate_encoded_html(session, page).expect("Failed to navigate");

    // Page number far beyond what the short article can provide.
    let out_of_range = GetMarkdownParams {
        page: 999,
        page_size: 100_000,
    };
    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);
    let result = markdown_tool
        .execute_typed(out_of_range, &mut ctx)
        .expect("Failed to execute markdown tool");
    assert!(result.success);

    let payload = result.data.unwrap();
    assert_eq!(payload["currentPage"].as_u64(), Some(1));
    assert_eq!(payload["totalPages"].as_u64(), Some(1));
    assert_eq!(payload["hasMorePages"].as_bool(), Some(false));
}
/// Loads a page whose `<article>` starts empty and is filled in by a script
/// via `setTimeout(..., 200)`, then asserts that the injected heading and
/// paragraph appear in the extracted markdown.
///
/// Marked `#[ignore]`; returns early without asserting when
/// `common::browser_or_skip()` yields `None`.
#[test]
#[ignore]
fn test_markdown_extraction_waits_for_delayed_content() {
    let Some(browser) = common::browser_or_skip() else {
        return;
    };
    let session = browser.session();
    let page = r#"
<!DOCTYPE html>
<html>
<head>
<title>Delayed Article</title>
</head>
<body>
<article id="article"></article>
<script>
setTimeout(() => {
document.getElementById('article').innerHTML = `
<h1>Delayed Title</h1>
<p>Rendered after a short delay.</p>
`;
}, 200);
</script>
</body>
</html>
"#;
    common::navigate_encoded_html(session, page).expect("Failed to navigate");

    let markdown_tool = GetMarkdownTool;
    let mut ctx = ToolContext::new(session);
    let result = markdown_tool
        .execute_typed(GetMarkdownParams::default(), &mut ctx)
        .expect("Delayed markdown extraction should succeed");
    assert!(result.success);

    let data = result.data.unwrap();
    let markdown = data["markdown"].as_str().expect("Should have markdown");

    // Both pieces of text exist only after the timer callback has run.
    assert!(markdown.contains("Delayed Title"));
    assert!(markdown.contains("Rendered after a short delay."));
}