Skip to main content

mixtape_tools/fetch/
fetch_tool.rs

1use crate::prelude::*;
2use html2md::parse_html;
3use readability_rust::Readability;
4use reqwest::Client;
5use robotstxt::DefaultMatcher;
6use std::time::Duration;
7use url::Url;
8
/// Input for fetching web content.
///
/// Deserialized from tool-call arguments; doc comments on fields become
/// descriptions in the generated JSON schema (via `JsonSchema`).
#[derive(Debug, Deserialize, JsonSchema)]
pub struct FetchInput {
    /// URL to fetch
    pub url: String,

    /// Maximum content length in characters (default: 5000)
    #[serde(default = "default_max_length")]
    pub max_length: Option<usize>,

    /// Starting character index for pagination (default: 0)
    // `#[serde(default)]` yields `None` when absent; `None` is treated as 0
    // by `paginate_content`.
    #[serde(default)]
    pub start_index: Option<usize>,

    /// Return raw HTML instead of Markdown (default: false)
    #[serde(default)]
    pub raw: bool,

    /// Force fetch even if robots.txt disallows (default: false, use with caution)
    #[serde(default)]
    pub force: bool,

    /// Custom user agent (default: "mixtape-bot/1.0")
    #[serde(default = "default_user_agent")]
    pub user_agent: String,

    /// Request timeout in seconds (default: 30)
    #[serde(default = "default_timeout")]
    pub timeout_seconds: u64,
}
39
/// Serde default for [`FetchInput::max_length`]: cap output at 5000 characters.
fn default_max_length() -> Option<usize> {
    const DEFAULT_MAX_LENGTH: usize = 5000;
    Some(DEFAULT_MAX_LENGTH)
}
43
/// Serde default for [`FetchInput::user_agent`].
fn default_user_agent() -> String {
    String::from("mixtape-bot/1.0 (+https://github.com/your-repo/mixtape)")
}
47
/// Serde default for [`FetchInput::timeout_seconds`].
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 30;
    DEFAULT_TIMEOUT_SECS
}
51
/// Tool for fetching and processing web content.
pub struct FetchTool {
    // Single HTTP client used for all requests (robots.txt probes and page
    // fetches alike); built once in `new`.
    client: Client,
}
56
57impl FetchTool {
58    pub fn new() -> Self {
59        let client = Client::builder()
60            .timeout(Duration::from_secs(60))
61            .build()
62            .expect("Failed to create HTTP client");
63
64        Self { client }
65    }
66
67    /// Check robots.txt compliance
68    async fn check_robots_txt(
69        &self,
70        url: &Url,
71        user_agent: &str,
72    ) -> std::result::Result<bool, ToolError> {
73        let host = url
74            .host_str()
75            .ok_or_else(|| ToolError::from("Invalid host"))?;
76        let robots_url = format!("{}://{}/robots.txt", url.scheme(), host);
77
78        // Fetch robots.txt with a short timeout
79        let robots_response =
80            match tokio::time::timeout(Duration::from_secs(5), self.client.get(&robots_url).send())
81                .await
82            {
83                Ok(Ok(response)) => response,
84                Ok(Err(_)) => return Ok(true), // No robots.txt, allow
85                Err(_) => return Ok(true),     // Timeout, allow
86            };
87
88        if !robots_response.status().is_success() {
89            return Ok(true); // No robots.txt or error, allow
90        }
91
92        let robots_content = match robots_response.text().await {
93            Ok(content) => content,
94            Err(e) => return Err(format!("Failed to read robots.txt: {}", e).into()),
95        };
96
97        // Parse robots.txt using Google's matcher
98        let mut matcher = DefaultMatcher::default();
99        let url_str = url.as_str();
100
101        Ok(matcher.one_agent_allowed_by_robots(&robots_content, user_agent, url_str))
102    }
103
104    /// Extract main content from HTML using Mozilla's Readability algorithm
105    fn extract_content(&self, html: &str, _url: &str) -> (Option<String>, String) {
106        // Try to use readability-rust for intelligent content extraction
107        match Readability::new(html, None) {
108            Ok(mut parser) => {
109                if let Some(article) = parser.parse() {
110                    // Successfully extracted article content with title and HTML content
111                    let content = article.content.unwrap_or_else(|| html.to_string());
112                    return (article.title, content);
113                }
114            }
115            Err(_) => {
116                // If readability fails, fall back to returning the full HTML
117            }
118        }
119
120        // Fallback: return the entire HTML if readability extraction fails
121        (None, html.to_string())
122    }
123
124    /// Convert HTML to Markdown
125    fn html_to_markdown(&self, html: &str) -> String {
126        parse_html(html)
127    }
128
129    /// Paginate content
130    fn paginate_content(
131        &self,
132        content: String,
133        start_index: Option<usize>,
134        max_length: Option<usize>,
135    ) -> (String, bool, usize) {
136        let total_length = content.len();
137        let start = start_index.unwrap_or(0);
138
139        if start >= total_length {
140            return (String::new(), false, total_length);
141        }
142
143        if let Some(max_len) = max_length {
144            let end = (start + max_len).min(total_length);
145            let truncated_content = content[start..end].to_string();
146            let is_truncated = end < total_length;
147            (truncated_content, is_truncated, total_length)
148        } else {
149            let truncated_content = content[start..].to_string();
150            (truncated_content, false, total_length)
151        }
152    }
153}
154
155impl Default for FetchTool {
156    fn default() -> Self {
157        Self::new()
158    }
159}
160
impl Tool for FetchTool {
    type Input = FetchInput;

    fn name(&self) -> &str {
        "fetch"
    }

    fn description(&self) -> &str {
        "Fetch content from a URL with robots.txt compliance, content extraction, and Markdown conversion. \
         Supports pagination for large documents."
    }

    /// Render the result for plain-text display: a rule-delimited metadata
    /// header with ASCII icons per field, followed by the body.
    fn format_output_plain(&self, result: &ToolResult) -> String {
        let output = result.as_text();
        let (metadata, content) = parse_fetch_header(&output);

        // No recognizable header: pass the output through untouched.
        if metadata.is_empty() {
            return output.to_string();
        }

        let mut out = String::new();
        out.push_str(&"─".repeat(60));
        out.push('\n');

        for (key, value) in &metadata {
            // ASCII icons keep the plain format usable without Unicode glyphs.
            let icon = match *key {
                "URL" => "[>]",
                "Title" => "[#]",
                "Content Length" => "[=]",
                "Showing" => "[~]",
                _ => "   ",
            };
            out.push_str(&format!("{} {:15} {}\n", icon, key, value));
        }

        out.push_str(&"─".repeat(60));
        out.push_str("\n\n");
        out.push_str(content);
        out
    }

    /// Render the result for ANSI terminals: dimmed rules, Nerd-Font icons,
    /// and per-field colors (URL blue, Title bold, Content Length green,
    /// Showing cyan).
    fn format_output_ansi(&self, result: &ToolResult) -> String {
        let output = result.as_text();
        let (metadata, content) = parse_fetch_header(&output);

        // No recognizable header: pass the output through untouched.
        if metadata.is_empty() {
            return output.to_string();
        }

        let mut out = String::new();
        out.push_str(&format!("\x1b[2m{}\x1b[0m\n", "─".repeat(60)));

        for (key, value) in &metadata {
            let (icon, color) = match *key {
                "URL" => ("\x1b[34m󰖟\x1b[0m", "\x1b[34m"),
                "Title" => ("\x1b[33m󰉹\x1b[0m", "\x1b[1m"),
                "Content Length" => ("\x1b[32m󰋊\x1b[0m", "\x1b[32m"),
                "Showing" => ("\x1b[36m󰦨\x1b[0m", "\x1b[36m"),
                _ => ("  ", "\x1b[0m"),
            };
            out.push_str(&format!(
                "{} \x1b[2m{:15}\x1b[0m {}{}\x1b[0m\n",
                icon, key, color, value
            ));
        }

        out.push_str(&format!("\x1b[2m{}\x1b[0m\n\n", "─".repeat(60)));
        out.push_str(content);
        out
    }

    /// Render the result as Markdown: the Title becomes an `##` heading,
    /// remaining metadata becomes a bulleted list, then a horizontal rule
    /// and the body.
    fn format_output_markdown(&self, result: &ToolResult) -> String {
        let output = result.as_text();
        let (metadata, content) = parse_fetch_header(&output);

        // No recognizable header: pass the output through untouched.
        if metadata.is_empty() {
            return output.to_string();
        }

        let mut out = String::new();
        let title = metadata
            .iter()
            .find(|(k, _)| *k == "Title")
            .map(|(_, v)| *v);

        if let Some(t) = title {
            out.push_str(&format!("## {}\n\n", t));
        }

        // Title is rendered as the heading above, so skip it in the list.
        for (key, value) in &metadata {
            if *key != "Title" {
                out.push_str(&format!("- **{}**: {}\n", key, value));
            }
        }

        out.push_str("\n---\n\n");
        out.push_str(content);
        out
    }

    /// Fetch `input.url` and return a "Key: value" metadata header, a `---`
    /// separator, and the (possibly paginated) extracted content. This
    /// header format is what `parse_fetch_header` expects.
    async fn execute(&self, input: Self::Input) -> std::result::Result<ToolResult, ToolError> {
        // Parse URL up front so invalid input fails before any network I/O.
        let url =
            Url::parse(&input.url).map_err(|e| ToolError::from(format!("Invalid URL: {}", e)))?;

        // Check robots.txt compliance unless force is set.
        if !input.force {
            let allowed = self
                .check_robots_txt(&url, &input.user_agent)
                .await
                .map_err(|e| ToolError::from(format!("Robots.txt check failed: {}", e)))?;

            if !allowed {
                return Err(format!(
                    "Access to {} is disallowed by robots.txt for user-agent '{}'",
                    input.url, input.user_agent
                )
                .into());
            }
        }

        // Fetch the URL with a caller-controlled deadline layered on top of
        // the client's own transport timeout.
        let response = tokio::time::timeout(
            Duration::from_secs(input.timeout_seconds),
            self.client
                .get(input.url.clone())
                .header("User-Agent", &input.user_agent)
                .send(),
        )
        .await
        .map_err(|_| format!("Request timed out after {} seconds", input.timeout_seconds))?
        .map_err(|e| ToolError::from(format!("Failed to fetch URL: {}", e)))?;

        // Check response status; non-2xx is an error, not content.
        if !response.status().is_success() {
            return Err(format!(
                "HTTP error: {} {}",
                response.status().as_u16(),
                response.status().canonical_reason().unwrap_or("Unknown")
            )
            .into());
        }

        // Get the HTML content.
        let html = response
            .text()
            .await
            .map_err(|e| ToolError::from(format!("Failed to read response body: {}", e)))?;

        // Extract main content and title using Readability.
        let (title, content_html) = self.extract_content(&html, &input.url);

        // Convert to markdown unless raw is requested.
        let processed_content = if input.raw {
            content_html
        } else {
            self.html_to_markdown(&content_html)
        };

        // Apply pagination.
        let (final_content, is_truncated, total_length) =
            self.paginate_content(processed_content, input.start_index, input.max_length);

        // Format result as the "Key: value" header + "---" + body layout.
        let mut result = String::new();
        result.push_str(&format!("URL: {}\n", input.url));

        if let Some(page_title) = title {
            result.push_str(&format!("Title: {}\n", page_title.trim()));
        }

        // NOTE(review): `total_length` and `final_content.len()` are byte
        // counts (str::len) reported as "characters" — confirm intended unit.
        result.push_str(&format!("Content Length: {} characters\n", total_length));

        if is_truncated {
            let start = input.start_index.unwrap_or(0);
            let end = start + final_content.len();
            result.push_str(&format!(
                "Showing: characters {}-{} (truncated)\n",
                start, end
            ));
        }

        result.push_str("\n---\n\n");
        result.push_str(&final_content);

        Ok(result.into())
    }
}
349
/// Parse fetch output header into metadata fields.
///
/// Splits a fetch result into `(key/value metadata pairs, body text)`.
/// Metadata lines look like `Key: value` and precede a lone `---` separator
/// line; everything after the separator (minus leading newlines) is the
/// content. When there is no separator — or nothing follows it — the entire
/// input is returned as the content.
fn parse_fetch_header(output: &str) -> (Vec<(&str, &str)>, &str) {
    let mut metadata = Vec::new();
    let mut content_start = 0;

    for (i, line) in output.lines().enumerate() {
        if line == "---" {
            let all_lines: Vec<&str> = output.lines().collect();
            // Only advance past the header when something actually follows
            // the separator; otherwise content_start stays 0 and the whole
            // input is treated as content.
            if i + 1 < all_lines.len() {
                // Byte offset of the content: every header line plus its
                // trailing '\n'.
                let header_bytes: usize = all_lines[..=i].iter().map(|l| l.len() + 1).sum();
                content_start = header_bytes;
            }
            break;
        }

        if let Some((key, value)) = line.split_once(": ") {
            metadata.push((key, value));
        }
    }

    let content = match output.get(content_start..) {
        Some(rest) if !rest.is_empty() => rest.trim_start_matches('\n'),
        _ => "",
    };

    (metadata, content)
}
382
#[cfg(test)]
mod tests {
    //! Unit tests for defaults, header parsing, output formatting, and
    //! pagination, plus wiremock-backed integration tests for `execute`
    //! that exercise success, error, timeout, raw-mode, and pagination
    //! paths against a local mock HTTP server (no external network).

    use super::*;
    use wiremock::{
        matchers::{method, path},
        Mock, MockServer, ResponseTemplate,
    };

    /// Helper to create a FetchInput with sensible defaults for testing
    fn test_input(url: impl Into<String>) -> FetchInput {
        FetchInput {
            url: url.into(),
            user_agent: "test-agent".to_string(),
            timeout_seconds: 30,
            raw: false,
            force: false,
            start_index: None,
            max_length: None,
        }
    }

    // ==================== Default and constructor tests ====================

    #[test]
    fn test_default() {
        let tool: FetchTool = Default::default();
        assert_eq!(tool.name(), "fetch");
    }

    #[test]
    fn test_tool_name() {
        let tool = FetchTool::new();
        assert_eq!(tool.name(), "fetch");
    }

    #[test]
    fn test_tool_description() {
        let tool = FetchTool::new();
        assert!(!tool.description().is_empty());
        assert!(tool.description().contains("Fetch"));
    }

    // ==================== Default value function tests ====================

    #[test]
    fn test_default_max_length() {
        assert_eq!(default_max_length(), Some(5000));
    }

    #[test]
    fn test_default_user_agent() {
        let ua = default_user_agent();
        assert!(ua.contains("mixtape"));
    }

    #[test]
    fn test_default_timeout() {
        assert_eq!(default_timeout(), 30);
    }

    // ==================== parse_fetch_header tests ====================

    #[test]
    fn test_parse_fetch_header_complete() {
        let output = "URL: https://example.com\nTitle: Test Page\nContent Length: 1000 characters\nShowing: characters 0-500 (truncated)\n\n---\n\nThis is the content.";
        let (metadata, content) = parse_fetch_header(output);

        assert_eq!(metadata.len(), 4);
        assert_eq!(metadata[0], ("URL", "https://example.com"));
        assert_eq!(metadata[1], ("Title", "Test Page"));
        assert_eq!(metadata[2], ("Content Length", "1000 characters"));
        assert_eq!(metadata[3], ("Showing", "characters 0-500 (truncated)"));
        assert!(content.contains("This is the content"));
    }

    #[test]
    fn test_parse_fetch_header_no_separator() {
        let output = "Just plain content without headers";
        let (metadata, content) = parse_fetch_header(output);

        // Without "---" separator, content_start stays at 0
        // so the entire output is returned as content
        assert!(metadata.is_empty());
        // Content is the full output when there's no "---" separator
        assert_eq!(content, output);
    }

    #[test]
    fn test_parse_fetch_header_with_metadata_no_separator() {
        // Has metadata-like content but no "---" separator
        let output = "URL: https://example.com\nTitle: Test";
        let (metadata, content) = parse_fetch_header(output);

        // Metadata is extracted even without separator
        assert_eq!(metadata.len(), 2);
        assert_eq!(metadata[0], ("URL", "https://example.com"));
        // But content includes everything since content_start=0
        assert!(content.contains("URL:"));
    }

    #[test]
    fn test_parse_fetch_header_minimal() {
        let output = "URL: https://example.com\n\n---\n\nContent";
        let (metadata, content) = parse_fetch_header(output);

        assert_eq!(metadata.len(), 1);
        assert_eq!(metadata[0], ("URL", "https://example.com"));
        assert!(content.contains("Content"));
    }

    #[test]
    fn test_parse_fetch_header_empty() {
        let output = "";
        let (metadata, content) = parse_fetch_header(output);

        assert!(metadata.is_empty());
        assert_eq!(content, "");
    }

    #[test]
    fn test_parse_fetch_header_no_content_after_separator() {
        // When "---" is the last line with no content after it
        // The implementation doesn't update content_start if there's nothing after "---"
        // so content includes everything (content_start stays 0)
        let output = "URL: https://example.com\n---";
        let (metadata, content) = parse_fetch_header(output);

        assert_eq!(metadata.len(), 1);
        // Due to implementation: content includes the whole output since content_start stays 0
        // This is a quirk - when "---" is the last line, i+1 < lines.len() is false
        assert!(content.contains("URL:"));
    }

    #[test]
    fn test_parse_fetch_header_content_after_separator() {
        // When there IS content after "---", content_start is properly set
        let output = "URL: https://example.com\n---\nBody content here";
        let (metadata, content) = parse_fetch_header(output);

        assert_eq!(metadata.len(), 1);
        assert_eq!(metadata[0], ("URL", "https://example.com"));
        // Content should be just the body, not the URL
        assert_eq!(content, "Body content here");
        assert!(!content.contains("URL:"));
    }

    // ==================== format_output_plain tests ====================

    #[test]
    fn test_format_output_plain_no_metadata() {
        let tool = FetchTool::new();
        let result: ToolResult = "Plain content without headers".into();

        let formatted = tool.format_output_plain(&result);
        assert_eq!(formatted, "Plain content without headers");
    }

    #[test]
    fn test_format_output_plain_with_metadata() {
        let tool = FetchTool::new();
        let result: ToolResult = "URL: https://example.com\nTitle: Test\nContent Length: 100 characters\n\n---\n\nContent here".into();

        let formatted = tool.format_output_plain(&result);

        // Should have separator line
        assert!(formatted.contains("─"));
        // Should have icon indicators
        assert!(
            formatted.contains("[>]") || formatted.contains("[#]") || formatted.contains("[=]")
        );
        // Should have content
        assert!(formatted.contains("Content here"));
    }

    #[test]
    fn test_format_output_plain_icons() {
        let tool = FetchTool::new();
        let result: ToolResult = "URL: https://example.com\nTitle: Test Title\nContent Length: 500 characters\nShowing: 0-100\n\n---\n\nBody".into();

        let formatted = tool.format_output_plain(&result);

        // Check icons for different metadata types
        assert!(formatted.contains("[>]")); // URL
        assert!(formatted.contains("[#]")); // Title
        assert!(formatted.contains("[=]")); // Content Length
        assert!(formatted.contains("[~]")); // Showing
    }

    // ==================== format_output_ansi tests ====================

    #[test]
    fn test_format_output_ansi_no_metadata() {
        let tool = FetchTool::new();
        let result: ToolResult = "Plain content".into();

        let formatted = tool.format_output_ansi(&result);
        assert_eq!(formatted, "Plain content");
    }

    #[test]
    fn test_format_output_ansi_with_metadata() {
        let tool = FetchTool::new();
        let result: ToolResult = "URL: https://example.com\nTitle: Test\n\n---\n\nContent".into();

        let formatted = tool.format_output_ansi(&result);

        // Should contain ANSI escape codes
        assert!(formatted.contains("\x1b["));
        // Should have dimmed separator
        assert!(formatted.contains("\x1b[2m"));
    }

    #[test]
    fn test_format_output_ansi_colors() {
        let tool = FetchTool::new();
        let result: ToolResult = "URL: https://example.com\nTitle: Test\nContent Length: 100 characters\nShowing: 0-50\n\n---\n\nBody".into();

        let formatted = tool.format_output_ansi(&result);

        // URL should be blue
        assert!(formatted.contains("\x1b[34m"));
        // Title should be bold
        assert!(formatted.contains("\x1b[1m"));
        // Content Length should be green
        assert!(formatted.contains("\x1b[32m"));
        // Showing should be cyan
        assert!(formatted.contains("\x1b[36m"));
    }

    // ==================== format_output_markdown tests ====================

    #[test]
    fn test_format_output_markdown_no_metadata() {
        let tool = FetchTool::new();
        let result: ToolResult = "Plain content".into();

        let formatted = tool.format_output_markdown(&result);
        assert_eq!(formatted, "Plain content");
    }

    #[test]
    fn test_format_output_markdown_with_title() {
        let tool = FetchTool::new();
        let result: ToolResult =
            "URL: https://example.com\nTitle: My Page Title\n\n---\n\nContent".into();

        let formatted = tool.format_output_markdown(&result);

        // Title should be a heading
        assert!(formatted.contains("## My Page Title"));
    }

    #[test]
    fn test_format_output_markdown_metadata_as_list() {
        let tool = FetchTool::new();
        let result: ToolResult =
            "URL: https://example.com\nTitle: Test\nContent Length: 500 characters\n\n---\n\nBody"
                .into();

        let formatted = tool.format_output_markdown(&result);

        // Non-title metadata should be in list format with bold keys
        assert!(formatted.contains("- **URL**: https://example.com"));
        assert!(formatted.contains("- **Content Length**: 500 characters"));
        // Title should NOT be in the list (it's a heading)
        assert!(!formatted.contains("- **Title**"));
    }

    #[test]
    fn test_format_output_markdown_separator() {
        let tool = FetchTool::new();
        let result: ToolResult = "URL: https://example.com\n\n---\n\nBody content".into();

        let formatted = tool.format_output_markdown(&result);

        // Should have horizontal rule separator
        assert!(formatted.contains("---"));
        // Should have content
        assert!(formatted.contains("Body content"));
    }

    // ==================== paginate_content edge cases ====================

    #[test]
    fn test_paginate_content_start_beyond_length() {
        let tool = FetchTool::new();
        let content = "Short".to_string();

        let (result, truncated, total) = tool.paginate_content(content, Some(100), Some(10));

        assert_eq!(result, "");
        assert!(!truncated);
        assert_eq!(total, 5);
    }

    #[test]
    fn test_paginate_content_exact_length() {
        let tool = FetchTool::new();
        let content = "12345".to_string();

        let (result, truncated, total) = tool.paginate_content(content, Some(0), Some(5));

        assert_eq!(result, "12345");
        assert!(!truncated);
        assert_eq!(total, 5);
    }

    // ==================== Integration tests ====================

    #[test]
    fn test_extract_content() {
        let tool = FetchTool::new();
        let html = r#"
            <html>
                <head><title>Test Page</title></head>
                <body>
                    <nav>Navigation</nav>
                    <article>
                        <h1>Main Content</h1>
                        <p>This is the article content.</p>
                    </article>
                    <footer>Footer</footer>
                </body>
            </html>
        "#;

        let (title, content) = tool.extract_content(html, "https://example.com/test");
        // Readability may extract the H1 as title if it's more prominent than <title>
        assert_eq!(title, Some("Main Content".to_string()));
        // The content should include the article body
        assert!(content.contains("This is the article content"));
        // Navigation and footer should be removed by readability
        assert!(!content.contains("Navigation") || content.len() < html.len());
    }

    #[test]
    fn test_paginate_content() {
        let tool = FetchTool::new();
        let content = "0123456789".to_string();

        // Full content
        let (result, truncated, total) = tool.paginate_content(content.clone(), None, None);
        assert_eq!(result, "0123456789");
        assert!(!truncated);
        assert_eq!(total, 10);

        // Paginated
        let (result, truncated, total) = tool.paginate_content(content.clone(), Some(2), Some(5));
        assert_eq!(result, "23456");
        assert!(truncated);
        assert_eq!(total, 10);

        // Last page
        let (result, truncated, total) = tool.paginate_content(content.clone(), Some(5), Some(10));
        assert_eq!(result, "56789");
        assert!(!truncated);
        assert_eq!(total, 10);
    }

    #[test]
    fn test_html_to_markdown() {
        let tool = FetchTool::new();
        let html = "<h1>Title</h1><p>Paragraph with <strong>bold</strong> text.</p>";
        let markdown = tool.html_to_markdown(html);

        assert!(markdown.contains("Title"));
        assert!(markdown.contains("Paragraph"));
        assert!(markdown.contains("bold"));
    }

    // ===== Tests with wiremock for execute() method =====

    #[tokio::test]
    async fn test_fetch_successful_html() {
        let mock_server = MockServer::start().await;

        let html_body = r#"
            <html>
                <head><title>Test Article</title></head>
                <body>
                    <article>
                        <h1>Main Heading</h1>
                        <p>This is the main content of the article.</p>
                    </article>
                </body>
            </html>
        "#;

        Mock::given(method("GET"))
            .and(path("/test"))
            .respond_with(ResponseTemplate::new(200).set_body_string(html_body))
            .mount(&mock_server)
            .await;

        let tool = FetchTool::new();
        let input = test_input(format!("{}/test", mock_server.uri()));

        let result = tool.execute(input).await.unwrap();
        let output = result.as_text();

        assert!(output.contains("URL:"));
        assert!(output.contains("Title: Main Heading"));
        assert!(output.contains("main content"));
    }

    #[tokio::test]
    async fn test_fetch_404_error() {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/notfound"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        let tool = FetchTool::new();
        let input = test_input(format!("{}/notfound", mock_server.uri()));

        let result = tool.execute(input).await;
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("HTTP error") || err.contains("404"));
    }

    #[tokio::test]
    async fn test_fetch_timeout() {
        let mock_server = MockServer::start().await;

        // Server delays 1.5s; the 1s request deadline below must fire first.
        Mock::given(method("GET"))
            .and(path("/slow"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_delay(std::time::Duration::from_millis(1500))
                    .set_body_string("<html><body>Slow</body></html>"),
            )
            .mount(&mock_server)
            .await;

        let tool = FetchTool::new();
        let mut input = test_input(format!("{}/slow", mock_server.uri()));
        input.timeout_seconds = 1; // Short timeout

        let result = tool.execute(input).await;
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("timeout") || err.contains("timed out"));
    }

    #[tokio::test]
    async fn test_fetch_raw_mode() {
        let mock_server = MockServer::start().await;

        let html_body = "<html><body><h1>Raw HTML</h1><p>Content</p></body></html>";

        Mock::given(method("GET"))
            .and(path("/raw"))
            .respond_with(ResponseTemplate::new(200).set_body_string(html_body))
            .mount(&mock_server)
            .await;

        let tool = FetchTool::new();
        let mut input = test_input(format!("{}/raw", mock_server.uri()));
        input.raw = true;

        let result = tool.execute(input).await.unwrap();
        let output = result.as_text();

        // Should contain HTML tags when in raw mode
        assert!(output.contains("<h1>") || output.contains("Raw HTML"));
    }

    #[tokio::test]
    async fn test_fetch_with_pagination() {
        let mock_server = MockServer::start().await;

        let html_body = r#"
            <html>
                <body>
                    <article>
                        <p>This is a very long article with lots of content that will be paginated.</p>
                    </article>
                </body>
            </html>
        "#;

        Mock::given(method("GET"))
            .and(path("/paginated"))
            .respond_with(ResponseTemplate::new(200).set_body_string(html_body))
            .mount(&mock_server)
            .await;

        let tool = FetchTool::new();
        let mut input = test_input(format!("{}/paginated", mock_server.uri()));
        input.start_index = Some(0);
        input.max_length = Some(50);

        let result = tool.execute(input).await.unwrap();
        let output = result.as_text();

        assert!(output.contains("Showing:") || output.contains("truncated"));
    }

    #[tokio::test]
    async fn test_fetch_invalid_url() {
        let tool = FetchTool::new();
        let input = test_input("not-a-valid-url");

        let result = tool.execute(input).await;
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("Invalid URL") || err.contains("scheme"));
    }

    #[tokio::test]
    async fn test_fetch_disallowed_scheme() {
        let tool = FetchTool::new();
        let input = test_input("file:///etc/passwd");

        let result = tool.execute(input).await;
        assert!(result.is_err());
        // The error message may vary based on implementation
        // Just verify it fails for non-HTTP schemes
    }
}