gigi-cli 1.0.0

Gigi — A Claude Code-like AI coding assistant CLI in Rust
use anyhow::Result;
use async_trait::async_trait;
use reqwest::header::USER_AGENT;
use serde::{Deserialize, Serialize};
use serde_json::Value;

use super::{Tool, ToolOutput};

/// Arguments of the `WebFetch` tool, deserialized from the JSON value passed to `execute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebFetchInput {
    /// Address of the page to fetch.
    pub url: String,
    /// Caller-supplied prompt; listed as required in the tool's parameter schema.
    pub prompt: String,
}

/// Result payload of the `WebFetch` tool; serialized to pretty-printed JSON by `execute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebFetchOutput {
    /// The URL that was requested, copied from the input.
    pub url: String,
    /// Plain text extracted from the fetched response body.
    pub text: String,
}

/// Tool that fetches a URL over HTTP and converts the response into readable text.
pub struct WebFetchTool {
    /// HTTP client used to issue the page requests.
    client: reqwest::Client,
}

impl WebFetchTool {
    /// Builds a tool backed by a freshly created `reqwest::Client` with default settings.
    pub fn new() -> Self {
        let client = reqwest::Client::new();
        Self { client }
    }
}

impl Default for WebFetchTool {
    fn default() -> Self {
        Self::new()
    }
}

/// Removes every `<tag_name …>…</tag_name>` element from `html`, together with
/// everything between the opening and closing tag (used for `<script>` and `<style>`).
///
/// Tag matching is ASCII case-insensitive. An opening tag without a matching
/// closing tag causes the rest of the input to be discarded. Matching is a plain
/// prefix search, so `tag_name` also matches longer tag names that start with it.
fn remove_tag_content(html: &str, tag_name: &str) -> String {
    let tag = tag_name.to_ascii_lowercase();
    let open_tag = format!("<{}", tag);
    let close_tag = format!("</{}", tag);

    // ASCII-only lowercasing never changes byte lengths, so every offset found in
    // `lowered` is also a valid char-boundary offset into `html`. (Full Unicode
    // lowercasing can grow or shrink characters and would misalign the slices.)
    // Lowercasing once up front also avoids re-lowercasing the tail on each match.
    let lowered = html.to_ascii_lowercase();

    let mut result = String::with_capacity(html.len());
    let mut cursor = 0;

    while let Some(start_idx) = lowered[cursor..].find(&open_tag) {
        let absolute_start = cursor + start_idx;
        // Keep the text that precedes the element.
        result.push_str(&html[cursor..absolute_start]);

        let end_idx = match lowered[absolute_start..].find(&close_tag) {
            Some(idx) => idx,
            // Unmatched opening tag: discard the rest of the document.
            None => return result,
        };
        let absolute_end = absolute_start + end_idx + close_tag.len();

        // Skip past the `>` that terminates the closing tag (e.g. `</script >`);
        // if there is none, resume right after the closing tag name.
        cursor = match html[absolute_end..].find('>') {
            Some(bracket) => absolute_end + bracket + 1,
            None => absolute_end,
        };
    }

    // Whatever follows the last removed element is kept verbatim.
    result.push_str(&html[cursor..]);
    result
}

/// Strips all HTML tags from `html`, leaving only the text content.
///
/// Each removed tag is replaced by a space so adjacent words stay separated, a
/// small fixed set of character entities is decoded, and every run of whitespace
/// is collapsed to a single space with leading/trailing whitespace trimmed.
fn strip_all_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;

    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            // Only a `>` that actually closes a tag is dropped; a literal `>` in
            // running text (e.g. "a > b") is kept.
            '>' if in_tag => {
                in_tag = false;
                // Separate the words on either side of the removed tag.
                text.push(' ');
            }
            _ if in_tag => {}
            _ => text.push(ch),
        }
    }

    // `&amp;` must be decoded last: decoding it first would turn an escaped
    // entity such as `&amp;lt;` into `&lt;` and then wrongly into `<`.
    let decoded = text
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&#39;", "'")
        .replace("&nbsp;", " ")
        .replace("&amp;", "&");

    // Collapse whitespace runs to single spaces and trim both ends.
    decoded.split_whitespace().collect::<Vec<_>>().join(" ")
}

#[async_trait]
impl Tool for WebFetchTool {
    /// Name under which the tool is exposed.
    fn name(&self) -> &str {
        "WebFetch"
    }

    /// One-line description of the tool.
    fn description(&self) -> &str {
        "Fetch a URL and convert it into readable text."
    }

    /// JSON Schema for the tool input: an object with required string fields
    /// `url` and `prompt`, and no additional properties.
    fn parameters_schema(&self) -> Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "url": { "type": "string", "format": "uri" },
                "prompt": { "type": "string" }
            },
            "required": ["url", "prompt"],
            "additionalProperties": false
        })
    }

    /// Fetches `input.url`, strips `<script>`/`<style>` elements and all remaining
    /// tags, and returns the resulting text as a pretty-printed JSON
    /// [`WebFetchOutput`].
    ///
    /// The `prompt` field is deserialized but not otherwise used here.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` does not deserialize into [`WebFetchInput`],
    /// if sending the request or reading the response body fails, or if the
    /// output cannot be serialized.
    async fn execute(&self, input: Value) -> Result<ToolOutput> {
        // Upper bound on the extracted text, measured in bytes (`String::len`).
        const MAX_TEXT_BYTES: usize = 100_000;

        let fetch_input: WebFetchInput = serde_json::from_value(input)?;

        let res = self
            .client
            .get(&fetch_input.url)
            .header(
                USER_AGENT,
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \
                 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            .send()
            .await?;

        let html_content = res.text().await?;

        let mut text = remove_tag_content(&html_content, "script");
        text = remove_tag_content(&text, "style");
        text = strip_all_tags(&text);

        // Truncate oversized output to keep the tool result bounded.
        if text.len() > MAX_TEXT_BYTES {
            // `String::truncate` panics if the cut falls inside a multi-byte UTF-8
            // character, so back up to the nearest char boundary at or below the
            // limit. Offset 0 is always a boundary, so the loop terminates.
            let mut cut = MAX_TEXT_BYTES;
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            text.truncate(cut);
            text.push_str("\n\n[Content truncated — exceeded 100,000 characters]");
        }

        let output = WebFetchOutput {
            url: fetch_input.url,
            text,
        };

        let serialized = serde_json::to_string_pretty(&output)?;
        Ok(ToolOutput::success(serialized))
    }
}