Skip to main content

fetchium_mcp/
tools.rs

//! MCP tool schema definitions (PRD §30).
//!
//! Defines the 12 MCP tools (the 5 composite Fetchium tools plus the YouTube and
//! social-media tools) and their JSON Schema input types.
4
5use serde::Deserialize;
6use serde_json::{json, Value};
7
8/// Return the JSON Schema definitions for all 5 composite MCP tools.
9pub fn tool_definitions() -> Vec<Value> {
10    vec![
11        json!({
12            "name": "fetchium_search",
13            "description": "Search the web and return token-efficient results. Handles the full pipeline: multi-backend search, ranking, deduplication, validation, and token budgeting in a single call.",
14            "inputSchema": {
15                "type": "object",
16                "properties": {
17                    "query": { "type": "string", "description": "The search query" },
18                    "token_budget": { "type": "integer", "description": "Maximum tokens in response (default: 2000)" },
19                    "tier": { "type": "string", "enum": ["key_facts", "summary", "detailed", "complete"], "description": "Detail level (default: summary)" },
20                    "max_sources": { "type": "integer", "description": "Maximum number of sources to include (default: 10)" }
21                },
22                "required": ["query"]
23            }
24        }),
25        json!({
26            "name": "fetchium_fetch",
27            "description": "Fetch a URL with query-aware extraction. Extracts only content relevant to the query, within the token budget. Far more efficient than raw scraping.",
28            "inputSchema": {
29                "type": "object",
30                "properties": {
31                    "url": { "type": "string", "description": "The URL to fetch" },
32                    "query": { "type": "string", "description": "Extract only content relevant to this query (optional but recommended)" },
33                    "token_budget": { "type": "integer", "description": "Maximum tokens in response (default: 3000)" },
34                    "format": { "type": "string", "enum": ["markdown", "segments", "json"], "description": "Output format (default: markdown)" }
35                },
36                "required": ["url"]
37            }
38        }),
39        json!({
40            "name": "fetchium_research",
41            "description": "Conduct multi-source research with citations and evidence tracking. Searches, extracts, ranks, validates, and synthesizes findings with full citation chains.",
42            "inputSchema": {
43                "type": "object",
44                "properties": {
45                    "query": { "type": "string", "description": "The research query" },
46                    "token_budget": { "type": "integer", "description": "Maximum tokens in response (default: 4000)" },
47                    "depth": { "type": "string", "enum": ["shallow", "standard", "deep"], "description": "Research depth (default: standard)" },
48                    "max_sources": { "type": "integer", "description": "Maximum number of sources to analyze (default: 10)" },
49                    "strict_evidence": { "type": "boolean", "description": "Require citation for every claim (default: false)" },
50                    "citation_style": { "type": "string", "description": "Citation style: inline, apa, ieee (default: inline)" }
51                },
52                "required": ["query"]
53            }
54        }),
55        json!({
56            "name": "fetchium_estimate",
57            "description": "Estimate the token cost of fetching a URL without actually fetching it. Use this before committing tokens.",
58            "inputSchema": {
59                "type": "object",
60                "properties": {
61                    "url": { "type": "string", "description": "The URL to estimate" }
62                },
63                "required": ["url"]
64            }
65        }),
66        json!({
67            "name": "fetchium_expand",
68            "description": "Get more detail on a previous result using its result_id and Progressive Detail Streaming (PDS). Expands from key_facts to summary to detailed to complete without re-fetching.",
69            "inputSchema": {
70                "type": "object",
71                "properties": {
72                    "result_id": { "type": "string", "description": "The result_id from a previous search or fetch call" },
73                    "tier": { "type": "string", "enum": ["key_facts", "summary", "detailed", "complete"], "description": "The detail tier to expand to" }
74                },
75                "required": ["result_id", "tier"]
76            }
77        }),
78        json!({
79            "name": "youtube_search",
80            "description": "Search YouTube videos with VideoFusion ranking. Returns ranked videos with relevance, freshness, authority, engagement, and educational scores.",
81            "inputSchema": {
82                "type": "object",
83                "properties": {
84                    "query": { "type": "string", "description": "The search query" },
85                    "max_results": { "type": "integer", "description": "Maximum videos to return (default: 5)" },
86                    "fact_check": { "type": "boolean", "description": "Enable cross-video fact checking (default: false)" }
87                },
88                "required": ["query"]
89            }
90        }),
91        json!({
92            "name": "youtube_analyze",
93            "description": "Analyze a single YouTube video: metadata, transcript, comments, credibility, clickbait detection, and educational scoring.",
94            "inputSchema": {
95                "type": "object",
96                "properties": {
97                    "url": { "type": "string", "description": "The YouTube video URL" },
98                    "transcript": { "type": "boolean", "description": "Fetch transcript (default: true)" },
99                    "comments": { "type": "boolean", "description": "Fetch comments (default: true)" },
100                    "teaching": { "type": "boolean", "description": "Generate teaching content (default: false)" }
101                },
102                "required": ["url"]
103            }
104        }),
105        json!({
106            "name": "youtube_watch",
107            "description": "Unified YouTube watch report: metadata + transcript + summary + key moments + comment signals in one payload.",
108            "inputSchema": {
109                "type": "object",
110                "properties": {
111                    "url": { "type": "string", "description": "The YouTube video URL" },
112                    "transcript": { "type": "boolean", "description": "Fetch transcript (default: true)" },
113                    "comments": { "type": "boolean", "description": "Fetch comments (default: true)" },
114                    "highlights": { "type": "integer", "description": "Top key moments to include (default: 5)" }
115                },
116                "required": ["url"]
117            }
118        }),
119        json!({
120            "name": "youtube_transcript",
121            "description": "Extract universal transcript (YouTube fast path + fallback), with key moments and quality score.",
122            "inputSchema": {
123                "type": "object",
124                "properties": {
125                    "url": { "type": "string", "description": "Video URL" },
126                    "highlights": { "type": "integer", "description": "Top key moments to include (default: 5)" }
127                },
128                "required": ["url"]
129            }
130        }),
131        json!({
132            "name": "social_research",
133            "description": "Unified cross-platform social media research: Twitter/X, Reddit, TikTok, HackerNews, YouTube simultaneously. Returns trends, viral content, and content ideas.",
134            "inputSchema": {
135                "type": "object",
136                "properties": {
137                    "query": { "type": "string", "description": "The research query or topic" },
138                    "platforms": { "type": "array", "items": { "type": "string", "enum": ["twitter", "reddit", "tiktok", "hackernews", "youtube"] }, "description": "Platforms to include (default: all)" },
139                    "max_per_platform": { "type": "integer", "description": "Max posts per platform (default: 20)" },
140                    "generate_ideas": { "type": "boolean", "description": "Generate content ideas (default: true)" }
141                },
142                "required": ["query"]
143            }
144        }),
145        json!({
146            "name": "reddit_search",
147            "description": "Search Reddit posts with sentiment analysis, subreddit clustering, and viral detection. Uses the free public JSON API.",
148            "inputSchema": {
149                "type": "object",
150                "properties": {
151                    "query": { "type": "string", "description": "Search query" },
152                    "subreddits": { "type": "array", "items": { "type": "string" }, "description": "Specific subreddits to search (optional)" },
153                    "max_posts": { "type": "integer", "description": "Maximum posts (default: 25)" }
154                },
155                "required": ["query"]
156            }
157        }),
158        json!({
159            "name": "hackernews_search",
160            "description": "Search Hacker News stories via Algolia + Firebase APIs. Free, no rate limits, returns ranked stories with engagement metrics.",
161            "inputSchema": {
162                "type": "object",
163                "properties": {
164                    "query": { "type": "string", "description": "Search query" },
165                    "max_results": { "type": "integer", "description": "Maximum stories (default: 20)" }
166                },
167                "required": ["query"]
168            }
169        }),
170    ]
171}
172
173// ─── Input structs ────────────────────────────────────────────────
174
175#[derive(Debug, Deserialize)]
176pub struct SearchInput {
177    pub query: String,
178    pub token_budget: Option<usize>,
179    pub tier: Option<String>,
180    pub max_sources: Option<usize>,
181    pub include_content: Option<bool>,
182}
183
184#[derive(Debug, Deserialize)]
185pub struct FetchInput {
186    pub url: String,
187    pub query: Option<String>,
188    pub token_budget: Option<usize>,
189    pub format: Option<String>,
190}
191
192#[derive(Debug, Deserialize)]
193pub struct ResearchInput {
194    pub query: String,
195    pub token_budget: Option<usize>,
196    pub max_sources: Option<usize>,
197    pub depth: Option<String>,
198    pub strict_evidence: Option<bool>,
199    pub citation_style: Option<String>,
200}
201
202#[derive(Debug, Deserialize)]
203pub struct EstimateInput {
204    pub url: String,
205}
206
207#[derive(Debug, Deserialize)]
208pub struct ExpandInput {
209    pub result_id: String,
210    pub tier: String,
211}
212
213#[derive(Debug, Deserialize)]
214pub struct YouTubeSearchInput {
215    pub query: String,
216    pub max_results: Option<usize>,
217    pub fact_check: Option<bool>,
218}
219
220#[derive(Debug, Deserialize)]
221pub struct YouTubeAnalyzeInput {
222    pub url: String,
223    pub transcript: Option<bool>,
224    pub comments: Option<bool>,
225    pub teaching: Option<bool>,
226}
227
228#[derive(Debug, Deserialize)]
229pub struct YouTubeWatchInput {
230    pub url: String,
231    pub transcript: Option<bool>,
232    pub comments: Option<bool>,
233    pub highlights: Option<usize>,
234}
235
236#[derive(Debug, Deserialize)]
237pub struct YouTubeTranscriptInput {
238    pub url: String,
239    pub highlights: Option<usize>,
240}
241
242#[derive(Debug, Deserialize)]
243pub struct SocialResearchInput {
244    pub query: String,
245    pub platforms: Option<Vec<String>>,
246    pub max_per_platform: Option<usize>,
247    pub generate_ideas: Option<bool>,
248}
249
250#[derive(Debug, Deserialize)]
251pub struct RedditSearchInput {
252    pub query: String,
253    pub subreddits: Option<Vec<String>>,
254    pub max_posts: Option<usize>,
255}
256
257#[derive(Debug, Deserialize)]
258pub struct HackerNewsSearchInput {
259    pub query: String,
260    pub max_results: Option<usize>,
261}