Skip to main content

mcp_postgres/actions/
data_io.rs

1use crate::errors::Result as MCPResult;
2use crate::validation::quote_ident;
3use futures::SinkExt;
4use futures::StreamExt;
5use serde_json::{Value, json};
6use std::time::Duration;
7use tokio_postgres::Client;
8
/// Upper bound, in bytes, on the response body `import_from_url` will accept (100 MiB).
const MAX_IMPORT_BYTES: usize = 100 << 20;
/// Upper bound, in bytes, on the CSV text `export_csv` accumulates (100 MiB); keeps memory bounded.
const MAX_EXPORT_BYTES: usize = 100 << 20;
/// How long `import_from_url` lets its outbound HTTP fetch run before giving up.
const IMPORT_FETCH_TIMEOUT: Duration = Duration::from_secs(30);
15
16pub async fn import_from_url(client: &Client, params: &Option<&Value>) -> MCPResult<Value> {
17    let url = params
18        .as_ref()
19        .and_then(|p| p.get("url").and_then(|v| v.as_str()))
20        .ok_or_else(|| crate::errors::MCPError::InvalidParams("Missing 'url' parameter".into()))?;
21    let table = params
22        .as_ref()
23        .and_then(|p| p.get("table").and_then(|v| v.as_str()))
24        .ok_or_else(|| {
25            crate::errors::MCPError::InvalidParams("Missing 'table' parameter".into())
26        })?;
27    let schema = params
28        .as_ref()
29        .and_then(|p| p.get("schema").and_then(|v| v.as_str()))
30        .unwrap_or("public");
31    let delimiter = params
32        .as_ref()
33        .and_then(|p| p.get("delimiter").and_then(|v| v.as_str()))
34        .unwrap_or(",");
35    let header = params
36        .as_ref()
37        .and_then(|p| p.get("header").and_then(|v| v.as_bool()))
38        .unwrap_or(true);
39    let truncate = params
40        .as_ref()
41        .and_then(|p| p.get("truncate").and_then(|v| v.as_bool()))
42        .unwrap_or(false);
43    let columns = params
44        .as_ref()
45        .and_then(|p| p.get("columns").and_then(|v| v.as_str()));
46
47    // COPY requires a single-character delimiter; reject anything else so the
48    // value cannot smuggle extra COPY options.
49    if delimiter.chars().count() != 1 {
50        return Err(crate::errors::MCPError::InvalidParams(
51            "'delimiter' must be a single character".into(),
52        ));
53    }
54
55    // Validate the optional column list as identifiers and rebuild it quoted,
56    // instead of interpolating the raw string into the COPY statement.
57    let col_clause = match columns {
58        Some(c) => {
59            let mut quoted = Vec::new();
60            for col in c.split(',') {
61                let col = col.trim();
62                crate::validation::validate_identifier(col, "column")?;
63                quoted.push(quote_ident(col));
64            }
65            format!(" ({})", quoted.join(", "))
66        }
67        None => String::new(),
68    };
69
70    // SSRF guard: only http(s), and the host must resolve to a public address.
71    crate::ssrf::validate_import_url(url).await?;
72
73    let qualified = format!("{}.{}", quote_ident(schema), quote_ident(table));
74
75    if truncate {
76        client
77            .execute(&format!("TRUNCATE {}", qualified), &[])
78            .await?;
79    }
80
81    // Build the COPY SQL early so we can open the sink before the HTTP fetch.
82    let copy_sql = format!(
83        "COPY {} FROM STDIN (FORMAT csv, HEADER {}, DELIMITER '{}'){}",
84        qualified,
85        if header { "true" } else { "false" },
86        delimiter.replace('\'', "''"),
87        col_clause,
88    );
89
90    // Open the COPY sink first — chunks stream directly into it.
91    let mut sink = Box::pin(client.copy_in(&copy_sql).await?);
92
93    // Disable redirects (a 3xx could redirect to a blocked internal address)
94    // and bound the request time.
95    let http = reqwest::Client::builder()
96        .redirect(reqwest::redirect::Policy::none())
97        .timeout(IMPORT_FETCH_TIMEOUT)
98        .build()
99        .map_err(|e| {
100            crate::errors::MCPError::InvalidParams(format!("Failed to build HTTP client: {e}"))
101        })?;
102
103    let resp = http.get(url).send().await.map_err(|e| {
104        crate::errors::MCPError::InvalidParams(format!("Failed to fetch URL: {}", e))
105    })?;
106    let status = resp.status();
107    if !status.is_success() {
108        return Err(crate::errors::MCPError::InvalidParams(format!(
109            "URL returned HTTP {}",
110            status
111        )));
112    }
113
114    // Stream body chunks directly into the COPY sink instead of buffering
115    // the entire file. A hard size cap still bounds worst-case memory.
116    let mut stream = resp.bytes_stream();
117    let mut total_bytes: usize = 0;
118    while let Some(chunk) = stream.next().await {
119        let chunk = chunk.map_err(|e| {
120            crate::errors::MCPError::InvalidParams(format!("Failed to read response body: {}", e))
121        })?;
122        total_bytes += chunk.len();
123        if total_bytes > MAX_IMPORT_BYTES {
124            return Err(crate::errors::MCPError::InvalidParams(format!(
125                "Response body exceeds maximum import size of {} bytes",
126                MAX_IMPORT_BYTES
127            )));
128        }
129        sink.as_mut().send(chunk).await?;
130    }
131    // finish() flushes, ends the COPY, and returns the number of rows imported.
132    let count = sink.as_mut().finish().await?;
133
134    Ok(json!({
135        "success": true,
136        "table": table,
137        "schema": schema,
138        "rows_imported": count,
139        "source": url,
140    }))
141}
142
143pub async fn export_csv(client: &Client, params: &Option<&Value>) -> MCPResult<Value> {
144    let query = params
145        .as_ref()
146        .and_then(|p| p.get("query").and_then(|v| v.as_str()));
147    let table = params
148        .as_ref()
149        .and_then(|p| p.get("table").and_then(|v| v.as_str()));
150    let schema = params
151        .as_ref()
152        .and_then(|p| p.get("schema").and_then(|v| v.as_str()))
153        .unwrap_or("public");
154    let header = params
155        .as_ref()
156        .and_then(|p| p.get("header").and_then(|v| v.as_bool()))
157        .unwrap_or(true);
158    let delimiter = params
159        .as_ref()
160        .and_then(|p| p.get("delimiter").and_then(|v| v.as_str()))
161        .unwrap_or(",");
162    let limit = params
163        .as_ref()
164        .and_then(|p| p.get("limit").and_then(|v| v.as_i64()))
165        .unwrap_or(10000)
166        .min(100000);
167
168    if delimiter.chars().count() != 1 {
169        return Err(crate::errors::MCPError::InvalidParams(
170            "'delimiter' must be a single character".into(),
171        ));
172    }
173
174    let sql = match (query, table) {
175        (Some(q), _) => {
176            crate::actions::query::validate_sql(q, "SELECT", "SELECT")?;
177            let trimmed = q.trim();
178            format!("({}) AS _export", trimmed.trim_end_matches(';'))
179        }
180        (None, Some(t)) => format!("{}.{}", quote_ident(schema), quote_ident(t)),
181        (None, None) => {
182            return Err(crate::errors::MCPError::InvalidParams(
183                "Either 'query' or 'table' is required".into(),
184            ));
185        }
186    };
187
188    let copy_sql = format!(
189        "COPY {} TO STDOUT (FORMAT csv, HEADER {}, DELIMITER '{}', LIMIT {})",
190        sql,
191        if header { "true" } else { "false" },
192        delimiter.replace('\'', "''"),
193        limit,
194    );
195
196    let stream = client.copy_out(&copy_sql).await?;
197    let mut stream = Box::pin(stream);
198    let mut output = Vec::new();
199    while let Some(chunk) = stream.next().await {
200        let chunk = chunk?;
201        if output.len() + chunk.len() > MAX_EXPORT_BYTES {
202            return Err(crate::errors::MCPError::InvalidParams(format!(
203                "Export exceeds maximum size of {} bytes; narrow the query or lower the limit",
204                MAX_EXPORT_BYTES
205            )));
206        }
207        output.extend_from_slice(&chunk);
208    }
209
210    let csv_text = String::from_utf8(output).map_err(|e| {
211        crate::errors::MCPError::InvalidParams(format!("Output is not valid UTF-8: {}", e))
212    })?;
213
214    Ok(json!({
215        "csv": csv_text,
216        "row_count": csv_text.lines().count().saturating_sub(if header { 1 } else { 0 }),
217        "format": "csv",
218    }))
219}