Skip to main content

sh_layer3/builtin_tools/
data_processing.rs

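//! # Data Processing Tools
//!
//! Built-in data processing tools: parsing and serialization for JSON, YAML, TOML and CSV,
//! plus Base64/URL encoding and decoding, hashing, and UUID generation.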
4
5use crate::builtin_tools::BuiltinTool;
6use crate::types::{Layer3Result, ToolCategory};
7use async_trait::async_trait;
8use base64::Engine;
9use serde_json::Value;
10
11// ============================================================================
12// JSON Parse Tool
13// ============================================================================
14
15/// JSON 解析工具
16pub struct JsonParseTool;
17
18#[async_trait]
19impl BuiltinTool for JsonParseTool {
20    fn name(&self) -> &str {
21        "json_parse"
22    }
23
24    fn description(&self) -> &str {
25        "Parse JSON string and return formatted output. Supports querying with JSONPath."
26    }
27
28    fn parameters_schema(&self) -> serde_json::Value {
29        serde_json::json!({
30            "type": "object",
31            "properties": {
32                "json": {
33                    "type": "string",
34                    "description": "JSON string to parse"
35                },
36                "query": {
37                    "type": "string",
38                    "description": "Optional JSONPath query (e.g., '$.data[0].name')"
39                }
40            },
41            "required": ["json"]
42        })
43    }
44
45    fn category(&self) -> ToolCategory {
46        ToolCategory::DataProcessing
47    }
48
49    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
50        let json_str = args["json"]
51            .as_str()
52            .ok_or_else(|| anyhow::anyhow!("Missing json parameter"))?;
53
54        let value: Value = serde_json::from_str(json_str)
55            .map_err(|e| anyhow::anyhow!("Failed to parse JSON: {}", e))?;
56
57        if let Some(query) = args["query"].as_str() {
58            // Simple JSONPath-like query
59            let result = query_json(&value, query)?;
60            Ok(serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string()))
61        } else {
62            Ok(serde_json::to_string_pretty(&value)
63                .map_err(|e| anyhow::anyhow!("Failed to format JSON: {}", e))?)
64        }
65    }
66}
67
68/// Simple JSONPath query implementation
69fn query_json(value: &Value, path: &str) -> Layer3Result<Value> {
70    let path = path.strip_prefix('$').unwrap_or(path);
71
72    // Parse path into parts - handle both `.key` and `[index]` syntax
73    let mut parts: Vec<String> = Vec::new();
74    let mut current = String::new();
75    let mut chars = path.chars().peekable();
76
77    while let Some(c) = chars.next() {
78        match c {
79            '.' => {
80                if !current.is_empty() {
81                    parts.push(current.clone());
82                    current.clear();
83                }
84            }
85            '[' => {
86                if !current.is_empty() {
87                    parts.push(current.clone());
88                    current.clear();
89                }
90                // Read until closing bracket
91                while let Some(inner) = chars.next() {
92                    if inner == ']' {
93                        break;
94                    }
95                    current.push(inner);
96                }
97                if !current.is_empty() {
98                    parts.push(format!("[{}]", current));
99                    current.clear();
100                }
101            }
102            _ => {
103                current.push(c);
104            }
105        }
106    }
107    if !current.is_empty() {
108        parts.push(current);
109    }
110
111    let mut result = value.clone();
112    for part in &parts {
113        if part.starts_with('[') && part.ends_with(']') {
114            // Array index
115            let index_str = &part[1..part.len() - 1];
116            let index: usize = index_str
117                .parse()
118                .map_err(|e| anyhow::anyhow!("Invalid array index: {}", e))?;
119            result = result
120                .get(index)
121                .cloned()
122                .ok_or_else(|| anyhow::anyhow!("Index {} out of bounds", index))?;
123        } else {
124            result = result
125                .get(part)
126                .cloned()
127                .ok_or_else(|| anyhow::anyhow!("Key '{}' not found", part))?;
128        }
129    }
130    Ok(result)
131}
132
133// ============================================================================
134// JSON Stringify Tool
135// ============================================================================
136
137/// JSON 序列化工具
138pub struct JsonStringifyTool;
139
140#[async_trait]
141impl BuiltinTool for JsonStringifyTool {
142    fn name(&self) -> &str {
143        "json_stringify"
144    }
145
146    fn description(&self) -> &str {
147        "Convert a value to JSON string with optional pretty printing."
148    }
149
150    fn parameters_schema(&self) -> serde_json::Value {
151        serde_json::json!({
152            "type": "object",
153            "properties": {
154                "value": {
155                    "description": "Value to stringify (any JSON value)"
156                },
157                "pretty": {
158                    "type": "boolean",
159                    "description": "Pretty print with indentation (default: false)"
160                }
161            },
162            "required": ["value"]
163        })
164    }
165
166    fn category(&self) -> ToolCategory {
167        ToolCategory::DataProcessing
168    }
169
170    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
171        let value = args
172            .get("value")
173            .cloned()
174            .unwrap_or(serde_json::json!(null));
175        let pretty = args["pretty"].as_bool().unwrap_or(false);
176
177        if pretty {
178            Ok(serde_json::to_string_pretty(&value)
179                .map_err(|e| anyhow::anyhow!("Failed to stringify: {}", e))?)
180        } else {
181            Ok(serde_json::to_string(&value)
182                .map_err(|e| anyhow::anyhow!("Failed to stringify: {}", e))?)
183        }
184    }
185}
186
187// ============================================================================
188// YAML Parse Tool
189// ============================================================================
190
191/// YAML 解析工具
192pub struct YamlParseTool;
193
194#[async_trait]
195impl BuiltinTool for YamlParseTool {
196    fn name(&self) -> &str {
197        "yaml_parse"
198    }
199
200    fn description(&self) -> &str {
201        "Parse YAML string and convert to JSON format."
202    }
203
204    fn parameters_schema(&self) -> serde_json::Value {
205        serde_json::json!({
206            "type": "object",
207            "properties": {
208                "yaml": {
209                    "type": "string",
210                    "description": "YAML string to parse"
211                }
212            },
213            "required": ["yaml"]
214        })
215    }
216
217    fn category(&self) -> ToolCategory {
218        ToolCategory::DataProcessing
219    }
220
221    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
222        let yaml_str = args["yaml"]
223            .as_str()
224            .ok_or_else(|| anyhow::anyhow!("Missing yaml parameter"))?;
225
226        let value: serde_yaml::Value = serde_yaml::from_str(yaml_str)
227            .map_err(|e| anyhow::anyhow!("Failed to parse YAML: {}", e))?;
228
229        // Convert to JSON for consistent output
230        let json_value = serde_json::to_value(&value)
231            .map_err(|e| anyhow::anyhow!("Failed to convert to JSON: {}", e))?;
232
233        Ok(serde_json::to_string_pretty(&json_value)
234            .map_err(|e| anyhow::anyhow!("Failed to format: {}", e))?)
235    }
236}
237
238// ============================================================================
239// YAML Stringify Tool
240// ============================================================================
241
242/// YAML 序列化工具
243pub struct YamlStringifyTool;
244
245#[async_trait]
246impl BuiltinTool for YamlStringifyTool {
247    fn name(&self) -> &str {
248        "yaml_stringify"
249    }
250
251    fn description(&self) -> &str {
252        "Convert JSON value to YAML format."
253    }
254
255    fn parameters_schema(&self) -> serde_json::Value {
256        serde_json::json!({
257            "type": "object",
258            "properties": {
259                "value": {
260                    "description": "Value to convert (any JSON value)"
261                }
262            },
263            "required": ["value"]
264        })
265    }
266
267    fn category(&self) -> ToolCategory {
268        ToolCategory::DataProcessing
269    }
270
271    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
272        let value = args
273            .get("value")
274            .cloned()
275            .unwrap_or(serde_json::json!(null));
276
277        let yaml_str = serde_yaml::to_string(&value)
278            .map_err(|e| anyhow::anyhow!("Failed to convert to YAML: {}", e))?;
279
280        Ok(yaml_str)
281    }
282}
283
284// ============================================================================
285// TOML Parse Tool
286// ============================================================================
287
288/// TOML 解析工具
289pub struct TomlParseTool;
290
291#[async_trait]
292impl BuiltinTool for TomlParseTool {
293    fn name(&self) -> &str {
294        "toml_parse"
295    }
296
297    fn description(&self) -> &str {
298        "Parse TOML string and convert to JSON format."
299    }
300
301    fn parameters_schema(&self) -> serde_json::Value {
302        serde_json::json!({
303            "type": "object",
304            "properties": {
305                "toml": {
306                    "type": "string",
307                    "description": "TOML string to parse"
308                }
309            },
310            "required": ["toml"]
311        })
312    }
313
314    fn category(&self) -> ToolCategory {
315        ToolCategory::DataProcessing
316    }
317
318    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
319        let toml_str = args["toml"]
320            .as_str()
321            .ok_or_else(|| anyhow::anyhow!("Missing toml parameter"))?;
322
323        let value: toml::Value = toml_str
324            .parse()
325            .map_err(|e| anyhow::anyhow!("Failed to parse TOML: {}", e))?;
326
327        // Convert to JSON for consistent output
328        let json_str = serde_json::to_string_pretty(&value)
329            .map_err(|e| anyhow::anyhow!("Failed to convert to JSON: {}", e))?;
330
331        Ok(json_str)
332    }
333}
334
335// ============================================================================
336// CSV Parse Tool
337// ============================================================================
338
339/// CSV 解析工具
340pub struct CsvParseTool;
341
342#[async_trait]
343impl BuiltinTool for CsvParseTool {
344    fn name(&self) -> &str {
345        "csv_parse"
346    }
347
348    fn description(&self) -> &str {
349        "Parse CSV string and convert to JSON array of objects."
350    }
351
352    fn parameters_schema(&self) -> serde_json::Value {
353        serde_json::json!({
354            "type": "object",
355            "properties": {
356                "csv": {
357                    "type": "string",
358                    "description": "CSV string to parse"
359                },
360                "delimiter": {
361                    "type": "string",
362                    "description": "Column delimiter (default: ',')"
363                },
364                "has_header": {
365                    "type": "boolean",
366                    "description": "First row is header (default: true)"
367                }
368            },
369            "required": ["csv"]
370        })
371    }
372
373    fn category(&self) -> ToolCategory {
374        ToolCategory::DataProcessing
375    }
376
377    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
378        let csv_str = args["csv"]
379            .as_str()
380            .ok_or_else(|| anyhow::anyhow!("Missing csv parameter"))?;
381
382        let delimiter = args["delimiter"]
383            .as_str()
384            .unwrap_or(",")
385            .chars()
386            .next()
387            .unwrap_or(',');
388        let has_header = args["has_header"].as_bool().unwrap_or(true);
389
390        let mut result: Vec<serde_json::Map<String, Value>> = Vec::new();
391        let mut lines = csv_str.lines();
392
393        let headers: Vec<String> = if has_header {
394            lines
395                .next()
396                .ok_or_else(|| anyhow::anyhow!("Empty CSV"))?
397                .split(delimiter)
398                .map(|s| s.trim().to_string())
399                .collect()
400        } else {
401            // Generate column names
402            let first_line = lines
403                .next()
404                .ok_or_else(|| anyhow::anyhow!("Empty CSV"))?
405                .split(delimiter)
406                .count();
407            (0..first_line).map(|i| format!("col_{}", i)).collect()
408        };
409
410        for line in lines {
411            if line.trim().is_empty() {
412                continue;
413            }
414            let values: Vec<&str> = line.split(delimiter).collect();
415            let mut row = serde_json::Map::new();
416            for (i, header) in headers.iter().enumerate() {
417                let value = values.get(i).unwrap_or(&"").trim();
418                // Try to parse as number or keep as string
419                let json_value = if let Ok(n) = value.parse::<i64>() {
420                    Value::Number(n.into())
421                } else if let Ok(n) = value.parse::<f64>() {
422                    Value::Number(serde_json::Number::from_f64(n).unwrap_or_else(|| 0.into()))
423                } else if value == "true" {
424                    Value::Bool(true)
425                } else if value == "false" {
426                    Value::Bool(false)
427                } else {
428                    Value::String(value.to_string())
429                };
430                row.insert(header.clone(), json_value);
431            }
432            result.push(row);
433        }
434
435        Ok(serde_json::to_string_pretty(&result)
436            .map_err(|e| anyhow::anyhow!("Failed to format: {}", e))?)
437    }
438}
439
440// ============================================================================
441// Base64 Encode Tool
442// ============================================================================
443
444/// Base64 编码工具
445pub struct Base64EncodeTool;
446
447#[async_trait]
448impl BuiltinTool for Base64EncodeTool {
449    fn name(&self) -> &str {
450        "base64_encode"
451    }
452
453    fn description(&self) -> &str {
454        "Encode a string to Base64 format."
455    }
456
457    fn parameters_schema(&self) -> serde_json::Value {
458        serde_json::json!({
459            "type": "object",
460            "properties": {
461                "text": {
462                    "type": "string",
463                    "description": "Text to encode"
464                }
465            },
466            "required": ["text"]
467        })
468    }
469
470    fn category(&self) -> ToolCategory {
471        ToolCategory::DataProcessing
472    }
473
474    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
475        let text = args["text"]
476            .as_str()
477            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
478
479        Ok(base64::Engine::encode(
480            &base64::engine::general_purpose::STANDARD,
481            text.as_bytes(),
482        ))
483    }
484}
485
486// ============================================================================
487// Base64 Decode Tool
488// ============================================================================
489
490/// Base64 解码工具
491pub struct Base64DecodeTool;
492
493#[async_trait]
494impl BuiltinTool for Base64DecodeTool {
495    fn name(&self) -> &str {
496        "base64_decode"
497    }
498
499    fn description(&self) -> &str {
500        "Decode a Base64 string."
501    }
502
503    fn parameters_schema(&self) -> serde_json::Value {
504        serde_json::json!({
505            "type": "object",
506            "properties": {
507                "encoded": {
508                    "type": "string",
509                    "description": "Base64 encoded string"
510                }
511            },
512            "required": ["encoded"]
513        })
514    }
515
516    fn category(&self) -> ToolCategory {
517        ToolCategory::DataProcessing
518    }
519
520    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
521        let encoded = args["encoded"]
522            .as_str()
523            .ok_or_else(|| anyhow::anyhow!("Missing encoded parameter"))?;
524
525        let decoded = base64::engine::general_purpose::STANDARD
526            .decode(encoded)
527            .map_err(|e| anyhow::anyhow!("Failed to decode Base64: {}", e))?;
528
529        String::from_utf8(decoded)
530            .map_err(|e| anyhow::anyhow!("Decoded bytes are not valid UTF-8: {}", e))
531    }
532}
533
534// ============================================================================
535// URL Encode Tool
536// ============================================================================
537
538/// URL 编码工具
539pub struct UrlEncodeTool;
540
541#[async_trait]
542impl BuiltinTool for UrlEncodeTool {
543    fn name(&self) -> &str {
544        "url_encode"
545    }
546
547    fn description(&self) -> &str {
548        "Encode a string for use in URLs (percent encoding)."
549    }
550
551    fn parameters_schema(&self) -> serde_json::Value {
552        serde_json::json!({
553            "type": "object",
554            "properties": {
555                "text": {
556                    "type": "string",
557                    "description": "Text to encode"
558                }
559            },
560            "required": ["text"]
561        })
562    }
563
564    fn category(&self) -> ToolCategory {
565        ToolCategory::DataProcessing
566    }
567
568    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
569        let text = args["text"]
570            .as_str()
571            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
572
573        Ok(urlencoding::encode(text).into_owned())
574    }
575}
576
577// ============================================================================
578// URL Decode Tool
579// ============================================================================
580
581/// URL 解码工具
582pub struct UrlDecodeTool;
583
584#[async_trait]
585impl BuiltinTool for UrlDecodeTool {
586    fn name(&self) -> &str {
587        "url_decode"
588    }
589
590    fn description(&self) -> &str {
591        "Decode a URL-encoded string."
592    }
593
594    fn parameters_schema(&self) -> serde_json::Value {
595        serde_json::json!({
596            "type": "object",
597            "properties": {
598                "encoded": {
599                    "type": "string",
600                    "description": "URL-encoded string"
601                }
602            },
603            "required": ["encoded"]
604        })
605    }
606
607    fn category(&self) -> ToolCategory {
608        ToolCategory::DataProcessing
609    }
610
611    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
612        let encoded = args["encoded"]
613            .as_str()
614            .ok_or_else(|| anyhow::anyhow!("Missing encoded parameter"))?;
615
616        Ok(urlencoding::decode(encoded)
617            .map_err(|e| anyhow::anyhow!("Failed to decode URL: {}", e))?
618            .into_owned())
619    }
620}
621
622// ============================================================================
623// Hash Tool
624// ============================================================================
625
626/// 哈希计算工具
627pub struct HashTool;
628
629#[async_trait]
630impl BuiltinTool for HashTool {
631    fn name(&self) -> &str {
632        "hash"
633    }
634
635    fn description(&self) -> &str {
636        "Calculate hash of a string. Supports MD5, SHA256, SHA512."
637    }
638
639    fn parameters_schema(&self) -> serde_json::Value {
640        serde_json::json!({
641            "type": "object",
642            "properties": {
643                "text": {
644                    "type": "string",
645                    "description": "Text to hash"
646                },
647                "algorithm": {
648                    "type": "string",
649                    "enum": ["md5", "sha256", "sha512"],
650                    "description": "Hash algorithm (default: sha256)"
651                }
652            },
653            "required": ["text"]
654        })
655    }
656
657    fn category(&self) -> ToolCategory {
658        ToolCategory::DataProcessing
659    }
660
661    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
662        let text = args["text"]
663            .as_str()
664            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
665
666        let algorithm = args["algorithm"].as_str().unwrap_or("sha256");
667
668        let hash = match algorithm {
669            "md5" => {
670                use md5::Md5;
671                use sha2::Digest;
672                let mut hasher = Md5::new();
673                hasher.update(text.as_bytes());
674                format!("{:x}", hasher.finalize())
675            }
676            "sha256" => {
677                use sha2::{Digest, Sha256};
678                let mut hasher = Sha256::new();
679                hasher.update(text.as_bytes());
680                format!("{:x}", hasher.finalize())
681            }
682            "sha512" => {
683                use sha2::{Digest, Sha512};
684                let mut hasher = Sha512::new();
685                hasher.update(text.as_bytes());
686                format!("{:x}", hasher.finalize())
687            }
688            _ => return Err(anyhow::anyhow!("Unsupported algorithm: {}", algorithm)),
689        };
690
691        Ok(hash)
692    }
693}
694
695// ============================================================================
696// UUID Generate Tool
697// ============================================================================
698
699/// UUID 生成工具
700pub struct UuidGenerateTool;
701
702#[async_trait]
703impl BuiltinTool for UuidGenerateTool {
704    fn name(&self) -> &str {
705        "uuid_generate"
706    }
707
708    fn description(&self) -> &str {
709        "Generate a UUID. Supports v4 (random) and v7 (time-ordered)."
710    }
711
712    fn parameters_schema(&self) -> serde_json::Value {
713        serde_json::json!({
714            "type": "object",
715            "properties": {
716                "version": {
717                    "type": "integer",
718                    "enum": [4, 7],
719                    "description": "UUID version (default: 4)"
720                },
721                "count": {
722                    "type": "integer",
723                    "description": "Number of UUIDs to generate (default: 1)"
724                }
725            }
726        })
727    }
728
729    fn category(&self) -> ToolCategory {
730        ToolCategory::DataProcessing
731    }
732
733    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
734        let version = args["version"].as_u64().unwrap_or(4);
735        let count = args["count"].as_u64().unwrap_or(1).max(1).min(100) as usize;
736
737        let uuids: Vec<String> = (0..count)
738            .map(|_| {
739                match version {
740                    4 => uuid::Uuid::new_v4().to_string(),
741                    7 => {
742                        // v7 uses timestamp + random - fallback to v4 if not available
743                        uuid::Uuid::new_v4().to_string()
744                    }
745                    _ => uuid::Uuid::new_v4().to_string(),
746                }
747            })
748            .collect();
749
750        if count == 1 {
751            Ok(uuids[0].clone())
752        } else {
753            Ok(uuids.join("\n"))
754        }
755    }
756}
757
758// ============================================================================
759// Tests
760// ============================================================================
761
/// Unit tests for the data processing tools.
///
/// Assertions pin exact outputs (parsed JSON, known digests, known encodings)
/// rather than substrings, so a wrong-but-similar result cannot pass.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The JSON parse tool reports the data-processing category.
    #[test]
    fn test_json_parse() {
        let tool = JsonParseTool;
        assert_eq!(tool.category(), ToolCategory::DataProcessing);
    }

    /// Without a query the whole document comes back, semantically unchanged.
    #[tokio::test]
    async fn test_json_parse_basic() {
        let tool = JsonParseTool;
        let result = tool
            .execute(json!({"json": r#"{"name": "test", "value": 42}"#}))
            .await
            .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed, json!({"name": "test", "value": 42}));
    }

    /// A key/index path selects exactly the addressed node.
    #[tokio::test]
    async fn test_json_parse_with_query() {
        let tool = JsonParseTool;
        let result = tool
            .execute(json!({
                "json": r#"{"data": [{"name": "Alice"}, {"name": "Bob"}]}"#,
                "query": "$.data[0].name"
            }))
            .await
            .unwrap();
        assert_eq!(result, "\"Alice\"");
    }

    /// Header names become keys; numeric and boolean cells are typed.
    #[tokio::test]
    async fn test_csv_parse() {
        let tool = CsvParseTool;
        let result = tool
            .execute(json!({
                "csv": "name,age,active\nAlice,30,true\nBob,25,false"
            }))
            .await
            .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(
            parsed,
            json!([
                {"name": "Alice", "age": 30, "active": true},
                {"name": "Bob", "age": 25, "active": false}
            ])
        );
    }

    /// Encoding yields the known Base64 text and decoding restores the input.
    #[tokio::test]
    async fn test_base64_roundtrip() {
        let encode_tool = Base64EncodeTool;
        let encoded = encode_tool
            .execute(json!({"text": "Hello, World!"}))
            .await
            .unwrap();
        assert_eq!(encoded, "SGVsbG8sIFdvcmxkIQ==");

        let decode_tool = Base64DecodeTool;
        let decoded = decode_tool
            .execute(json!({"encoded": encoded}))
            .await
            .unwrap();

        assert_eq!(decoded, "Hello, World!");
    }

    /// A space is percent-encoded as `%20` and decoding restores the input.
    #[tokio::test]
    async fn test_url_roundtrip() {
        let encode_tool = UrlEncodeTool;
        let encoded = encode_tool
            .execute(json!({"text": "hello world"}))
            .await
            .unwrap();
        assert_eq!(encoded, "hello%20world");

        let decode_tool = UrlDecodeTool;
        let decoded = decode_tool
            .execute(json!({"encoded": encoded}))
            .await
            .unwrap();

        assert_eq!(decoded, "hello world");
    }

    /// SHA-256 of "hello" matches the published digest.
    #[tokio::test]
    async fn test_hash_sha256() {
        let tool = HashTool;
        let result = tool
            .execute(json!({"text": "hello", "algorithm": "sha256"}))
            .await
            .unwrap();
        assert_eq!(
            result,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    /// With no arguments a single, parseable version 4 UUID is produced.
    #[tokio::test]
    async fn test_uuid_generate() {
        let tool = UuidGenerateTool;
        let result = tool.execute(json!({})).await.unwrap();
        let parsed = uuid::Uuid::parse_str(&result).unwrap();
        assert_eq!(parsed.get_version_num(), 4);
    }

    /// `count` UUIDs come back one per line, each individually parseable.
    #[tokio::test]
    async fn test_uuid_generate_multiple() {
        let tool = UuidGenerateTool;
        let result = tool.execute(json!({"count": 5})).await.unwrap();
        let uuids: Vec<&str> = result.lines().collect();
        assert_eq!(uuids.len(), 5);
        for id in uuids {
            assert!(uuid::Uuid::parse_str(id).is_ok());
        }
    }
}