Skip to main content

sh_layer3/builtin_tools/
data_processing.rs

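//! # Data Processing Tools
//!
//! Data processing toolkit: JSON/YAML/TOML/CSV parsing and serialization,
//! Base64 and URL encoding/decoding, hashing, and UUID generation.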
4
5use crate::builtin_tools::BuiltinTool;
6use crate::types::{Layer3Result, ToolCategory};
7use async_trait::async_trait;
8use base64::Engine;
9use serde_json::Value;
10
11// ============================================================================
12// JSON Parse Tool
13// ============================================================================
14
15/// JSON 解析工具
16pub struct JsonParseTool;
17
18#[async_trait]
19impl BuiltinTool for JsonParseTool {
20    fn name(&self) -> &str {
21        "json_parse"
22    }
23
24    fn description(&self) -> &str {
25        "Parse JSON string and return formatted output. Supports querying with JSONPath."
26    }
27
28    fn parameters_schema(&self) -> serde_json::Value {
29        serde_json::json!({
30            "type": "object",
31            "properties": {
32                "json": {
33                    "type": "string",
34                    "description": "JSON string to parse"
35                },
36                "query": {
37                    "type": "string",
38                    "description": "Optional JSONPath query (e.g., '$.data[0].name')"
39                }
40            },
41            "required": ["json"]
42        })
43    }
44
45    fn category(&self) -> ToolCategory {
46        ToolCategory::DataProcessing
47    }
48
49    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
50        let json_str = args["json"]
51            .as_str()
52            .ok_or_else(|| anyhow::anyhow!("Missing json parameter"))?;
53
54        let value: Value = serde_json::from_str(json_str)
55            .map_err(|e| anyhow::anyhow!("Failed to parse JSON: {}", e))?;
56
57        if let Some(query) = args["query"].as_str() {
58            // Simple JSONPath-like query
59            let result = query_json(&value, query)?;
60            Ok(serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string()))
61        } else {
62            Ok(serde_json::to_string_pretty(&value)
63                .map_err(|e| anyhow::anyhow!("Failed to format JSON: {}", e))?)
64        }
65    }
66}
67
68/// Simple JSONPath query implementation
69fn query_json(value: &Value, path: &str) -> Layer3Result<Value> {
70    let path = path.strip_prefix('$').unwrap_or(path);
71
72    // Parse path into parts - handle both `.key` and `[index]` syntax
73    let mut parts: Vec<String> = Vec::new();
74    let mut current = String::new();
75    let mut chars = path.chars().peekable();
76
77    while let Some(c) = chars.next() {
78        match c {
79            '.' => {
80                if !current.is_empty() {
81                    parts.push(current.clone());
82                    current.clear();
83                }
84            }
85            '[' => {
86                if !current.is_empty() {
87                    parts.push(current.clone());
88                    current.clear();
89                }
90                // Read until closing bracket
91                #[allow(clippy::while_let_on_iterator)]
92                while let Some(inner) = chars.next() {
93                    if inner == ']' {
94                        break;
95                    }
96                    current.push(inner);
97                }
98                if !current.is_empty() {
99                    parts.push(format!("[{}]", current));
100                    current.clear();
101                }
102            }
103            _ => {
104                current.push(c);
105            }
106        }
107    }
108    if !current.is_empty() {
109        parts.push(current);
110    }
111
112    let mut result = value.clone();
113    for part in &parts {
114        if part.starts_with('[') && part.ends_with(']') {
115            // Array index
116            let index_str = &part[1..part.len() - 1];
117            let index: usize = index_str
118                .parse()
119                .map_err(|e| anyhow::anyhow!("Invalid array index: {}", e))?;
120            result = result
121                .get(index)
122                .cloned()
123                .ok_or_else(|| anyhow::anyhow!("Index {} out of bounds", index))?;
124        } else {
125            result = result
126                .get(part)
127                .cloned()
128                .ok_or_else(|| anyhow::anyhow!("Key '{}' not found", part))?;
129        }
130    }
131    Ok(result)
132}
133
134// ============================================================================
135// JSON Stringify Tool
136// ============================================================================
137
138/// JSON 序列化工具
139pub struct JsonStringifyTool;
140
141#[async_trait]
142impl BuiltinTool for JsonStringifyTool {
143    fn name(&self) -> &str {
144        "json_stringify"
145    }
146
147    fn description(&self) -> &str {
148        "Convert a value to JSON string with optional pretty printing."
149    }
150
151    fn parameters_schema(&self) -> serde_json::Value {
152        serde_json::json!({
153            "type": "object",
154            "properties": {
155                "value": {
156                    "description": "Value to stringify (any JSON value)"
157                },
158                "pretty": {
159                    "type": "boolean",
160                    "description": "Pretty print with indentation (default: false)"
161                }
162            },
163            "required": ["value"]
164        })
165    }
166
167    fn category(&self) -> ToolCategory {
168        ToolCategory::DataProcessing
169    }
170
171    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
172        let value = args
173            .get("value")
174            .cloned()
175            .unwrap_or(serde_json::json!(null));
176        let pretty = args["pretty"].as_bool().unwrap_or(false);
177
178        if pretty {
179            Ok(serde_json::to_string_pretty(&value)
180                .map_err(|e| anyhow::anyhow!("Failed to stringify: {}", e))?)
181        } else {
182            Ok(serde_json::to_string(&value)
183                .map_err(|e| anyhow::anyhow!("Failed to stringify: {}", e))?)
184        }
185    }
186}
187
188// ============================================================================
189// YAML Parse Tool
190// ============================================================================
191
192/// YAML 解析工具
193pub struct YamlParseTool;
194
195#[async_trait]
196impl BuiltinTool for YamlParseTool {
197    fn name(&self) -> &str {
198        "yaml_parse"
199    }
200
201    fn description(&self) -> &str {
202        "Parse YAML string and convert to JSON format."
203    }
204
205    fn parameters_schema(&self) -> serde_json::Value {
206        serde_json::json!({
207            "type": "object",
208            "properties": {
209                "yaml": {
210                    "type": "string",
211                    "description": "YAML string to parse"
212                }
213            },
214            "required": ["yaml"]
215        })
216    }
217
218    fn category(&self) -> ToolCategory {
219        ToolCategory::DataProcessing
220    }
221
222    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
223        let yaml_str = args["yaml"]
224            .as_str()
225            .ok_or_else(|| anyhow::anyhow!("Missing yaml parameter"))?;
226
227        let value: serde_yaml::Value = serde_yaml::from_str(yaml_str)
228            .map_err(|e| anyhow::anyhow!("Failed to parse YAML: {}", e))?;
229
230        // Convert to JSON for consistent output
231        let json_value = serde_json::to_value(&value)
232            .map_err(|e| anyhow::anyhow!("Failed to convert to JSON: {}", e))?;
233
234        Ok(serde_json::to_string_pretty(&json_value)
235            .map_err(|e| anyhow::anyhow!("Failed to format: {}", e))?)
236    }
237}
238
239// ============================================================================
240// YAML Stringify Tool
241// ============================================================================
242
243/// YAML 序列化工具
244pub struct YamlStringifyTool;
245
246#[async_trait]
247impl BuiltinTool for YamlStringifyTool {
248    fn name(&self) -> &str {
249        "yaml_stringify"
250    }
251
252    fn description(&self) -> &str {
253        "Convert JSON value to YAML format."
254    }
255
256    fn parameters_schema(&self) -> serde_json::Value {
257        serde_json::json!({
258            "type": "object",
259            "properties": {
260                "value": {
261                    "description": "Value to convert (any JSON value)"
262                }
263            },
264            "required": ["value"]
265        })
266    }
267
268    fn category(&self) -> ToolCategory {
269        ToolCategory::DataProcessing
270    }
271
272    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
273        let value = args
274            .get("value")
275            .cloned()
276            .unwrap_or(serde_json::json!(null));
277
278        let yaml_str = serde_yaml::to_string(&value)
279            .map_err(|e| anyhow::anyhow!("Failed to convert to YAML: {}", e))?;
280
281        Ok(yaml_str)
282    }
283}
284
285// ============================================================================
286// TOML Parse Tool
287// ============================================================================
288
289/// TOML 解析工具
290pub struct TomlParseTool;
291
292#[async_trait]
293impl BuiltinTool for TomlParseTool {
294    fn name(&self) -> &str {
295        "toml_parse"
296    }
297
298    fn description(&self) -> &str {
299        "Parse TOML string and convert to JSON format."
300    }
301
302    fn parameters_schema(&self) -> serde_json::Value {
303        serde_json::json!({
304            "type": "object",
305            "properties": {
306                "toml": {
307                    "type": "string",
308                    "description": "TOML string to parse"
309                }
310            },
311            "required": ["toml"]
312        })
313    }
314
315    fn category(&self) -> ToolCategory {
316        ToolCategory::DataProcessing
317    }
318
319    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
320        let toml_str = args["toml"]
321            .as_str()
322            .ok_or_else(|| anyhow::anyhow!("Missing toml parameter"))?;
323
324        let value: toml::Value = toml_str
325            .parse()
326            .map_err(|e| anyhow::anyhow!("Failed to parse TOML: {}", e))?;
327
328        // Convert to JSON for consistent output
329        let json_str = serde_json::to_string_pretty(&value)
330            .map_err(|e| anyhow::anyhow!("Failed to convert to JSON: {}", e))?;
331
332        Ok(json_str)
333    }
334}
335
336// ============================================================================
337// CSV Parse Tool
338// ============================================================================
339
340/// CSV 解析工具
341pub struct CsvParseTool;
342
343#[async_trait]
344impl BuiltinTool for CsvParseTool {
345    fn name(&self) -> &str {
346        "csv_parse"
347    }
348
349    fn description(&self) -> &str {
350        "Parse CSV string and convert to JSON array of objects."
351    }
352
353    fn parameters_schema(&self) -> serde_json::Value {
354        serde_json::json!({
355            "type": "object",
356            "properties": {
357                "csv": {
358                    "type": "string",
359                    "description": "CSV string to parse"
360                },
361                "delimiter": {
362                    "type": "string",
363                    "description": "Column delimiter (default: ',')"
364                },
365                "has_header": {
366                    "type": "boolean",
367                    "description": "First row is header (default: true)"
368                }
369            },
370            "required": ["csv"]
371        })
372    }
373
374    fn category(&self) -> ToolCategory {
375        ToolCategory::DataProcessing
376    }
377
378    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
379        let csv_str = args["csv"]
380            .as_str()
381            .ok_or_else(|| anyhow::anyhow!("Missing csv parameter"))?;
382
383        let delimiter = args["delimiter"]
384            .as_str()
385            .unwrap_or(",")
386            .chars()
387            .next()
388            .unwrap_or(',');
389        let has_header = args["has_header"].as_bool().unwrap_or(true);
390
391        let mut result: Vec<serde_json::Map<String, Value>> = Vec::new();
392        let mut lines = csv_str.lines();
393
394        let headers: Vec<String> = if has_header {
395            lines
396                .next()
397                .ok_or_else(|| anyhow::anyhow!("Empty CSV"))?
398                .split(delimiter)
399                .map(|s| s.trim().to_string())
400                .collect()
401        } else {
402            // Generate column names
403            let first_line = lines
404                .next()
405                .ok_or_else(|| anyhow::anyhow!("Empty CSV"))?
406                .split(delimiter)
407                .count();
408            (0..first_line).map(|i| format!("col_{}", i)).collect()
409        };
410
411        for line in lines {
412            if line.trim().is_empty() {
413                continue;
414            }
415            let values: Vec<&str> = line.split(delimiter).collect();
416            let mut row = serde_json::Map::new();
417            for (i, header) in headers.iter().enumerate() {
418                let value = values.get(i).unwrap_or(&"").trim();
419                // Try to parse as number or keep as string
420                let json_value = if let Ok(n) = value.parse::<i64>() {
421                    Value::Number(n.into())
422                } else if let Ok(n) = value.parse::<f64>() {
423                    Value::Number(serde_json::Number::from_f64(n).unwrap_or_else(|| 0.into()))
424                } else if value == "true" {
425                    Value::Bool(true)
426                } else if value == "false" {
427                    Value::Bool(false)
428                } else {
429                    Value::String(value.to_string())
430                };
431                row.insert(header.clone(), json_value);
432            }
433            result.push(row);
434        }
435
436        Ok(serde_json::to_string_pretty(&result)
437            .map_err(|e| anyhow::anyhow!("Failed to format: {}", e))?)
438    }
439}
440
441// ============================================================================
442// Base64 Encode Tool
443// ============================================================================
444
445/// Base64 编码工具
446pub struct Base64EncodeTool;
447
448#[async_trait]
449impl BuiltinTool for Base64EncodeTool {
450    fn name(&self) -> &str {
451        "base64_encode"
452    }
453
454    fn description(&self) -> &str {
455        "Encode a string to Base64 format."
456    }
457
458    fn parameters_schema(&self) -> serde_json::Value {
459        serde_json::json!({
460            "type": "object",
461            "properties": {
462                "text": {
463                    "type": "string",
464                    "description": "Text to encode"
465                }
466            },
467            "required": ["text"]
468        })
469    }
470
471    fn category(&self) -> ToolCategory {
472        ToolCategory::DataProcessing
473    }
474
475    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
476        let text = args["text"]
477            .as_str()
478            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
479
480        Ok(base64::Engine::encode(
481            &base64::engine::general_purpose::STANDARD,
482            text.as_bytes(),
483        ))
484    }
485}
486
487// ============================================================================
488// Base64 Decode Tool
489// ============================================================================
490
491/// Base64 解码工具
492pub struct Base64DecodeTool;
493
494#[async_trait]
495impl BuiltinTool for Base64DecodeTool {
496    fn name(&self) -> &str {
497        "base64_decode"
498    }
499
500    fn description(&self) -> &str {
501        "Decode a Base64 string."
502    }
503
504    fn parameters_schema(&self) -> serde_json::Value {
505        serde_json::json!({
506            "type": "object",
507            "properties": {
508                "encoded": {
509                    "type": "string",
510                    "description": "Base64 encoded string"
511                }
512            },
513            "required": ["encoded"]
514        })
515    }
516
517    fn category(&self) -> ToolCategory {
518        ToolCategory::DataProcessing
519    }
520
521    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
522        let encoded = args["encoded"]
523            .as_str()
524            .ok_or_else(|| anyhow::anyhow!("Missing encoded parameter"))?;
525
526        let decoded = base64::engine::general_purpose::STANDARD
527            .decode(encoded)
528            .map_err(|e| anyhow::anyhow!("Failed to decode Base64: {}", e))?;
529
530        String::from_utf8(decoded)
531            .map_err(|e| anyhow::anyhow!("Decoded bytes are not valid UTF-8: {}", e))
532    }
533}
534
535// ============================================================================
536// URL Encode Tool
537// ============================================================================
538
539/// URL 编码工具
540pub struct UrlEncodeTool;
541
542#[async_trait]
543impl BuiltinTool for UrlEncodeTool {
544    fn name(&self) -> &str {
545        "url_encode"
546    }
547
548    fn description(&self) -> &str {
549        "Encode a string for use in URLs (percent encoding)."
550    }
551
552    fn parameters_schema(&self) -> serde_json::Value {
553        serde_json::json!({
554            "type": "object",
555            "properties": {
556                "text": {
557                    "type": "string",
558                    "description": "Text to encode"
559                }
560            },
561            "required": ["text"]
562        })
563    }
564
565    fn category(&self) -> ToolCategory {
566        ToolCategory::DataProcessing
567    }
568
569    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
570        let text = args["text"]
571            .as_str()
572            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
573
574        Ok(urlencoding::encode(text).into_owned())
575    }
576}
577
578// ============================================================================
579// URL Decode Tool
580// ============================================================================
581
582/// URL 解码工具
583pub struct UrlDecodeTool;
584
585#[async_trait]
586impl BuiltinTool for UrlDecodeTool {
587    fn name(&self) -> &str {
588        "url_decode"
589    }
590
591    fn description(&self) -> &str {
592        "Decode a URL-encoded string."
593    }
594
595    fn parameters_schema(&self) -> serde_json::Value {
596        serde_json::json!({
597            "type": "object",
598            "properties": {
599                "encoded": {
600                    "type": "string",
601                    "description": "URL-encoded string"
602                }
603            },
604            "required": ["encoded"]
605        })
606    }
607
608    fn category(&self) -> ToolCategory {
609        ToolCategory::DataProcessing
610    }
611
612    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
613        let encoded = args["encoded"]
614            .as_str()
615            .ok_or_else(|| anyhow::anyhow!("Missing encoded parameter"))?;
616
617        Ok(urlencoding::decode(encoded)
618            .map_err(|e| anyhow::anyhow!("Failed to decode URL: {}", e))?
619            .into_owned())
620    }
621}
622
623// ============================================================================
624// Hash Tool
625// ============================================================================
626
627/// 哈希计算工具
628pub struct HashTool;
629
630#[async_trait]
631impl BuiltinTool for HashTool {
632    fn name(&self) -> &str {
633        "hash"
634    }
635
636    fn description(&self) -> &str {
637        "Calculate hash of a string. Supports MD5, SHA256, SHA512."
638    }
639
640    fn parameters_schema(&self) -> serde_json::Value {
641        serde_json::json!({
642            "type": "object",
643            "properties": {
644                "text": {
645                    "type": "string",
646                    "description": "Text to hash"
647                },
648                "algorithm": {
649                    "type": "string",
650                    "enum": ["md5", "sha256", "sha512"],
651                    "description": "Hash algorithm (default: sha256)"
652                }
653            },
654            "required": ["text"]
655        })
656    }
657
658    fn category(&self) -> ToolCategory {
659        ToolCategory::DataProcessing
660    }
661
662    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
663        let text = args["text"]
664            .as_str()
665            .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
666
667        let algorithm = args["algorithm"].as_str().unwrap_or("sha256");
668
669        let hash = match algorithm {
670            "md5" => {
671                use md5::Md5;
672                use sha2::Digest;
673                let mut hasher = Md5::new();
674                hasher.update(text.as_bytes());
675                format!("{:x}", hasher.finalize())
676            }
677            "sha256" => {
678                use sha2::{Digest, Sha256};
679                let mut hasher = Sha256::new();
680                hasher.update(text.as_bytes());
681                format!("{:x}", hasher.finalize())
682            }
683            "sha512" => {
684                use sha2::{Digest, Sha512};
685                let mut hasher = Sha512::new();
686                hasher.update(text.as_bytes());
687                format!("{:x}", hasher.finalize())
688            }
689            _ => return Err(anyhow::anyhow!("Unsupported algorithm: {}", algorithm)),
690        };
691
692        Ok(hash)
693    }
694}
695
696// ============================================================================
697// UUID Generate Tool
698// ============================================================================
699
700/// UUID 生成工具
701pub struct UuidGenerateTool;
702
703#[async_trait]
704impl BuiltinTool for UuidGenerateTool {
705    fn name(&self) -> &str {
706        "uuid_generate"
707    }
708
709    fn description(&self) -> &str {
710        "Generate a UUID. Supports v4 (random) and v7 (time-ordered)."
711    }
712
713    fn parameters_schema(&self) -> serde_json::Value {
714        serde_json::json!({
715            "type": "object",
716            "properties": {
717                "version": {
718                    "type": "integer",
719                    "enum": [4, 7],
720                    "description": "UUID version (default: 4)"
721                },
722                "count": {
723                    "type": "integer",
724                    "description": "Number of UUIDs to generate (default: 1)"
725                }
726            }
727        })
728    }
729
730    fn category(&self) -> ToolCategory {
731        ToolCategory::DataProcessing
732    }
733
734    async fn execute(&self, args: serde_json::Value) -> Layer3Result<String> {
735        let version = args["version"].as_u64().unwrap_or(4);
736        let count = args["count"].as_u64().unwrap_or(1).clamp(1, 100) as usize;
737
738        let uuids: Vec<String> = (0..count)
739            .map(|_| {
740                match version {
741                    4 => uuid::Uuid::new_v4().to_string(),
742                    7 => {
743                        // v7 uses timestamp + random - fallback to v4 if not available
744                        uuid::Uuid::new_v4().to_string()
745                    }
746                    _ => uuid::Uuid::new_v4().to_string(),
747                }
748            })
749            .collect();
750
751        if count == 1 {
752            Ok(uuids[0].clone())
753        } else {
754            Ok(uuids.join("\n"))
755        }
756    }
757}
758
759// ============================================================================
760// Tests
761// ============================================================================
762
/// Unit tests for the data processing tools.
///
/// Assertions pin exact outputs where the expected value is well defined
/// (structured JSON, known digests, known encodings) instead of relying on
/// substring or length checks.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_json_parse() {
        let tool = JsonParseTool;
        assert_eq!(tool.category(), ToolCategory::DataProcessing);
    }

    #[tokio::test]
    async fn test_json_parse_basic() {
        let tool = JsonParseTool;
        let result = tool
            .execute(json!({"json": r#"{"name": "test", "value": 42}"#}))
            .await
            .unwrap();
        // Re-parse the formatted output so the check is independent of whitespace.
        let reparsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(reparsed, json!({"name": "test", "value": 42}));
    }

    #[tokio::test]
    async fn test_json_parse_with_query() {
        let tool = JsonParseTool;
        let result = tool
            .execute(json!({
                "json": r#"{"data": [{"name": "Alice"}, {"name": "Bob"}]}"#,
                "query": "$.data[0].name"
            }))
            .await
            .unwrap();
        // The selected value is a JSON string, so the output keeps its quotes.
        assert_eq!(result, "\"Alice\"");
    }

    #[tokio::test]
    async fn test_csv_parse() {
        let tool = CsvParseTool;
        let result = tool
            .execute(json!({
                "csv": "name,age,active\nAlice,30,true\nBob,25,false"
            }))
            .await
            .unwrap();
        let rows: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(
            rows,
            json!([
                {"name": "Alice", "age": 30, "active": true},
                {"name": "Bob", "age": 25, "active": false}
            ])
        );
    }

    #[tokio::test]
    async fn test_base64_roundtrip() {
        let encode_tool = Base64EncodeTool;
        let encoded = encode_tool
            .execute(json!({"text": "Hello, World!"}))
            .await
            .unwrap();
        assert_eq!(encoded, "SGVsbG8sIFdvcmxkIQ==");

        let decode_tool = Base64DecodeTool;
        let decoded = decode_tool
            .execute(json!({"encoded": encoded}))
            .await
            .unwrap();

        assert_eq!(decoded, "Hello, World!");
    }

    #[tokio::test]
    async fn test_url_roundtrip() {
        let encode_tool = UrlEncodeTool;
        let encoded = encode_tool
            .execute(json!({"text": "hello world"}))
            .await
            .unwrap();
        assert_eq!(encoded, "hello%20world");

        let decode_tool = UrlDecodeTool;
        let decoded = decode_tool
            .execute(json!({"encoded": encoded}))
            .await
            .unwrap();

        assert_eq!(decoded, "hello world");
    }

    #[tokio::test]
    async fn test_hash_sha256() {
        let tool = HashTool;
        let result = tool
            .execute(json!({"text": "hello", "algorithm": "sha256"}))
            .await
            .unwrap();
        // Well-known SHA-256 digest of "hello".
        assert_eq!(
            result,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    #[tokio::test]
    async fn test_uuid_generate() {
        let tool = UuidGenerateTool;
        let result = tool.execute(json!({})).await.unwrap();
        assert!(uuid::Uuid::parse_str(&result).is_ok());
    }

    #[tokio::test]
    async fn test_uuid_generate_multiple() {
        let tool = UuidGenerateTool;
        let result = tool.execute(json!({"count": 5})).await.unwrap();
        let uuids: Vec<&str> = result.lines().collect();
        assert_eq!(uuids.len(), 5);
        // Every emitted line must itself be a well-formed UUID.
        assert!(uuids.iter().all(|id| uuid::Uuid::parse_str(id).is_ok()));
    }
}