Skip to main content

aptos_sdk/codegen/
move_parser.rs

1//! Move source code parser for extracting function signatures and documentation.
2//!
3//! This module parses Move source files to extract:
4//! - Function parameter names
5//! - Documentation comments
6//! - Struct field documentation
7//!
8//! This information is combined with ABI data to generate more readable bindings.
9
10use std::collections::HashMap;
11
/// Metadata recovered from a single Move `fun` declaration.
///
/// All fields are filled by scanning source text; `Default` yields an empty
/// name, no documentation and empty name lists.
#[derive(Clone, Debug, Default)]
pub struct MoveFunctionInfo {
    /// Identifier that follows the `fun` keyword.
    pub name: String,
    /// Text of the `///` comment lines attached to the function, joined with
    /// newlines; `None` when the function has no doc comment.
    pub doc: Option<String>,
    /// Declared parameter names, in declaration order.
    pub param_names: Vec<String>,
    /// Declared type parameter names (e.g., `T`, `CoinType`), without constraints.
    pub type_param_names: Vec<String>,
}
24
/// Metadata recovered from a single Move `struct` declaration.
#[derive(Clone, Debug, Default)]
pub struct MoveStructInfo {
    /// Identifier that follows the `struct` keyword.
    pub name: String,
    /// Text of the `///` comment lines attached to the struct; `None` when absent.
    pub doc: Option<String>,
    /// Documentation keyed by field name. Fields without a doc comment have no entry.
    pub field_docs: HashMap<String, String>,
}
35
36/// Information extracted from a Move module.
37#[derive(Debug, Clone, Default)]
38pub struct MoveModuleInfo {
39    /// Module documentation.
40    pub doc: Option<String>,
41    /// Functions by name.
42    pub functions: HashMap<String, MoveFunctionInfo>,
43    /// Structs by name.
44    pub structs: HashMap<String, MoveStructInfo>,
45}
46
/// Stateless entry point for extracting metadata from Move source text.
///
/// The type carries no data; it only namespaces the parsing functions.
#[derive(Clone, Copy, Debug, Default)]
pub struct MoveSourceParser;
50
/// Upper bound, in bytes, on the Move source text accepted for parsing (10 MB).
///
/// # Security
///
/// Caps the memory spent on very large or malicious input.
const MAX_SOURCE_SIZE: usize = 10 << 20;
57
58impl MoveSourceParser {
59    /// Parses Move source code and extracts module information.
60    ///
61    /// # Security
62    ///
63    /// Returns an empty `MoveModuleInfo` if the source exceeds `MAX_SOURCE_SIZE`
64    /// (10 MB) to prevent memory exhaustion from extremely large inputs.
65    pub fn parse(source: &str) -> MoveModuleInfo {
66        if source.len() > MAX_SOURCE_SIZE {
67            return MoveModuleInfo::default();
68        }
69        MoveModuleInfo {
70            doc: Self::extract_leading_doc(source),
71            functions: Self::parse_functions(source),
72            structs: Self::parse_structs(source),
73        }
74    }
75
76    /// Extracts leading documentation comments.
77    fn extract_leading_doc(source: &str) -> Option<String> {
78        let mut doc_lines = Vec::new();
79        let mut in_doc = false;
80
81        for line in source.lines() {
82            let trimmed = line.trim();
83            if trimmed.starts_with("///") {
84                in_doc = true;
85                let doc_content = trimmed.strip_prefix("///").unwrap_or("").trim();
86                doc_lines.push(doc_content.to_string());
87            } else if trimmed.starts_with("module ")
88                || trimmed.starts_with("script ")
89                || (in_doc && !trimmed.is_empty() && !trimmed.starts_with("//"))
90            {
91                break;
92            }
93        }
94
95        if doc_lines.is_empty() {
96            None
97        } else {
98            Some(doc_lines.join("\n"))
99        }
100    }
101
102    /// Parses all function definitions from the source.
103    fn parse_functions(source: &str) -> HashMap<String, MoveFunctionInfo> {
104        let mut functions = HashMap::new();
105        let lines: Vec<&str> = source.lines().collect();
106
107        let mut i = 0;
108        while i < lines.len() {
109            let line = lines[i].trim();
110
111            // Look for function definitions
112            if Self::is_function_start(line) {
113                let (func_info, consumed) = Self::parse_function(&lines, i);
114                if !func_info.name.is_empty() {
115                    functions.insert(func_info.name.clone(), func_info);
116                }
117                i += consumed.max(1);
118            } else {
119                i += 1;
120            }
121        }
122
123        functions
124    }
125
126    /// Checks if a line starts a function definition.
127    fn is_function_start(line: &str) -> bool {
128        let patterns = [
129            "public fun ",
130            "public entry fun ",
131            "public(friend) fun ",
132            "entry fun ",
133            "fun ",
134            "#[view]",
135        ];
136        patterns.iter().any(|p| line.contains(p))
137    }
138
139    /// Parses a single function definition.
140    fn parse_function(lines: &[&str], start: usize) -> (MoveFunctionInfo, usize) {
141        let mut info = MoveFunctionInfo::default();
142        let mut consumed = 0;
143
144        // Look backwards for doc comments, collecting in reverse then reversing
145        let mut doc_lines = Vec::new();
146        let mut j = start;
147        while j > 0 {
148            j -= 1;
149            let prev_line = lines[j].trim();
150            if prev_line.starts_with("///") {
151                let doc_content = prev_line.strip_prefix("///").unwrap_or("").trim();
152                doc_lines.push(doc_content.to_string());
153            } else if prev_line.is_empty() || prev_line.starts_with("#[") {
154                // Skip empty lines and attributes
155            } else {
156                break;
157            }
158        }
159
160        if !doc_lines.is_empty() {
161            doc_lines.reverse();
162            info.doc = Some(doc_lines.join("\n"));
163        }
164
165        // Collect the full function signature (may span multiple lines)
166        let mut signature = String::new();
167        let mut i = start;
168        let mut paren_depth = 0;
169
170        while i < lines.len() {
171            let line = lines[i].trim();
172            consumed += 1;
173
174            signature.push_str(line);
175            signature.push(' ');
176
177            // Track parenthesis depth
178            for c in line.chars() {
179                match c {
180                    '(' => paren_depth += 1,
181                    ')' => paren_depth -= 1,
182                    _ => {}
183                }
184            }
185
186            // Stop when we've closed the parameter list and hit the body
187            if paren_depth == 0 && (line.contains('{') || line.ends_with(';')) {
188                break;
189            }
190
191            i += 1;
192        }
193
194        // Extract function name
195        if let Some(name) = Self::extract_function_name(&signature) {
196            info.name = name;
197        }
198
199        // Extract type parameters
200        info.type_param_names = Self::extract_type_params(&signature);
201
202        // Extract parameter names
203        info.param_names = Self::extract_param_names(&signature);
204
205        (info, consumed)
206    }
207
208    /// Extracts the function name from a signature.
209    fn extract_function_name(signature: &str) -> Option<String> {
210        // Look for "fun name" or "fun name<"
211        let fun_idx = signature.find("fun ")?;
212        let after_fun = &signature[fun_idx + 4..];
213        let after_fun = after_fun.trim_start();
214
215        // Get until the first non-identifier character
216        let name: String = after_fun
217            .chars()
218            .take_while(|c| c.is_alphanumeric() || *c == '_')
219            .collect();
220
221        if name.is_empty() { None } else { Some(name) }
222    }
223
224    /// Extracts type parameter names from a signature.
225    fn extract_type_params(signature: &str) -> Vec<String> {
226        let mut params = Vec::new();
227
228        // Find the type params section after function name
229        if let Some(fun_idx) = signature.find("fun ") {
230            let after_fun = &signature[fun_idx..];
231
232            // Look for <...> before (
233            if let Some(lt_idx) = after_fun.find('<')
234                && let Some(gt_idx) = after_fun.find('>')
235                && lt_idx < gt_idx
236            {
237                let type_params = &after_fun[lt_idx + 1..gt_idx];
238                for param in type_params.split(',') {
239                    let param = param.trim();
240                    // Extract just the name (before any constraints)
241                    let name: String = param
242                        .chars()
243                        .take_while(|c| c.is_alphanumeric() || *c == '_')
244                        .collect();
245                    if !name.is_empty() {
246                        params.push(name);
247                    }
248                }
249            }
250        }
251
252        params
253    }
254
255    /// Extracts parameter names from a function signature.
256    fn extract_param_names(signature: &str) -> Vec<String> {
257        let mut params = Vec::new();
258
259        // Find the parameter section (...) after function name
260        // We need to handle nested generics properly
261        let Some(paren_start) = signature.find('(') else {
262            return params;
263        };
264
265        let after_paren = &signature[paren_start + 1..];
266
267        // Find matching closing paren
268        let mut depth = 1;
269        let mut end_idx = 0;
270        for (i, c) in after_paren.chars().enumerate() {
271            match c {
272                '(' => depth += 1,
273                ')' => {
274                    depth -= 1;
275                    if depth == 0 {
276                        end_idx = i;
277                        break;
278                    }
279                }
280                _ => {}
281            }
282        }
283
284        let params_str = &after_paren[..end_idx];
285
286        // Split by comma, handling nested generics
287        let mut current_param = String::new();
288        let mut angle_depth = 0;
289
290        for c in params_str.chars() {
291            match c {
292                '<' => {
293                    angle_depth += 1;
294                    current_param.push(c);
295                }
296                '>' => {
297                    angle_depth -= 1;
298                    current_param.push(c);
299                }
300                ',' if angle_depth == 0 => {
301                    if let Some(name) = Self::extract_single_param_name(&current_param) {
302                        params.push(name);
303                    }
304                    current_param.clear();
305                }
306                _ => current_param.push(c),
307            }
308        }
309
310        // Don't forget the last parameter
311        if let Some(name) = Self::extract_single_param_name(&current_param) {
312            params.push(name);
313        }
314
315        params
316    }
317
318    /// Extracts the parameter name from a single "name: Type" declaration.
319    fn extract_single_param_name(param: &str) -> Option<String> {
320        let param = param.trim();
321        if param.is_empty() {
322            return None;
323        }
324
325        // Handle "name: Type" format
326        if let Some(colon_idx) = param.find(':') {
327            let name = param[..colon_idx].trim();
328            // Remove any leading & for references
329            let name = name.trim_start_matches('&').trim();
330            if name.is_empty() || name == "_" {
331                None
332            } else {
333                Some(name.to_string())
334            }
335        } else {
336            None
337        }
338    }
339
340    /// Parses all struct definitions from the source.
341    fn parse_structs(source: &str) -> HashMap<String, MoveStructInfo> {
342        let mut structs = HashMap::new();
343        let lines: Vec<&str> = source.lines().collect();
344
345        let mut i = 0;
346        while i < lines.len() {
347            let line = lines[i].trim();
348
349            // Look for struct definitions
350            if line.contains("struct ") && (line.contains(" has ") || line.contains('{')) {
351                let (struct_info, consumed) = Self::parse_struct(&lines, i);
352                if !struct_info.name.is_empty() {
353                    structs.insert(struct_info.name.clone(), struct_info);
354                }
355                i += consumed.max(1);
356            } else {
357                i += 1;
358            }
359        }
360
361        structs
362    }
363
364    /// Parses a single struct definition.
365    fn parse_struct(lines: &[&str], start: usize) -> (MoveStructInfo, usize) {
366        let mut info = MoveStructInfo::default();
367        let mut consumed = 0;
368
369        // Look backwards for doc comments, collecting in reverse then reversing
370        let mut doc_lines = Vec::new();
371        let mut j = start;
372        while j > 0 {
373            j -= 1;
374            let prev_line = lines[j].trim();
375            if prev_line.starts_with("///") {
376                let doc_content = prev_line.strip_prefix("///").unwrap_or("").trim();
377                doc_lines.push(doc_content.to_string());
378            } else if prev_line.is_empty() || prev_line.starts_with("#[") {
379                // Skip empty lines and attributes
380            } else {
381                break;
382            }
383        }
384
385        if !doc_lines.is_empty() {
386            doc_lines.reverse();
387            info.doc = Some(doc_lines.join("\n"));
388        }
389
390        // Extract struct name
391        let line = lines[start].trim();
392        if let Some(struct_idx) = line.find("struct ") {
393            let after_struct = &line[struct_idx + 7..];
394            let name: String = after_struct
395                .chars()
396                .take_while(|c| c.is_alphanumeric() || *c == '_')
397                .collect();
398            info.name = name;
399        }
400
401        // Parse struct body for field documentation
402        let mut i = start;
403        let mut in_struct = false;
404        let mut current_doc: Option<String> = None;
405
406        while i < lines.len() {
407            let line = lines[i].trim();
408            consumed += 1;
409
410            if line.contains('{') {
411                in_struct = true;
412            }
413
414            if in_struct {
415                if line.starts_with("///") {
416                    let doc = line.strip_prefix("///").unwrap_or("").trim();
417                    current_doc = Some(doc.to_string());
418                } else if line.contains(':') && !line.starts_with("//") {
419                    // This is a field
420                    let field_name: String = line
421                        .trim()
422                        .chars()
423                        .take_while(|c| c.is_alphanumeric() || *c == '_')
424                        .collect();
425
426                    if !field_name.is_empty()
427                        && let Some(doc) = current_doc.take()
428                    {
429                        info.field_docs.insert(field_name, doc);
430                    }
431                } else if !line.starts_with("//") && !line.is_empty() {
432                    current_doc = None;
433                }
434
435                if line.contains('}') {
436                    break;
437                }
438            }
439
440            i += 1;
441        }
442
443        (info, consumed)
444    }
445}
446
447/// Merges Move source information with ABI function parameters.
448#[derive(Debug, Clone)]
449pub struct EnrichedFunctionInfo {
450    /// Function name.
451    pub name: String,
452    /// Documentation from Move source.
453    pub doc: Option<String>,
454    /// Parameters with names and types.
455    pub params: Vec<EnrichedParam>,
456    /// Type parameter names.
457    pub type_param_names: Vec<String>,
458}
459
/// A single function parameter, pairing a readable name with its ABI type.
#[derive(Clone, Debug)]
pub struct EnrichedParam {
    /// Name taken from the Move source, or a generated fallback.
    pub name: String,
    /// Type string exactly as reported by the ABI.
    pub move_type: String,
    /// `true` when the ABI type is `signer` or `&signer`.
    pub is_signer: bool,
}
470
471impl EnrichedFunctionInfo {
472    /// Creates enriched function info by merging Move source and ABI data.
473    pub fn from_abi_and_source(
474        func_name: &str,
475        abi_params: &[String],
476        abi_type_params_count: usize,
477        source_info: Option<&MoveFunctionInfo>,
478    ) -> Self {
479        let mut info = Self {
480            name: func_name.to_string(),
481            doc: source_info.and_then(|s| s.doc.clone()),
482            params: Vec::new(),
483            type_param_names: Vec::new(),
484        };
485
486        // Get parameter names from source, or generate defaults
487        let source_names = source_info
488            .map(|s| s.param_names.clone())
489            .unwrap_or_default();
490
491        // Get type parameter names
492        if let Some(src) = source_info {
493            info.type_param_names.clone_from(&src.type_param_names);
494        }
495        // Fill in missing type param names
496        while info.type_param_names.len() < abi_type_params_count {
497            info.type_param_names
498                .push(format!("T{}", info.type_param_names.len()));
499        }
500
501        // Create enriched params
502        let mut source_idx = 0;
503        for (i, move_type) in abi_params.iter().enumerate() {
504            let is_signer = move_type == "&signer" || move_type == "signer";
505
506            // Get name from source if available
507            let name = if source_idx < source_names.len() {
508                let name = source_names[source_idx].clone();
509                source_idx += 1;
510                name
511            } else {
512                // Generate a meaningful name based on type
513                Self::generate_param_name(move_type, i)
514            };
515
516            info.params.push(EnrichedParam {
517                name,
518                move_type: move_type.clone(),
519                is_signer,
520            });
521        }
522
523        info
524    }
525
526    /// Generates a parameter name based on its type.
527    fn generate_param_name(move_type: &str, index: usize) -> String {
528        match move_type {
529            "&signer" | "signer" => "account".to_string(),
530            "address" => "addr".to_string(),
531            "u8" | "u16" | "u32" | "u64" | "u128" | "u256" => "amount".to_string(),
532            "bool" => "flag".to_string(),
533            t if t.starts_with("vector<u8>") => "bytes".to_string(),
534            t if t.starts_with("vector<") => "items".to_string(),
535            t if t.contains("::string::String") => "name".to_string(),
536            t if t.contains("::object::Object") => "object".to_string(),
537            t if t.contains("::option::Option") => "maybe_value".to_string(),
538            _ => format!("arg{index}"),
539        }
540    }
541
542    /// Returns non-signer parameters.
543    pub fn non_signer_params(&self) -> Vec<&EnrichedParam> {
544        self.params.iter().filter(|p| !p.is_signer).collect()
545    }
546}
547
#[cfg(test)]
mod tests {
    use super::*;

    /// Move fixture shared by all parser tests.
    const SAMPLE_MOVE_SOURCE: &str = r"
/// A module for managing tokens.
///
/// This module provides functionality for minting and transferring tokens.
module my_addr::my_token {
    use std::string::String;
    use aptos_framework::object::Object;

    /// Represents token information.
    struct TokenInfo has key {
        /// The name of the token.
        name: String,
        /// The symbol of the token.
        symbol: String,
        /// Number of decimal places.
        decimals: u8,
    }

    /// Mints new tokens to a recipient.
    ///
    /// # Arguments
    /// * `admin` - The admin account
    /// * `recipient` - The address to receive tokens
    /// * `amount` - The amount to mint
    public entry fun mint(
        admin: &signer,
        recipient: address,
        amount: u64,
    ) acquires TokenInfo {
        // implementation
    }

    /// Transfers tokens between accounts.
    public entry fun transfer<CoinType>(
        sender: &signer,
        to: address,
        amount: u64,
    ) {
        // implementation
    }

    /// Gets the balance of an account.
    #[view]
    public fun balance(owner: address): u64 {
        0
    }

    /// Gets the total supply.
    #[view]
    public fun total_supply(): u64 {
        0
    }
}
";

    /// Parses the shared fixture.
    fn parsed_sample() -> MoveModuleInfo {
        MoveSourceParser::parse(SAMPLE_MOVE_SOURCE)
    }

    /// ABI parameter list `(&signer, address, u64)` used by the enrichment tests.
    fn signer_address_u64() -> Vec<String> {
        ["&signer", "address", "u64"]
            .iter()
            .map(|ty| (*ty).to_string())
            .collect()
    }

    #[test]
    fn test_parse_module_doc() {
        let doc = parsed_sample().doc.expect("module doc should be present");
        assert!(doc.contains("managing tokens"));
    }

    #[test]
    fn test_parse_function_names() {
        let module = parsed_sample();

        for expected in ["mint", "transfer", "balance", "total_supply"].iter() {
            assert!(
                module.functions.contains_key(*expected),
                "missing function {expected}"
            );
        }
    }

    #[test]
    fn test_parse_function_params() {
        let module = parsed_sample();
        let params_of = |name: &str| module.functions[name].param_names.clone();

        assert_eq!(params_of("mint"), vec!["admin", "recipient", "amount"]);
        assert_eq!(params_of("transfer"), vec!["sender", "to", "amount"]);
        assert_eq!(params_of("balance"), vec!["owner"]);
    }

    #[test]
    fn test_parse_type_params() {
        let module = parsed_sample();

        assert_eq!(
            module.functions["transfer"].type_param_names,
            vec!["CoinType"]
        );
    }

    #[test]
    fn test_parse_function_docs() {
        let module = parsed_sample();

        let mint_doc = module.functions["mint"]
            .doc
            .as_deref()
            .expect("mint should be documented");
        assert!(mint_doc.contains("Mints new tokens"));
    }

    #[test]
    fn test_parse_struct() {
        let module = parsed_sample();

        let token_info = module
            .structs
            .get("TokenInfo")
            .expect("TokenInfo should be parsed");
        let struct_doc = token_info
            .doc
            .as_deref()
            .expect("TokenInfo should be documented");
        assert!(struct_doc.contains("token information"));

        // Field docs
        let name_doc = token_info
            .field_docs
            .get("name")
            .expect("field `name` should be documented");
        assert!(name_doc.contains("name of the token"));
    }

    #[test]
    fn test_enriched_function() {
        let module = parsed_sample();
        let abi_params = signer_address_u64();

        let enriched = EnrichedFunctionInfo::from_abi_and_source(
            "mint",
            &abi_params,
            0,
            module.functions.get("mint"),
        );

        let names: Vec<&str> = enriched.params.iter().map(|p| p.name.as_str()).collect();
        assert_eq!(names, vec!["admin", "recipient", "amount"]);
        assert!(enriched.params[0].is_signer);

        assert_eq!(enriched.non_signer_params().len(), 2);
    }

    #[test]
    fn test_enriched_function_without_source() {
        let abi_params = signer_address_u64();

        let enriched = EnrichedFunctionInfo::from_abi_and_source("transfer", &abi_params, 0, None);

        // Should generate reasonable names
        let names: Vec<&str> = enriched.params.iter().map(|p| p.name.as_str()).collect();
        assert_eq!(names, vec!["account", "addr", "amount"]);
    }
}
719}