python_ast/ast/tree/
module.rs

1use std::{collections::HashMap, default::Default};
2
3use log::info;
4use proc_macro2::TokenStream;
5use pyo3::{Bound, FromPyObject, PyAny, PyResult, prelude::PyAnyMethods};
6use quote::{format_ident, quote};
7use serde::{Deserialize, Serialize};
8
9use crate::{CodeGen, CodeGenContext, Name, Object, PythonOptions, Statement, StatementType, ExprType, SymbolTableScopes};
10
11#[derive(Clone, Debug, Serialize, Deserialize)]
12pub enum Type {
13    Unimplemented,
14}
15
16impl<'a> FromPyObject<'a> for Type {
17    fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult<Self> {
18        info!("Type: {:?}", ob);
19        Ok(Type::Unimplemented)
20    }
21}
22
23/// Represents a module as imported from an ast. See the Module struct for the processed module.
24#[derive(Clone, Debug, Default, FromPyObject, Serialize, Deserialize)]
25pub struct RawModule {
26    pub body: Vec<Statement>,
27    pub type_ignores: Vec<Type>,
28}
29
30/// Represents a module as imported from an ast.
31#[derive(Clone, Debug, Default, Serialize, Deserialize)]
32pub struct Module {
33    pub raw: RawModule,
34    pub name: Option<Name>,
35    pub doc: Option<String>,
36    pub filename: Option<String>,
37    pub attributes: HashMap<Name, String>,
38}
39
40impl<'a> FromPyObject<'a> for Module {
41    fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
42        let raw_module = ob.extract().expect("Failed parsing module.");
43
44        Ok(Self {
45            raw: raw_module,
46            ..Default::default()
47        })
48    }
49}
50
51impl CodeGen for Module {
52    type Context = CodeGenContext;
53    type Options = PythonOptions;
54    type SymbolTable = SymbolTableScopes;
55
56    fn find_symbols(self, symbols: Self::SymbolTable) -> Self::SymbolTable {
57        let mut symbols = symbols;
58        symbols.new_scope();
59        for s in self.raw.body {
60            symbols = s.clone().find_symbols(symbols);
61        }
62        symbols
63    }
64
65    fn to_rust(
66        self,
67        ctx: Self::Context,
68        options: Self::Options,
69        symbols: Self::SymbolTable,
70    ) -> Result<TokenStream, Box<dyn std::error::Error>> {
71        let mut stream = TokenStream::new();
72        
73        // Add module-level documentation if available and not just an expression
74        if let Some(docstring) = self.get_module_docstring() {
75            // Only add module docs if there are multiple statements or if this seems to be a real module docstring
76            if self.raw.body.len() > 1 || self.looks_like_module_docstring() {
77                let doc_lines: Vec<_> = docstring
78                    .lines()
79                    .map(|line| {
80                        if line.trim().is_empty() {
81                            quote! { #![doc = ""] }
82                        } else {
83                            let doc_line = format!("{}", line);
84                            quote! { #![doc = #doc_line] }
85                        }
86                    })
87                    .collect();
88                stream.extend(quote! { #(#doc_lines)* });
89                
90                // Add generated by comment only when we have actual module docs
91                let generated_comment = format!("Generated from Python file: {}", 
92                    self.filename.unwrap_or_else(|| "unknown.py".to_string()));
93                stream.extend(quote! { #![doc = #generated_comment] });
94            }
95        }
96        
97        let stdpython = format_ident!("{}", options.stdpython);
98        if options.with_std_python {
99            stream.extend(quote!(use #stdpython::*;));
100        }
101        
102        for s in self.raw.body {
103            let statement = s
104                .clone()
105                .to_rust(ctx.clone(), options.clone(), symbols.clone())
106                .expect(format!("parsing statement {:?} in module", s).as_str());
107            if statement.to_string() != "" {
108                stream.extend(statement);
109            }
110        }
111        Ok(stream)
112    }
113}
114
115impl Module {
116    fn get_module_docstring(&self) -> Option<String> {
117        if self.raw.body.is_empty() {
118            return None;
119        }
120        
121        // Check if the first statement is a string constant (docstring)
122        let first_stmt = &self.raw.body[0];
123        match &first_stmt.statement {
124            StatementType::Expr(expr) => match &expr.value {
125                ExprType::Constant(c) => {
126                    let raw_string = c.to_string();
127                    Some(self.format_module_docstring(&raw_string))
128                },
129                _ => None,
130            },
131            _ => None,
132        }
133    }
134    
135    fn format_module_docstring(&self, raw: &str) -> String {
136        // Remove surrounding quotes
137        let content = raw.trim_matches('"');
138        
139        // Split into lines and clean up Python-style indentation
140        let lines: Vec<&str> = content.lines().collect();
141        if lines.is_empty() {
142            return String::new();
143        }
144        
145        // For module docstrings, preserve more of the original formatting
146        let mut formatted = Vec::new();
147        
148        for line in lines {
149            let cleaned = line.trim();
150            if !cleaned.is_empty() {
151                formatted.push(cleaned.to_string());
152            } else {
153                formatted.push(String::new());
154            }
155        }
156        
157        formatted.join("\n")
158    }
159    
160    fn looks_like_module_docstring(&self) -> bool {
161        if self.raw.body.is_empty() {
162            return false;
163        }
164        
165        // Check if the first statement looks like a module docstring
166        let first_stmt = &self.raw.body[0];
167        if let StatementType::Expr(expr) = &first_stmt.statement {
168            if let ExprType::Constant(c) = &expr.value {
169                let raw_string = c.to_string();
170                let content = raw_string.trim_matches('"');
171                
172                // Heuristics to detect if this is a module docstring vs just a string expression:
173                // 1. Contains multiple lines
174                // 2. Contains common docstring keywords
175                // 3. Looks like documentation rather than a simple string
176                return content.lines().count() > 1 
177                    || content.to_lowercase().contains("module")
178                    || content.to_lowercase().contains("this ")
179                    || content.len() > 50; // Longer strings are more likely to be docstrings
180            }
181        }
182        false
183    }
184}
185
186impl Object for Module {
187    /// __dir__ is called to list the attributes of the object.
188    fn __dir__(&self) -> Vec<impl AsRef<str>> {
189        // XXX - Make this meaningful.
190        vec![
191            "__class__",
192            "__class_getitem__",
193            "__contains__",
194            "__delattr__",
195            "__delitem__",
196            "__dir__",
197            "__doc__",
198            "__eq__",
199            "__format__",
200            "__ge__",
201            "__getattribute__",
202            "__getitem__",
203            "__getstate__",
204            "__gt__",
205            "__hash__",
206            "__init__",
207            "__init_subclass__",
208            "__ior__",
209            "__iter__",
210            "__le__",
211            "__len__",
212            "__lt__",
213            "__ne__",
214            "__new__",
215            "__or__",
216            "__reduce__",
217            "__reduce_ex__",
218            "__repr__",
219            "__reversed__",
220            "__ror__",
221            "__setattr__",
222            "__setitem__",
223            "__sizeof__",
224            "__str__",
225            "__subclasshook__",
226            "clear",
227            "copy",
228            "fromkeys",
229            "get",
230            "items",
231            "keys",
232            "pop",
233            "popitem",
234            "setdefault",
235            "update",
236            "values",
237        ]
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a module with a comment and a function that prints can be
    /// parsed and handed to `to_rust`; the outcome is logged, not asserted.
    #[test]
    fn can_we_print() {
        let options = PythonOptions::default();
        let source = "#test comment
def foo():
    print(\"Test print.\")
";
        let module = crate::parse(source, "test_case.py").unwrap();
        info!("Python tree: {:?}", module);

        let context = CodeGenContext::Module("test_case".to_string());
        let code = module.to_rust(context, options, SymbolTableScopes::new());
        info!("module: {:?}", code);
    }

    /// Smoke test: a plain `import` statement can be parsed and handed to
    /// `to_rust`; the outcome is logged, not asserted.
    #[test]
    fn can_we_import() {
        let module = crate::parse("import ast", "ast.py").unwrap();
        let options = PythonOptions::default();
        info!("{:?}", module);

        let context = CodeGenContext::Module("test_case".to_string());
        let code = module.to_rust(context, options, SymbolTableScopes::new());
        info!("module: {:?}", code);
    }

    /// Smoke test: an aliased `import ... as ...` statement can be parsed and
    /// handed to `to_rust`; the outcome is logged, not asserted.
    #[test]
    fn can_we_import2() {
        let module = crate::parse("import ast as test", "ast.py").unwrap();
        let options = PythonOptions::default();
        info!("{:?}", module);

        let context = CodeGenContext::Module("test_case".to_string());
        let code = module.to_rust(context, options, SymbolTableScopes::new());
        info!("module: {:?}", code);
    }
}