syncdoc_migrate/
extract.rs

1use proc_macro2::TokenStream;
2pub(crate) use syncdoc_core::parse::{Attribute, InnerAttribute};
3pub(crate) use unsynn::*;
4
5/// Extracts documentation content from a list of attributes
6///
7/// Returns the concatenated documentation strings if any doc attributes are found,
8/// otherwise returns None.
9pub fn extract_doc_content(attrs: &Option<Many<Attribute>>) -> Option<String> {
10    let attrs = attrs.as_ref()?;
11
12    let mut doc_strings = Vec::new();
13
14    for attr_delimited in &attrs.0 {
15        // Extract the actual Attribute from the Delimited wrapper
16        if let Some(doc_content) = extract_from_single_attr(&attr_delimited.value) {
17            // Strip leading space that Rust adds to doc comments
18            let trimmed = doc_content.strip_prefix(' ').unwrap_or(&doc_content);
19            doc_strings.push(trimmed.to_string());
20        }
21    }
22
23    if doc_strings.is_empty() {
24        None
25    } else {
26        Some(doc_strings.join("\n").trim().to_string())
27    }
28}
29
30/// Extracts documentation content from inner attributes (#![doc = "..."])
31pub fn extract_inner_doc_content(attrs: &Option<Many<InnerAttribute>>) -> Option<String> {
32    let attrs = attrs.as_ref()?;
33
34    let mut doc_strings = Vec::new();
35
36    for attr_delimited in &attrs.0 {
37        if let Some(doc_content) = extract_from_inner_attr(&attr_delimited.value) {
38            // Strip leading space that Rust adds to doc comments
39            let trimmed = doc_content.strip_prefix(' ').unwrap_or(&doc_content);
40            doc_strings.push(trimmed.to_string());
41        }
42    }
43
44    if doc_strings.is_empty() {
45        None
46    } else {
47        Some(doc_strings.join("\n").trim().to_string())
48    }
49}
50
51/// Helper to check if any doc attributes are present
52pub fn has_doc_attrs(attrs: &Option<Many<Attribute>>) -> bool {
53    extract_doc_content(attrs).is_some()
54}
55
56/// Checks if a BracketGroup contains a doc attribute
57/// This properly parses the attribute content instead of string manipulation
58pub fn is_doc_attribute_bracket(bracket: &BracketGroup) -> bool {
59    // Extract the token stream from the bracket group
60    let mut ts = TokenStream::new();
61    unsynn::ToTokens::to_tokens(bracket, &mut ts);
62
63    // Get the content inside the brackets
64    let content = if let Some(proc_macro2::TokenTree::Group(g)) = ts.into_iter().next() {
65        g.stream()
66    } else {
67        return false;
68    };
69
70    // Try to parse as tokens and check first ident
71    let tokens: Vec<proc_macro2::TokenTree> = content.into_iter().collect();
72
73    if let Some(proc_macro2::TokenTree::Ident(ident)) = tokens.first() {
74        let ident_str = ident.to_string();
75        // Only check the identifier itself
76        ident_str == "doc" || ident_str == "cfg_attr"
77    } else {
78        false
79    }
80}
81
82/// Checks if an inner attribute is a doc attribute
83pub fn is_inner_doc_attr(attr: &InnerAttribute) -> bool {
84    is_doc_attribute_bracket(&attr.content)
85}
86
87/// Checks if an outer attribute is a doc attribute
88pub fn is_outer_doc_attr(attr: &Attribute) -> bool {
89    is_doc_attribute_bracket(&attr.content)
90}
91
92/// Extracts doc content from a single inner attribute
93pub(crate) fn extract_from_inner_attr(attr: &InnerAttribute) -> Option<String> {
94    let mut tokens = TokenStream::new();
95    unsynn::ToTokens::to_tokens(attr, &mut tokens);
96
97    let token_str = tokens.to_string();
98
99    // Check if this is a doc attribute (inner attrs start with #![)
100    if !token_str.starts_with("# ! [") {
101        return None;
102    }
103
104    // Look for doc = "..." pattern (reuse same logic)
105    if let Some(doc_start) = token_str.find("doc") {
106        let after_doc = &token_str[doc_start..];
107
108        if let Some(eq_pos) = after_doc.find('=') {
109            let after_eq = &after_doc[eq_pos + 1..].trim_start();
110
111            if let Some(content) = extract_string_literal(after_eq) {
112                return Some(content);
113            }
114        }
115    }
116
117    None
118}
119
120/// Extracts doc content from a single attribute
121pub(crate) fn extract_from_single_attr(attr: &Attribute) -> Option<String> {
122    let mut tokens = TokenStream::new();
123    unsynn::ToTokens::to_tokens(attr, &mut tokens);
124
125    let token_str = tokens.to_string();
126
127    // Check if this is a doc attribute
128    if !token_str.starts_with("# [") {
129        return None;
130    }
131
132    // Look for doc = "..." pattern
133    if let Some(doc_start) = token_str.find("doc") {
134        let after_doc = &token_str[doc_start..];
135
136        // Find the equals sign and opening quote
137        if let Some(eq_pos) = after_doc.find('=') {
138            let after_eq = &after_doc[eq_pos + 1..].trim_start();
139
140            // Extract string content
141            if let Some(content) = extract_string_literal(after_eq) {
142                return Some(content);
143            }
144        }
145    }
146
147    None
148}
149
150/// Extracts a string literal from token text and unescapes it
151pub(crate) fn extract_string_literal(s: &str) -> Option<String> {
152    let s = s.trim();
153
154    // Handle regular string "..."
155    if s.starts_with('"') {
156        if let Some(end_pos) = find_closing_quote(s, 1) {
157            let escaped_content = &s[1..end_pos];
158            return Some(unescape_rust_string(escaped_content));
159        }
160    }
161
162    // Handle raw string r#"..."#
163    if s.starts_with("r#") {
164        if let Some(start) = s.find('"') {
165            if let Some(end) = s[start + 1..].find("\"#") {
166                // Raw strings have no escapes, return as-is
167                return Some(s[start + 1..start + 1 + end].to_string());
168            }
169        }
170    }
171
172    // Handle raw string r"..."
173    if s.starts_with("r\"") {
174        if let Some(end_pos) = find_closing_quote(s, 2) {
175            // Raw strings have no escapes, return as-is
176            return Some(s[2..end_pos].to_string());
177        }
178    }
179
180    None
181}
182
/// Decodes the escape sequences found in the body of a Rust string literal.
///
/// `s` is the text between the quotes of a non-raw literal. Recognised
/// escapes:
///
/// * `\n`, `\r`, `\t`, `\\`, `\0`, `\'`, `\"`;
/// * `\xNN` — two hex digits;
/// * `\u{...}` — hex code point, `_` digit separators allowed;
/// * backslash followed by a newline (string continuation) — the backslash,
///   the newline and all following spaces, tabs, `\n` and `\r` are removed.
///
/// Anything unrecognised or malformed (unknown escape letter, bad hex,
/// invalid code point, unterminated `\u{`, trailing backslash) is copied to
/// the output exactly as it appeared in the input.
pub(crate) fn unescape_rust_string(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('\\') => result.push('\\'),
            Some('0') => result.push('\0'),
            Some('\'') => result.push('\''),
            Some('"') => result.push('"'),
            Some('\n') => {
                // String continuation: swallow the whitespace that follows.
                while let Some(&next) = chars.peek() {
                    if next == ' ' || next == '\t' || next == '\n' || next == '\r' {
                        chars.next();
                    } else {
                        break;
                    }
                }
            }
            Some('x') => {
                // Hex escape: \xNN
                let hex: String = chars.by_ref().take(2).collect();
                match u8::from_str_radix(&hex, 16) {
                    Ok(byte) => result.push(char::from(byte)),
                    Err(_) => {
                        // Invalid escape, keep as-is
                        result.push_str("\\x");
                        result.push_str(&hex);
                    }
                }
            }
            Some('u') => {
                // Unicode escape: \u{NNNN}
                if chars.peek() != Some(&'{') {
                    // Malformed: keep `\u` and leave the next character in
                    // the iterator so it is not lost.
                    result.push_str("\\u");
                    continue;
                }
                chars.next(); // consume '{'

                let mut raw = String::new();
                let mut closed = false;
                for c in chars.by_ref() {
                    if c == '}' {
                        closed = true;
                        break;
                    }
                    raw.push(c);
                }

                // `_` is a permitted digit separator inside the braces.
                let digits: String = raw.chars().filter(|&c| c != '_').collect();
                let decoded = if closed {
                    u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)
                } else {
                    None
                };

                match decoded {
                    Some(c) => result.push(c),
                    None => {
                        // Invalid hex, invalid code point or missing `}`:
                        // reproduce the input text unchanged.
                        result.push_str("\\u{");
                        result.push_str(&raw);
                        if closed {
                            result.push('}');
                        }
                    }
                }
            }
            Some(other) => {
                // Unknown escape, keep the backslash
                result.push('\\');
                result.push(other);
            }
            // Trailing backslash
            None => result.push('\\'),
        }
    }

    result
}
249
/// Finds the closing quote of a string literal, accounting for escaped quotes.
///
/// Scanning begins at byte offset `start` of `s` (normally just past the
/// opening quote). A backslash causes the character after it to be skipped,
/// so `\"` does not end the literal.
///
/// Returns the byte offset of the first unescaped `"` within `s`, suitable for
/// slicing `s` directly. Returns `None` when there is no such quote, or when
/// `start` is past the end of `s` or not on a character boundary.
pub(crate) fn find_closing_quote(s: &str, start: usize) -> Option<usize> {
    let mut escaped = false;

    // `char_indices` yields byte offsets; counting characters instead would
    // give wrong positions as soon as the text contains a multi-byte character.
    for (offset, ch) in s.get(start..)?.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }

        match ch {
            '\\' => escaped = true,
            '"' => return Some(start + offset),
            _ => {}
        }
    }

    None
}