Skip to main content

perl_pod/
lib.rs

//! POD documentation extractor for Perl `.pm` files.
//!
//! Parses POD (Plain Old Documentation) sections from Perl source files and
//! returns structured documentation suitable for hover display in an LSP.
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
11use std::collections::HashMap;
12use std::io;
13use std::path::Path;
14
/// Extracted POD documentation from a Perl module.
///
/// Every field is optional: a module may document only some of the
/// conventional sections. [`PodDoc::default`] yields a value for which
/// [`PodDoc::is_empty`] returns `true`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PodDoc {
    /// Module name and optional one-line description from `=head1 NAME`.
    pub name: Option<String>,
    /// Usage example from `=head1 SYNOPSIS`.
    pub synopsis: Option<String>,
    /// First paragraph of `=head1 DESCRIPTION`.
    pub description: Option<String>,
    /// Method/function docs keyed by name, from `=head2 method_name`.
    pub methods: HashMap<String, String>,
}

impl PodDoc {
    /// Returns `true` if no documentation was extracted.
    ///
    /// That is: no name, no synopsis, no description and no method entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        // Destructure exhaustively so that adding a field to `PodDoc` without
        // deciding how it affects emptiness is a compile error.
        let Self {
            name,
            synopsis,
            description,
            methods,
        } = self;

        name.is_none() && synopsis.is_none() && description.is_none() && methods.is_empty()
    }
}
38
39/// Read a file and extract its POD documentation.
40///
41/// # Errors
42///
43/// Returns an I/O error if the file cannot be read.
44pub fn extract_pod_from_file(path: &Path) -> io::Result<PodDoc> {
45    let content = std::fs::read_to_string(path)?;
46    Ok(extract_pod(&content))
47}
48
49/// Extract POD documentation from a string of Perl source code.
50#[must_use]
51pub fn extract_pod(source: &str) -> PodDoc {
52    let mut doc = PodDoc::default();
53    let mut current_section: Option<Section> = None;
54    let mut body = String::new();
55    let mut in_pod = false;
56    let mut in_over = false;
57
58    for line in source.lines() {
59        // Detect POD start directives
60        if line.starts_with("=head")
61            || line.starts_with("=pod")
62            || line.starts_with("=over")
63            || line.starts_with("=begin")
64            || line.starts_with("=for")
65            || line.starts_with("=encoding")
66            || line.starts_with("=item")
67        {
68            in_pod = true;
69        }
70
71        if !in_pod {
72            continue;
73        }
74
75        // =cut ends POD
76        if line.starts_with("=cut") {
77            flush_section(&mut doc, &current_section, &body, in_over);
78            current_section = None;
79            body.clear();
80            in_pod = false;
81            in_over = false;
82            continue;
83        }
84
85        // =over / =item / =back for lists
86        if line.starts_with("=over") {
87            in_over = true;
88            body.push('\n');
89            continue;
90        }
91        if line.starts_with("=back") {
92            in_over = false;
93            body.push('\n');
94            continue;
95        }
96        if line.starts_with("=item") {
97            let item_text = line.strip_prefix("=item").map(str::trim).unwrap_or("");
98            if !body.is_empty() {
99                body.push('\n');
100            }
101            body.push_str("- ");
102            body.push_str(&strip_pod_formatting(item_text));
103            body.push('\n');
104            continue;
105        }
106
107        // New head1 section
108        if let Some(heading) = line.strip_prefix("=head1") {
109            flush_section(&mut doc, &current_section, &body, false);
110            body.clear();
111            let heading = heading.trim();
112            current_section = Some(match heading {
113                "NAME" => Section::Name,
114                "SYNOPSIS" => Section::Synopsis,
115                "DESCRIPTION" => Section::Description,
116                _ => Section::Other(()),
117            });
118            continue;
119        }
120
121        // New head2 section — treated as method documentation
122        if let Some(heading) = line.strip_prefix("=head2") {
123            flush_section(&mut doc, &current_section, &body, false);
124            body.clear();
125            let heading = heading.trim().to_string();
126            current_section = Some(Section::Method(heading));
127            continue;
128        }
129
130        // Skip other directives
131        if line.starts_with("=pod")
132            || line.starts_with("=encoding")
133            || line.starts_with("=begin")
134            || line.starts_with("=end")
135            || line.starts_with("=for")
136        {
137            continue;
138        }
139
140        // Accumulate body text
141        if current_section.is_some() && (!body.is_empty() || !line.is_empty()) {
142            if !body.is_empty() {
143                body.push('\n');
144            }
145            body.push_str(line);
146        }
147    }
148
149    // Flush any remaining section (POD can end at EOF without =cut)
150    flush_section(&mut doc, &current_section, &body, in_over);
151
152    doc
153}
154
/// The POD section whose body text is currently being accumulated.
#[derive(Debug)]
enum Section {
    /// `=head1 NAME`.
    Name,
    /// `=head1 SYNOPSIS`.
    Synopsis,
    /// `=head1 DESCRIPTION`.
    Description,
    /// `=head2 <heading>`; the payload is the heading text used as the method key.
    Method(String),
    /// Any other `=head1` section. Its body is collected but discarded on flush.
    /// The unit payload carries no data.
    Other(()),
}
163
164fn flush_section(doc: &mut PodDoc, section: &Option<Section>, body: &str, _in_over: bool) {
165    let section = match section {
166        Some(s) => s,
167        None => return,
168    };
169
170    let trimmed = body.trim();
171    if trimmed.is_empty() {
172        return;
173    }
174
175    let cleaned = strip_pod_formatting(trimmed);
176
177    match section {
178        Section::Name => {
179            doc.name = Some(cleaned);
180        }
181        Section::Synopsis => {
182            doc.synopsis = Some(cleaned);
183        }
184        Section::Description => {
185            // Take only the first paragraph
186            let first_para = first_paragraph(&cleaned);
187            doc.description = Some(first_para);
188        }
189        Section::Method(name) => {
190            doc.methods.insert(name.clone(), cleaned);
191        }
192        Section::Other(_) => {
193            // Ignore other head1 sections for now
194        }
195    }
196}
197
/// Extract the first paragraph of `text`.
///
/// A paragraph is a run of consecutive non-blank lines; a line is blank when it
/// is empty or contains only whitespace. Blank lines before the first
/// paragraph are skipped. The paragraph's lines are joined with `\n` and
/// returned without a trailing newline. Returns an empty string when `text`
/// has no non-blank line.
fn first_paragraph(text: &str) -> String {
    let is_blank = |line: &&str| line.trim().is_empty();

    text.lines()
        .skip_while(|line| is_blank(line))
        .take_while(|line| !is_blank(line))
        .collect::<Vec<_>>()
        .join("\n")
}
212
/// Strip POD inline formatting codes, leaving only the text to display.
///
/// A formatting code is one of the letters accepted by
/// [`is_pod_format_code`] immediately followed by `<`. Handling per code:
///
/// - `B<>`, `I<>`, `C<>`, `F<>`, `S<>`: replaced by their content, which is
///   itself stripped, so nested codes such as `B<I<text>>` work.
/// - `L<>`: replaced by the link's display text (see [`extract_link_display`]).
/// - `E<>`: replaced by the character it names (`E<lt>` -> `<`, `E<0x41>` ->
///   `A`); an unrecognised name is kept as written.
/// - `X<>` (index entry) and `Z<>` (null code): produce no output.
///
/// Both delimiter styles are understood: `C<text>` and the multi-bracket form
/// `C<< text >>`, whose content may contain unbalanced `<` and `>`. A code
/// that is never closed extends to the end of `text`. A code letter followed
/// by `<` at the very end of `text` is left as literal text.
fn strip_pod_formatting(text: &str) -> String {
    let bytes = text.as_bytes();
    let mut out = String::with_capacity(text.len());
    // Start of the literal run not yet copied to `out`.
    let mut literal_start = 0;
    let mut i = 0;

    while i < bytes.len() {
        // Code letters and `<` are ASCII, so byte-wise scanning is safe: an
        // ASCII byte never occurs inside a multi-byte UTF-8 sequence.
        let opens_code = i + 2 < bytes.len()
            && bytes[i + 1] == b'<'
            && is_pod_format_code(char::from(bytes[i]));
        if !opens_code {
            i += 1;
            continue;
        }

        out.push_str(&text[literal_start..i]);
        let (inner, next) = split_code_body(text, i + 1);

        match bytes[i] {
            b'E' => match decode_pod_escape(inner.trim()) {
                Some(decoded) => out.push(decoded),
                None => out.push_str(inner),
            },
            // Index entries and the null code render as nothing.
            b'X' | b'Z' => {}
            b'L' => out.push_str(&extract_link_display(inner)),
            _ => out.push_str(&strip_pod_formatting(inner)),
        }

        i = next;
        literal_start = next;
    }

    out.push_str(&text[literal_start..]);
    out
}

/// Locate the content of a formatting code whose first `<` is at byte `open`.
///
/// Returns the content slice and the byte index just past the closing
/// delimiter (or `text.len()` when the code is never closed).
fn split_code_body(text: &str, open: usize) -> (&str, usize) {
    let bytes = text.as_bytes();
    let width = bytes[open..].iter().take_while(|&&b| b == b'<').count();
    let body_start = open + width;

    // Multi-bracket form: two or more `<` followed by whitespace, closed by
    // whitespace followed by the same number of `>`.
    if width >= 2 && bytes.get(body_start).map_or(false, u8::is_ascii_whitespace) {
        let closer = ">".repeat(width);
        let mut search_from = body_start;
        while let Some(offset) = text[search_from..].find(&closer) {
            let close = search_from + offset;
            if close > body_start && bytes[close - 1].is_ascii_whitespace() {
                return (text[body_start..close].trim(), close + width);
            }
            search_from = close + 1;
        }
        // No matching closer: fall back to single-bracket matching below.
    }

    // Single-bracket form: balance `<` against `>`.
    let mut depth = 1_usize;
    let mut i = open + 1;
    while i < bytes.len() {
        match bytes[i] {
            // A nested code is skipped as a unit so that a nested multi-bracket
            // code cannot unbalance the count.
            b'<' if bytes[i - 1].is_ascii_uppercase() => {
                let (_, next) = split_code_body(text, i);
                i = next;
                continue;
            }
            b'<' => depth += 1,
            b'>' => {
                depth -= 1;
                if depth == 0 {
                    return (&text[open + 1..i], i + 1);
                }
            }
            _ => {}
        }
        i += 1;
    }

    (&text[open + 1..], text.len())
}

/// Decode the content of an `E<>` escape: a named entity (`lt`, `gt`,
/// `verbar`, `sol`, `amp`, `quot`, `apos`, `nbsp`) or a character number in
/// decimal, hexadecimal (`0x` prefix) or octal (leading `0`).
fn decode_pod_escape(name: &str) -> Option<char> {
    let named = match name {
        "lt" => Some('<'),
        "gt" => Some('>'),
        "verbar" => Some('|'),
        "sol" => Some('/'),
        "amp" => Some('&'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some('\u{a0}'),
        _ => None,
    };
    if named.is_some() {
        return named;
    }

    let code = if let Some(hex) = name.strip_prefix("0x") {
        u32::from_str_radix(hex, 16)
    } else if name.len() > 1 && name.starts_with('0') {
        u32::from_str_radix(&name[1..], 8)
    } else {
        name.parse::<u32>()
    };
    code.ok().and_then(std::char::from_u32)
}

/// Extract display text from the content of a POD `L<>` link.
///
/// Handles common forms:
/// - `L<Module::Name>` -> `Module::Name`
/// - `L<text|Module::Name>` -> `text`
/// - `L<text|Module::Name/section>` -> `text`
/// - `L<Module::Name/section>` -> `Module::Name`
/// - `L</section>` and `L</"section">` -> `section`
/// - `L<scheme://host/path>` -> the URL unchanged
fn extract_link_display(link: &str) -> String {
    // L<text|target> -> show text
    if let Some(pipe_pos) = link.find('|') {
        return strip_pod_formatting(&link[..pipe_pos]);
    }
    // Slashes inside a URL are part of the address, not a section separator.
    if is_url_link(link) {
        return link.to_string();
    }
    match link.find('/') {
        // L</section> -> section within the current document
        Some(0) => strip_pod_formatting(link[1..].trim().trim_matches('"')),
        // L<Module/section> -> show Module
        Some(slash_pos) => strip_pod_formatting(&link[..slash_pos]),
        None => strip_pod_formatting(link),
    }
}

/// Whether a link target looks like a URL: a `scheme:` prefix of word
/// characters followed by something other than a second `:` (which would make
/// it a `Module::Name`), with no whitespace anywhere.
fn is_url_link(link: &str) -> bool {
    match link.find(':') {
        Some(colon) if colon > 0 => {
            let scheme = &link[..colon];
            let rest = &link[colon + 1..];
            scheme.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
                && !rest.is_empty()
                && !rest.starts_with(':')
                && !link.contains(char::is_whitespace)
        }
        _ => false,
    }
}

/// Whether `c` is a letter this extractor treats as a POD formatting code.
fn is_pod_format_code(c: char) -> bool {
    matches!(c, 'B' | 'I' | 'C' | 'L' | 'F' | 'S' | 'E' | 'X' | 'Z')
}