typst_count/
lib.rs

1//! A library for counting words and characters in Typst documents.
2//!
3//! This crate provides functionality to compile Typst documents and count the
4//! words and characters in the rendered output. It works by:
5//!
6//! 1. Compiling Typst documents using the Typst compiler
7//! 2. Traversing the compiled document's element tree
8//! 3. Extracting plain text from rendered elements
9//! 4. Counting words (by whitespace) and characters
10//!
11//! # Features
12//!
13//! - Count words and characters from compiled Typst documents
14//! - Handle imported and included files
15//! - Multiple output formats (human-readable, JSON, CSV)
16//! - CI/CD integration with limit checking
17//!
18//! # Examples
19//!
20//! ```no_run
21//! use typst_count::compile_document;
22//! use std::path::Path;
23//!
24//! let path = Path::new("document.typ");
25//! let count = compile_document(path, false).unwrap();
26//! println!("Words: {}, Characters: {}", count.words, count.characters);
27//! ```
28pub mod cli;
29pub mod counter;
30pub mod output;
31pub mod world;
32
33use anyhow::{Context, Result};
34use cli::Cli;
35use counter::Count;
36use std::path::Path;
37use typst::{World, layout::PagedDocument};
38
39/// Compiles a Typst document and counts its words and characters.
40///
41/// This function loads a Typst document, compiles it using the Typst compiler,
42/// and extracts word and character counts from the rendered output.
43///
44/// # Arguments
45///
46/// * `path` - Path to the Typst document file
47/// * `exclude_imports` - If `true`, only counts content from the main file,
48///   excluding imported/included files
49///
50/// # Returns
51///
52/// A `Count` struct containing word and character counts, or an error if
53/// compilation fails.
54///
55/// # Errors
56///
57/// Returns an error if:
58/// - The file cannot be read
59/// - The document fails to compile
60/// - There are syntax errors in the Typst code
61///
62/// # Examples
63///
64/// ```no_run
65/// use typst_count::compile_document;
66/// use std::path::Path;
67///
68/// // Count all content including imports
69/// let count = compile_document(Path::new("document.typ"), false)?;
70///
71/// // Count only the main file
72/// let count = compile_document(Path::new("document.typ"), true)?;
73/// # Ok::<(), anyhow::Error>(())
74/// ```
75pub fn compile_document(path: &Path, exclude_imports: bool) -> Result<Count> {
76    let world = world::SimpleWorld::new(path)
77        .with_context(|| format!("Failed to load {}", path.display()))?;
78    let main_file_id = world.main();
79
80    let result = typst::compile(&world);
81    let document: PagedDocument = result.output.map_err(|errors| {
82        let error_msg = errors
83            .iter()
84            .map(|e| format!("{}", e.message))
85            .collect::<Vec<_>>()
86            .join(", ");
87        anyhow::anyhow!("Failed to compile {}: {}", path.display(), error_msg)
88    })?;
89
90    Ok(counter::count_document(
91        &document.introspector,
92        exclude_imports,
93        main_file_id,
94    ))
95}
96
97/// Processes multiple Typst files and returns their counts.
98///
99/// Compiles each input file specified in the CLI arguments and collects
100/// the word and character counts for each file.
101///
102/// # Arguments
103///
104/// * `args` - Command-line arguments containing input files and options
105///
106/// # Returns
107///
108/// A vector of tuples, each containing a file path (as a string) and its
109/// corresponding `Count`, or an error if any file fails to compile.
110///
111/// # Errors
112///
113/// Returns an error if any of the input files:
114/// - Cannot be read
115/// - Fails to compile
116/// - Contains syntax errors
117///
118/// # Examples
119///
120/// ```no_run
121/// use typst_count::{process_files, cli::Cli};
122/// use clap::Parser;
123///
124/// let args = Cli::parse();
125/// let results = process_files(&args)?;
126///
127/// for (path, count) in results {
128///     println!("{}: {} words", path, count.words);
129/// }
130/// # Ok::<(), anyhow::Error>(())
131/// ```
132pub fn process_files(args: &Cli) -> Result<Vec<(String, Count)>> {
133    args.input
134        .iter()
135        .map(|path| {
136            compile_document(path, args.exclude_imports)
137                .map(|count| (path.display().to_string(), count))
138        })
139        .collect()
140}
141
142/// Checks if word and character counts are within specified limits.
143///
144/// Validates that the total counts meet any minimum or maximum limits
145/// specified in the CLI arguments. This is useful for CI/CD pipelines
146/// to enforce document length requirements.
147///
148/// # Arguments
149///
150/// * `args` - Command-line arguments containing limit specifications
151/// * `total` - The total count to check against limits
152///
153/// # Returns
154///
155/// - `Ok(())` if all limits are satisfied
156/// - `Err(Vec<String>)` containing error messages for each violated limit
157///
158/// # Limit Checks
159///
160/// The following limits are checked if specified:
161/// - `max_words` - Maximum allowed word count
162/// - `min_words` - Minimum required word count
163/// - `max_characters` - Maximum allowed character count
164/// - `min_characters` - Minimum required character count
165///
166/// # Examples
167///
168/// ```no_run
169/// use typst_count::{check_limits, cli::Cli, counter::Count};
170/// use clap::Parser;
171///
172/// let args = Cli::parse();
173/// let total = Count { words: 500, characters: 2500 };
174///
175/// match check_limits(&args, &total) {
176///     Ok(()) => println!("All limits satisfied"),
177///     Err(errors) => {
178///         for error in errors {
179///             eprintln!("Limit violation: {}", error);
180///         }
181///     }
182/// }
183/// ```
184pub fn check_limits(args: &Cli, total: &Count) -> Result<(), Vec<String>> {
185    let mut errors = Vec::new();
186
187    if let Some(max) = args.max_words
188        && total.words > max
189    {
190        errors.push(format!(
191            "Word count exceeds maximum ({} > {})",
192            total.words, max
193        ));
194    }
195
196    if let Some(min) = args.min_words
197        && total.words < min
198    {
199        errors.push(format!(
200            "Word count below minimum ({} < {})",
201            total.words, min
202        ));
203    }
204
205    if let Some(max) = args.max_characters
206        && total.characters > max
207    {
208        errors.push(format!(
209            "Character count exceeds maximum ({} > {})",
210            total.characters, max
211        ));
212    }
213
214    if let Some(min) = args.min_characters
215        && total.characters < min
216    {
217        errors.push(format!(
218            "Character count below minimum ({} < {})",
219            total.characters, min
220        ));
221    }
222
223    if errors.is_empty() {
224        Ok(())
225    } else {
226        Err(errors)
227    }
228}
229
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::{Cli, CountMode, DisplayMode, OutputFormat};

    /// Builds a `Cli` with no input files and every limit unset.
    ///
    /// Individual tests override just the limits they care about using
    /// struct-update syntax.
    fn make_test_cli() -> Cli {
        Cli {
            input: vec![],
            format: OutputFormat::Human,
            mode: CountMode::Both,
            output: None,
            display: DisplayMode::Auto,
            exclude_imports: false,
            max_words: None,
            min_words: None,
            max_characters: None,
            min_characters: None,
        }
    }

    /// The fixed count every test validates: 100 words, 500 characters.
    fn sample_count() -> Count {
        Count {
            words: 100,
            characters: 500,
        }
    }

    /// Returns `true` when the sample count satisfies every limit in `args`.
    fn within_limits(args: &Cli) -> bool {
        check_limits(args, &sample_count()).is_ok()
    }

    /// Returns the violations reported for the sample count, failing the
    /// test if the limits were unexpectedly satisfied.
    fn violations(args: &Cli) -> Vec<String> {
        check_limits(args, &sample_count()).expect_err("expected at least one limit violation")
    }

    /// Like `violations`, but additionally requires exactly one message and
    /// returns it.
    fn sole_violation(args: &Cli) -> String {
        let mut found = violations(args);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    #[test]
    fn test_check_limits_no_limits() {
        assert!(within_limits(&make_test_cli()));
    }

    #[test]
    fn test_check_limits_max_words_ok() {
        let args = Cli {
            max_words: Some(200),
            ..make_test_cli()
        };

        assert!(within_limits(&args));
    }

    #[test]
    fn test_check_limits_max_words_exceeded() {
        let args = Cli {
            max_words: Some(50),
            ..make_test_cli()
        };

        let message = sole_violation(&args);
        assert!(message.contains("exceeds maximum"));
        assert!(message.contains("100 > 50"));
    }

    #[test]
    fn test_check_limits_min_words_ok() {
        let args = Cli {
            min_words: Some(50),
            ..make_test_cli()
        };

        assert!(within_limits(&args));
    }

    #[test]
    fn test_check_limits_min_words_below() {
        let args = Cli {
            min_words: Some(200),
            ..make_test_cli()
        };

        let message = sole_violation(&args);
        assert!(message.contains("below minimum"));
        assert!(message.contains("100 < 200"));
    }

    #[test]
    fn test_check_limits_max_characters_ok() {
        let args = Cli {
            max_characters: Some(1000),
            ..make_test_cli()
        };

        assert!(within_limits(&args));
    }

    #[test]
    fn test_check_limits_max_characters_exceeded() {
        let args = Cli {
            max_characters: Some(300),
            ..make_test_cli()
        };

        let message = sole_violation(&args);
        assert!(message.contains("exceeds maximum"));
        assert!(message.contains("500 > 300"));
    }

    #[test]
    fn test_check_limits_min_characters_ok() {
        let args = Cli {
            min_characters: Some(100),
            ..make_test_cli()
        };

        assert!(within_limits(&args));
    }

    #[test]
    fn test_check_limits_min_characters_below() {
        let args = Cli {
            min_characters: Some(1000),
            ..make_test_cli()
        };

        let message = sole_violation(&args);
        assert!(message.contains("below minimum"));
        assert!(message.contains("500 < 1000"));
    }

    #[test]
    fn test_check_limits_multiple_violations() {
        let args = Cli {
            max_words: Some(50),
            min_words: Some(200),
            max_characters: Some(300),
            min_characters: Some(1000),
            ..make_test_cli()
        };

        // All four limits are broken at once: too many and too few words,
        // too many and too few characters.
        assert_eq!(violations(&args).len(), 4);
    }

    #[test]
    fn test_check_limits_boundary_values() {
        let args = Cli {
            max_words: Some(100),
            min_words: Some(100),
            ..make_test_cli()
        };

        // A count sitting exactly on both limits must be accepted.
        assert!(within_limits(&args));
    }

    #[test]
    fn test_check_limits_mixed_ok_and_violations() {
        let args = Cli {
            max_words: Some(200),      // OK
            min_words: Some(50),       // OK
            max_characters: Some(300), // Violation
            min_characters: Some(100), // OK
            ..make_test_cli()
        };

        let message = sole_violation(&args);
        assert!(message.contains("Character count exceeds maximum"));
    }
}