1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with
// this file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Some utility functions for processing text.
//!
//! These functions were originally written for
//! [Crowbook](https://github.com/lise-henry/crowbook), but have
//! been published in a separate crate and under a less restrictive
//! license (MPL instead of LGPL) so they can be used elsewhere.
//!
//! # Example
//!
//! ```
//! use crowbook_text_processing::{
//!     FrenchFormatter,
//!     escape_html,
//!     escape_tex,
//!     remove_whitespaces,
//!     typographic_quotes,
//!     ellipsis,
//! };
//!
//! let s = " Some  string with  too much   whitespaces & around 1% \
//!          characters that might cause trouble to HTML or LaTeX.";
//! // Remove unnecessary whitespaces (but doesn't trim, as it can have meaning)
//! let new_s = remove_whitespaces(s);
//! // Escape for HTML
//! println!("for HTML: {}", escape_html(new_s.clone()));
//! // Escape for LaTeX
//! println!("for LaTeX: {}", escape_tex(new_s));
//!
//! // Replace quotes with typographic quotation marks
//! let s = r#"Some "quoted string" and 'another one'."#;
//! let new_s = typographic_quotes(s);
//! assert_eq!(&new_s, "Some “quoted string” and ‘another one’.");
//!
//! // Replace three consecutive dots with ellipsis character
//! let s = ellipsis("Foo...");
//! assert_eq!(&s, "Foo…");
//!
//! // Format whitespaces according to French typographic rules, using
//! // the appropriate non-breaking spaces where needed
//! let s = " Une chaîne en français ! On voudrait un résultat \
//!          « typographiquement correct ».";
//! let french = FrenchFormatter::new();
//! println!("for text: {}", french.format(s));
//! println!("for LaTeX: {}", escape_tex(french.format_tex(s)));
//! ```
//! # Requirements
//!
//! * `rustc >= 1.6.0`
//!
//! # Semantic versioning
//!
//! While not yet at version `1.0`, this crate tries to follow semantic
//! versioning in the following way:
//!
//! * an increase of `x` in `0.x.y` means breaking changes.
//! * an increase of `y` in `0.x.y` means non-breaking changes.
extern crate regex;
#[macro_use]
extern crate lazy_static;

pub mod escape;
pub mod clean;
pub mod french;

mod common;

pub use escape::{escape_html, escape_tex};
pub use clean::remove_whitespaces;
pub use clean::typographic_quotes;
pub use clean::ellipsis;
pub use french::FrenchFormatter;