text_parsing/
lib.rs

1//mod aho_corasick;
2
3mod locality;
4pub use locality::{
5    Snip, Local, Localize,
6};
7
8pub mod source;
9pub use source::{
10    Breaker,
11    Source, IntoSource,
12    StrSource,
13    OptSource,
14    Processed,
15    EmptySource,
16    ParserSource,
17    SourceExt,
18    SourceResult,
19    SourceEvent,
20
21    //Pipe, Filtered, IntoSeparator, Chain,
22    //Shift,
23};
24
25pub mod parser;
26pub use parser::{
27    Parser,
28    ParserExt,
29    PipeParser,
30    PipeParserExt,
31    IntoPipeParser,
32    ParserResult,
33    ParserEvent,
34
35    //Filter, Filtered, TryFilter, TryFiltered, IntoBreaker, TryIntoBreaker, PipeBreaker
36    // Pipe
37};
38
39mod state;
40pub use state::{
41    NextResult, Next,
42    StateMachine, Runtime,
43};
44
45pub mod entities {
46    mod entities;
47    mod parser;
48    mod state;
49
50    pub use parser::{Builder,EntityParser,PipedEntityParser};
51}
52
53pub mod tagger {
54    mod tags;
55    mod state;
56    mod parser;
57
58    pub use parser::{Builder,TagParser};
59    pub use tags::{Tag,TagName,Closing,SpecTag};
60}
61
62pub mod paragraph {
63    mod parser;
64    mod state;
65
66    pub use parser::{Builder,Paragraphs};
67}
68
69
70pub enum Error {
71    EofInTag(Vec<Local<SourceEvent>>),
72    EndBeforeBegin,
73    NoBegin,
74}
75
76impl std::fmt::Debug for Error {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        match self {
79            Error::EndBeforeBegin => f.debug_struct("EndBeforeBegin"),
80            Error::NoBegin => f.debug_struct("NoBegin"),
81            Error::EofInTag(v) => {
82                let mut dbg = f.debug_struct("EofInTag");
83                let mut iter = v.into_iter();
84                if let Some(lse) = iter.next() {
85                    let (local,se) = lse.into_inner();
86                    let first = local;
87                    let mut last = local;
88                    let mut s = String::new();
89                    push_s(se,&mut s);
90                    for lse in iter {
91                        let (local,se) = lse.into_inner();
92                        last = local;
93                        push_s(se,&mut s);
94                    }                    
95                    if let Ok(lc) = Local::from_segment(first,last) {
96                        dbg.field("chars", &lc.chars())
97                            .field("bytes", &lc.bytes());
98                    }
99                    dbg.field("data", &s);                    
100                }
101                dbg
102            }
103        }.finish()
104    }
105}
106
107fn push_s(se: SourceEvent, s: &mut String) {
108    match se {
109        SourceEvent::Char(c) => s.push(c),
110        SourceEvent::Breaker(Breaker::None) => {},
111        SourceEvent::Breaker(_) => s.push(' '),
112    }
113}