from_regex/
lib.rs

1extern crate from_regex_macros;
2pub use from_regex_macros::*;
3pub use lazy_static::lazy_static;
4
5pub use regex::{self, Captures, Regex};
6pub use segmap::{self, SegmentMap};
7pub use std::str::FromStr;
8
9// TODO: String vs &str in capture fields
10// TODO: only need clone for search. And not really even for that
11
12/// Try to construct an instance of this type from a string
13pub trait FromRegex: Sized {
14    /// Try to construct an instance of this type from a string
15    fn from_regex(s: &str) -> Option<Self>;
16
17    /// Search through a string and return all instances of this type matched
18    fn matches(s: &str) -> Vec<Self> {
19        Self::match_locations(s)
20            .into_iter()
21            .map(|(_, v)| v)
22            .collect()
23    }
24
25    /// Search through a string and return all instances of this type matched,
26    /// As well as the ranges at which they occur.
27    fn match_locations(s: &str) -> SegmentMap<usize, Self>;
28}
29
30// TODO: Search trait? to split matches/match_locations out...
31
32// #[cfg(feature = "from_str")]
33// impl<T: FromRegex> std::str::FromStr for T {
34//     type Err = FromRegexError<T::CustomError>;
35//     fn from_str(s: &str) -> Result<Self, Self::Err> {
36//         T::from_regex(s)
37//     }
38// }
39
40pub trait TextMap<V> {
41    fn merge_only_longest<I: IntoIterator<Item = (segmap::Segment<usize>, V)>>(&mut self, other: I);
42}
43
44impl<V: Clone + Eq> TextMap<V> for SegmentMap<usize, V> {
45    fn merge_only_longest<I: IntoIterator<Item = (segmap::Segment<usize>, V)>>(
46        &mut self,
47        other: I,
48    ) {
49        for (range, value) in other {
50            // Check the easy case (insert into an empty space)
51            if let Some(value) = self.insert_if_empty(range, value) {
52                // Check using start and end (if we're keeping whichever is longer,
53                // one of them must be overlapped by a longer segment)
54                let to_remove =
55                    if let (Some(start), Some(end)) = (range.start_value(), range.end_value()) {
56                        let len = end - start;
57
58                        // Check if this range is larger than the overlapping range preceeding it
59                        let before = self.get_range_value(start).map(|(r, _)| r);
60                        let larger_than_before = if let Some(before) = before {
61                            before
62                                .start_value()
63                                .zip(before.end_value())
64                                .map(|(a, b)| len > (b - a))
65                                .unwrap_or_default()
66                        } else {
67                            // If no range before, treat this as larger (i.e. insert it)
68                            true
69                        };
70
71                        // Likewise for the range after
72                        let after = self.get_range_value(end).map(|(r, _)| r);
73                        let larger_than_after = if let Some(after) = after {
74                            after
75                                .start_value()
76                                .zip(after.end_value())
77                                .map(|(a, b)| len > (b - a))
78                                .unwrap_or_default()
79                        } else {
80                            true
81                        };
82
83                        // Return cloned ranges so they aren't borrowed
84                        if larger_than_before && larger_than_after {
85                            Some((before.cloned(), after.cloned()))
86                        } else {
87                            None
88                        }
89                    } else {
90                        None
91                    };
92
93                if let Some((before, after)) = to_remove {
94                    if let Some(before) = before {
95                        self.clear_range(before);
96                    }
97                    if let Some(after) = after {
98                        self.clear_range(after);
99                    }
100
101                    // Add the current range (and overwrite any ranges it fully encompasses)
102                    self.set(range, value);
103                }
104            }
105        }
106    }
107}
108
109#[cfg(test)]
110mod tests;