from_regex/lib.rs
1extern crate from_regex_macros;
2pub use from_regex_macros::*;
3pub use lazy_static::lazy_static;
4
5pub use regex::{self, Captures, Regex};
6pub use segmap::{self, SegmentMap};
7pub use std::str::FromStr;
8
9// TODO: String vs &str in capture fields
10// TODO: only need clone for search. And not really even for that
11
12/// Try to construct an instance of this type from a string
13pub trait FromRegex: Sized {
14 /// Try to construct an instance of this type from a string
15 fn from_regex(s: &str) -> Option<Self>;
16
17 /// Search through a string and return all instances of this type matched
18 fn matches(s: &str) -> Vec<Self> {
19 Self::match_locations(s)
20 .into_iter()
21 .map(|(_, v)| v)
22 .collect()
23 }
24
25 /// Search through a string and return all instances of this type matched,
26 /// As well as the ranges at which they occur.
27 fn match_locations(s: &str) -> SegmentMap<usize, Self>;
28}
29
30// TODO: Search trait? to split matches/match_locations out...
31
32// #[cfg(feature = "from_str")]
33// impl<T: FromRegex> std::str::FromStr for T {
34// type Err = FromRegexError<T::CustomError>;
35// fn from_str(s: &str) -> Result<Self, Self::Err> {
36// T::from_regex(s)
37// }
38// }
39
40pub trait TextMap<V> {
41 fn merge_only_longest<I: IntoIterator<Item = (segmap::Segment<usize>, V)>>(&mut self, other: I);
42}
43
44impl<V: Clone + Eq> TextMap<V> for SegmentMap<usize, V> {
45 fn merge_only_longest<I: IntoIterator<Item = (segmap::Segment<usize>, V)>>(
46 &mut self,
47 other: I,
48 ) {
49 for (range, value) in other {
50 // Check the easy case (insert into an empty space)
51 if let Some(value) = self.insert_if_empty(range, value) {
52 // Check using start and end (if we're keeping whichever is longer,
53 // one of them must be overlapped by a longer segment)
54 let to_remove =
55 if let (Some(start), Some(end)) = (range.start_value(), range.end_value()) {
56 let len = end - start;
57
58 // Check if this range is larger than the overlapping range preceeding it
59 let before = self.get_range_value(start).map(|(r, _)| r);
60 let larger_than_before = if let Some(before) = before {
61 before
62 .start_value()
63 .zip(before.end_value())
64 .map(|(a, b)| len > (b - a))
65 .unwrap_or_default()
66 } else {
67 // If no range before, treat this as larger (i.e. insert it)
68 true
69 };
70
71 // Likewise for the range after
72 let after = self.get_range_value(end).map(|(r, _)| r);
73 let larger_than_after = if let Some(after) = after {
74 after
75 .start_value()
76 .zip(after.end_value())
77 .map(|(a, b)| len > (b - a))
78 .unwrap_or_default()
79 } else {
80 true
81 };
82
83 // Return cloned ranges so they aren't borrowed
84 if larger_than_before && larger_than_after {
85 Some((before.cloned(), after.cloned()))
86 } else {
87 None
88 }
89 } else {
90 None
91 };
92
93 if let Some((before, after)) = to_remove {
94 if let Some(before) = before {
95 self.clear_range(before);
96 }
97 if let Some(after) = after {
98 self.clear_range(after);
99 }
100
101 // Add the current range (and overwrite any ranges it fully encompasses)
102 self.set(range, value);
103 }
104 }
105 }
106 }
107}
108
109#[cfg(test)]
110mod tests;