_formatparse/parser/
findall_iter.rs1use crate::parser::format_parser::FormatParser;
2use crate::parser::matching::{match_with_captures, match_with_captures_raw, CapturedMatchContext};
3use formatparse_core::FieldType;
4use pyo3::prelude::*;
5use pyo3::IntoPyObjectExt;
6use std::collections::HashMap;
7use std::sync::Arc;
8
9#[pyclass(module = "_formatparse", name = "FindallIter")]
17pub struct FindallIter {
18 parser: Arc<FormatParser>,
19 haystack: String,
20 case_sensitive: bool,
21 evaluate_result: bool,
22 fast_path: bool,
23 extra_types: HashMap<String, PyObject>,
24 last_end: usize,
25 search_pos: usize,
26}
27
28impl FindallIter {
29 pub fn new(
30 parser: Arc<FormatParser>,
31 haystack: String,
32 case_sensitive: bool,
33 evaluate_result: bool,
34 extra_types: HashMap<String, PyObject>,
35 ) -> Self {
36 let has_custom_converters = !extra_types.is_empty();
37 let has_nested_dicts = parser.fields.has_nested_dict_fields.iter().any(|&b| b);
38 let has_nested_format_fields = parser
39 .fields
40 .field_specs
41 .iter()
42 .any(|s| matches!(s.field_type, FieldType::Nested));
43 let fast_path = !has_custom_converters
44 && evaluate_result
45 && !has_nested_dicts
46 && !has_nested_format_fields;
47 Self {
48 parser,
49 haystack,
50 case_sensitive,
51 evaluate_result,
52 fast_path,
53 extra_types,
54 last_end: 0,
55 search_pos: 0,
56 }
57 }
58}
59
60#[pymethods]
61impl FindallIter {
62 fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
63 slf
64 }
65
66 fn __next__(mut slf: PyRefMut<'_, Self>, py: Python<'_>) -> PyResult<Option<PyObject>> {
67 if slf.fast_path {
68 loop {
69 if slf.search_pos > slf.haystack.len() {
70 return Ok(None);
71 }
72 let search_regex = slf.parser.get_search_regex(slf.case_sensitive);
73 let Some(caps) = search_regex
74 .captures_from_pos(&slf.haystack, slf.search_pos)
75 .map_err(crate::error::fancy_regex_match_error)?
76 else {
77 return Ok(None);
78 };
79 let Some(m0) = caps.get(0) else {
80 return Err(pyo3::exceptions::PyRuntimeError::new_err(
81 "regex match missing capture group 0",
82 ));
83 };
84 let match_start = m0.start();
85 let match_end = m0.end();
86
87 if match_start < slf.last_end {
88 slf.search_pos = slf.last_end.max(match_start.saturating_add(1));
89 continue;
90 }
91
92 let slices = slf.parser.fields.capture_slices();
93
94 match match_with_captures_raw(&caps, &slf.haystack, match_start, &slices) {
95 Ok(Some(raw_data)) => {
96 slf.last_end = match_end;
97 if match_start == match_end {
98 slf.last_end += 1;
99 }
100 slf.search_pos = slf.last_end;
101 let pr = raw_data.to_parse_result(py)?;
102 return Ok(Some(pr.into_py_any(py)?));
103 }
104 Ok(None) => {
105 slf.search_pos = match_start.saturating_add(1);
106 continue;
107 }
108 Err(_) => {
109 slf.fast_path = false;
110 if slf.last_end == 0 {
111 slf.search_pos = 0;
112 }
113 break;
114 }
115 }
116 }
117 }
118
119 loop {
120 if slf.search_pos > slf.haystack.len() {
121 return Ok(None);
122 }
123 let search_regex = slf.parser.get_search_regex(slf.case_sensitive);
124 let Some(caps) = search_regex
125 .captures_from_pos(&slf.haystack, slf.search_pos)
126 .map_err(crate::error::fancy_regex_match_error)?
127 else {
128 return Ok(None);
129 };
130 let Some(m0) = caps.get(0) else {
131 return Err(pyo3::exceptions::PyRuntimeError::new_err(
132 "regex match missing capture group 0",
133 ));
134 };
135 let match_start = m0.start();
136 let match_end = m0.end();
137
138 if match_start < slf.last_end {
139 slf.search_pos = slf.last_end.max(match_start.saturating_add(1));
140 continue;
141 }
142
143 let ctx = CapturedMatchContext {
144 pattern: &slf.parser.pattern,
145 fields: slf.parser.fields.capture_slices(),
146 py,
147 custom_converters: &slf.extra_types,
148 evaluate_result: slf.evaluate_result,
149 };
150
151 match match_with_captures(&caps, &ctx)? {
152 Some(result) => {
153 slf.last_end = match_end;
154 if match_start == match_end {
155 slf.last_end += 1;
156 }
157 slf.search_pos = slf.last_end;
158 return Ok(Some(result));
159 }
160 None => {
161 slf.search_pos = match_start.saturating_add(1);
162 continue;
163 }
164 }
165 }
166 }
167
168 fn __repr__(&self) -> String {
169 "<FindallIter>".to_string()
170 }
171}