1use crate::parser::matching::FieldCaptureSlices;
2use crate::result::ParseResult;
3use fancy_regex::Regex;
4use formatparse_core::count_capturing_groups;
5use formatparse_core::parser::MAX_FIELDS;
6use formatparse_core::{FieldSpec, FieldType};
7use pyo3::exceptions::PyValueError;
8use pyo3::prelude::*;
9use std::collections::HashMap;
10use std::sync::Arc;
11
12pub(crate) struct CompiledFields {
14 pub field_specs: Vec<FieldSpec>,
15 pub field_names: Vec<Option<String>>,
16 pub normalized_names: Vec<Option<String>>,
17 pub custom_type_groups: Vec<usize>,
18 pub has_nested_dict_fields: Vec<bool>,
19 pub nested_parsers: Vec<Option<Arc<FormatParser>>>,
20 pub field_count: usize,
21}
22
23impl CompiledFields {
24 pub fn capture_slices(&self) -> FieldCaptureSlices<'_> {
25 FieldCaptureSlices {
26 field_specs: &self.field_specs,
27 field_names: &self.field_names,
28 normalized_names: &self.normalized_names,
29 custom_type_groups: &self.custom_type_groups,
30 has_nested_dict_fields: &self.has_nested_dict_fields,
31 nested_parsers: &self.nested_parsers,
32 }
33 }
34}
35
36#[pyclass(module = "_formatparse")]
37pub struct FormatParser {
49 #[pyo3(get)]
50 pub pattern: String,
53 pub(crate) regex: Regex,
54 pub(crate) regex_str: String, pub(crate) regex_case_insensitive: Option<Regex>,
56 pub(crate) search_regex: Regex, pub(crate) search_regex_case_insensitive: Option<Regex>, pub(crate) fields: CompiledFields,
59 #[allow(dead_code)]
60 pub(crate) name_mapping: std::collections::HashMap<String, String>, pub(crate) stored_extra_types: Option<HashMap<String, PyObject>>, pub(crate) allows_empty_default_string_match: bool, }
64
65impl FormatParser {
66 pub(crate) fn matches_pattern_cache_request(
70 &self,
71 py: Python<'_>,
72 normalized_pattern: &str,
73 extra_types: &Option<HashMap<String, PyObject>>,
74 ) -> bool {
75 if self.pattern != normalized_pattern {
76 return false;
77 }
78 let requested = crate::extract_extra_types_identity(py, extra_types);
79 let stored = crate::extract_extra_types_identity(py, &self.stored_extra_types);
80 requested == stored
81 }
82
83 pub fn new(pattern: &str) -> PyResult<Self> {
84 Self::new_with_extra_types(pattern, None)
85 }
86
87 pub fn new_with_extra_types(
88 pattern: &str,
89 extra_types: Option<HashMap<String, PyObject>>,
90 ) -> PyResult<Self> {
91 let pattern_owned = crate::pattern_normalize::prepare_compiled_pattern(pattern)?;
92 let custom_patterns = Python::with_gil(|py| -> PyResult<HashMap<String, String>> {
93 let mut patterns = HashMap::new();
94 if let Some(ref extra_types_map) = extra_types {
95 for (name, converter_obj) in extra_types_map {
96 let converter_ref = converter_obj.bind(py);
98 if let Ok(pattern_attr) = converter_ref.getattr("pattern") {
99 if let Ok(pattern_str) = pattern_attr.extract::<String>() {
100 patterns.insert(name.clone(), pattern_str);
101 }
102 }
103 }
104 }
105 Ok(patterns)
106 })?;
107
108 let (
109 regex_str_with_anchors,
110 regex_str,
111 field_specs,
112 field_names,
113 normalized_names,
114 name_mapping,
115 allows_empty_default_string_match,
116 ) = formatparse_core::parser::pattern::parse_pattern(
117 &pattern_owned,
118 &custom_patterns,
119 true,
120 0,
121 )
122 .map_err(crate::parser::pattern::pattern_compile_error_to_py)?;
123
124 let (regex_str_search_anchored, _, _, _, _, _, _) =
127 formatparse_core::parser::pattern::parse_pattern(
128 &pattern_owned,
129 &custom_patterns,
130 false,
131 0,
132 )
133 .map_err(crate::parser::pattern::pattern_compile_error_to_py)?;
134
135 if field_specs.len() > MAX_FIELDS {
137 return Err(PyValueError::new_err(format!(
138 "Pattern contains {} fields, which exceeds the maximum allowed count of {}",
139 field_specs.len(),
140 MAX_FIELDS
141 )));
142 }
143
144 let nested_parsers: Vec<Option<Arc<FormatParser>>> =
145 Python::with_gil(|py| -> PyResult<_> {
146 let mut out = Vec::with_capacity(field_specs.len());
147 for spec in &field_specs {
148 if matches!(spec.field_type, FieldType::Nested) {
149 let sub = spec.nested_subpattern.as_ref().ok_or_else(|| {
150 PyValueError::new_err("internal error: nested field missing subpattern")
151 })?;
152 let cloned_et = extra_types.as_ref().map(|m| {
153 m.iter()
154 .map(|(k, v)| (k.clone(), v.clone_ref(py)))
155 .collect::<HashMap<_, _>>()
156 });
157 out.push(Some(Arc::new(FormatParser::new_with_extra_types(
158 sub, cloned_et,
159 )?)));
160 } else {
161 out.push(None);
162 }
163 }
164 Ok(out)
165 })?;
166 let custom_type_groups = Python::with_gil(|py| -> PyResult<Vec<usize>> {
169 let mut groups = Vec::with_capacity(field_specs.len());
170 let empty_map = std::collections::HashMap::new();
171 let custom_converters = extra_types
172 .as_ref()
173 .map(|et| et as &HashMap<String, PyObject>)
174 .unwrap_or(&empty_map);
175
176 for spec in &field_specs {
177 let pattern_groups = if matches!(spec.field_type, FieldType::Nested) {
178 spec.nested_regex_body
179 .as_ref()
180 .map(|b| count_capturing_groups(b))
181 .unwrap_or(0)
182 } else if !custom_converters.is_empty() {
183 crate::parser::matching::validate_custom_type_pattern(
184 spec,
185 custom_converters,
186 py,
187 )?
188 } else {
189 0
190 };
191 groups.push(pattern_groups);
192 }
193 Ok(groups)
194 })?;
195
196 let has_nested_dict_fields: Vec<bool> = field_names
199 .iter()
200 .map(|name_opt| name_opt.as_ref().map(|n| n.contains('[')).unwrap_or(false))
201 .collect();
202
203 let regex = formatparse_core::build_regex(®ex_str_with_anchors)
205 .map_err(crate::error::core_error_to_py_err)?;
206
207 let regex_search_anchored = formatparse_core::build_regex(®ex_str_search_anchored)
208 .map_err(crate::error::core_error_to_py_err)?;
209
210 let regex_case_insensitive =
212 formatparse_core::build_case_insensitive_regex(®ex_str_with_anchors);
213
214 let search_regex =
216 formatparse_core::build_search_regex(regex_search_anchored.as_str(), true)
217 .map_err(crate::error::core_error_to_py_err)?;
218 let search_regex_case_insensitive =
219 formatparse_core::build_search_regex(regex_search_anchored.as_str(), false).ok();
220
221 let field_count = field_specs.len();
222 Ok(Self {
223 pattern: pattern_owned,
224 regex,
225 regex_str,
226 regex_case_insensitive,
227 search_regex,
228 search_regex_case_insensitive,
229 fields: CompiledFields {
230 field_specs,
231 field_names,
232 normalized_names,
233 custom_type_groups,
234 has_nested_dict_fields,
235 nested_parsers,
236 field_count,
237 },
238 name_mapping,
239 stored_extra_types: extra_types,
240 allows_empty_default_string_match,
241 })
242 }
243
244 pub fn search_pattern(
245 &self,
246 string: &str,
247 case_sensitive: bool,
248 extra_types: Option<HashMap<String, PyObject>>,
249 evaluate_result: bool,
250 ) -> PyResult<Option<PyObject>> {
251 let search_regex = if case_sensitive {
253 &self.search_regex
254 } else {
255 self.search_regex_case_insensitive
256 .as_ref()
257 .unwrap_or(&self.search_regex)
258 };
259
260 Python::with_gil(|py| {
261 if search_regex
262 .captures(string)
263 .map_err(crate::error::fancy_regex_match_error)?
264 .is_some()
265 {
266 let extra_types_ref = if let Some(ref et) = extra_types {
267 et
268 } else {
269 &HashMap::new()
270 };
271 let f = &self.fields;
272 return crate::parser::matching::match_with_regex(
273 search_regex,
274 &crate::parser::matching::RegexMatchContext {
275 string,
276 pattern: &self.pattern,
277 field_specs: &f.field_specs,
278 field_names: &f.field_names,
279 normalized_names: &f.normalized_names,
280 nested_parsers: &f.nested_parsers,
281 py,
282 custom_converters: extra_types_ref,
283 evaluate_result,
284 },
285 );
286 }
287 Ok(None)
288 })
289 }
290
291 pub(crate) fn parse_internal(
292 &self,
293 string: &str,
294 case_sensitive: bool,
295 extra_types: Option<&HashMap<String, PyObject>>,
296 evaluate_result: bool,
297 ) -> PyResult<Option<PyObject>> {
298 Python::with_gil(|py| {
299 let empty = HashMap::<String, PyObject>::new();
300 let extra_types_ref = extra_types.unwrap_or(&empty);
301
302 let regex = if case_sensitive {
304 &self.regex
305 } else {
306 self.regex_case_insensitive.as_ref().unwrap_or(&self.regex)
307 };
308
309 let f = &self.fields;
310 if string.is_empty()
311 && self.allows_empty_default_string_match
312 && !f.field_specs.is_empty()
313 {
314 if let Some(obj) = crate::parser::matching::match_empty_default_string_parse(
315 &self.pattern,
316 &f.field_specs,
317 &f.field_names,
318 &f.normalized_names,
319 py,
320 extra_types_ref,
321 evaluate_result,
322 )? {
323 return Ok(Some(obj));
324 }
325 }
326
327 crate::parser::matching::match_with_regex(
328 regex,
329 &crate::parser::matching::RegexMatchContext {
330 string,
331 pattern: &self.pattern,
332 field_specs: &f.field_specs,
333 field_names: &f.field_names,
334 normalized_names: &f.normalized_names,
335 nested_parsers: &f.nested_parsers,
336 py,
337 custom_converters: extra_types_ref,
338 evaluate_result,
339 },
340 )
341 })
342 }
343
344 pub(crate) fn get_search_regex(&self, case_sensitive: bool) -> &Regex {
346 if case_sensitive {
347 &self.search_regex
348 } else {
349 self.search_regex_case_insensitive
350 .as_ref()
351 .unwrap_or(&self.search_regex)
352 }
353 }
354
355 pub(crate) fn parse_nested_capture(
357 &self,
358 py: Python<'_>,
359 slice: &str,
360 custom_converters: &HashMap<String, PyObject>,
361 ) -> PyResult<Option<Py<ParseResult>>> {
362 use pyo3::types::PyAnyMethods;
363 let mut merged = HashMap::new();
364 if let Some(ref stored) = self.stored_extra_types {
365 for (k, v) in stored {
366 merged.insert(k.clone(), v.clone_ref(py));
367 }
368 }
369 for (k, v) in custom_converters {
370 merged.insert(k.clone(), v.clone_ref(py));
371 }
372 let opt = self.parse_internal(slice, true, Some(&merged), true)?;
373 let Some(obj) = opt else {
374 return Ok(None);
375 };
376 let bound = obj.bind(py);
377 let pr = bound.downcast::<ParseResult>().map_err(|_| {
378 PyValueError::new_err("internal error: nested parse did not return ParseResult")
379 })?;
380 Ok(Some(pr.clone().unbind()))
381 }
382}
383
384impl Clone for FormatParser {
385 fn clone(&self) -> Self {
386 Python::with_gil(|py| Self {
387 pattern: self.pattern.clone(),
388 regex: self.regex.clone(),
389 regex_str: self.regex_str.clone(),
390 regex_case_insensitive: self.regex_case_insensitive.clone(),
391 search_regex: self.search_regex.clone(),
392 search_regex_case_insensitive: self.search_regex_case_insensitive.clone(),
393 fields: CompiledFields {
394 field_specs: self.fields.field_specs.clone(),
395 field_names: self.fields.field_names.clone(),
396 normalized_names: self.fields.normalized_names.clone(),
397 custom_type_groups: self.fields.custom_type_groups.clone(),
398 has_nested_dict_fields: self.fields.has_nested_dict_fields.clone(),
399 nested_parsers: self.fields.nested_parsers.clone(),
400 field_count: self.fields.field_count,
401 },
402 name_mapping: self.name_mapping.clone(),
403 stored_extra_types: self.stored_extra_types.as_ref().map(|m| {
404 m.iter()
405 .map(|(k, v)| (k.clone(), v.clone_ref(py)))
406 .collect()
407 }),
408 allows_empty_default_string_match: self.allows_empty_default_string_match,
409 })
410 }
411}