opencv_binding_generator/
string_ext.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::iter;
4
5use once_cell::sync::Lazy;
6use regex::bytes::{CaptureLocations, Regex};
7
8use crate::CppNameStyle;
9
/// In-place editing helpers for owned strings; implemented for `String` in this file.
pub trait StringExt {
	/// Replaces occurrences of the literal `from` with `to`, stopping after `limit` replacements.
	///
	/// The `String` implementation treats a `limit` of `0` as "no limit" and does nothing for an empty `from`.
	/// Returns `true` if at least one replacement was made.
	fn replacen_in_place(&mut self, from: &str, limit: usize, to: &str) -> bool;
	/// Replaces every occurrence of the literal `from` with `to`, returns `true` if anything was replaced.
	fn replace_in_place(&mut self, from: &str, to: &str) -> bool;
	/// Replaces matches of the regex `from` with `to`, processing at most `limit` matches (`0` = no limit).
	///
	/// In the `String` implementation `to` may refer to capture groups by number (`$1`, `$2`, ...).
	fn replacen_in_place_regex(&mut self, from: &Regex, limit: usize, to: &str) -> bool;
	/// Same as [`StringExt::replacen_in_place_regex`] without a limit on the number of matches.
	fn replace_in_place_regex(&mut self, from: &Regex, to: &str) -> bool;
	/// Replaces matches of the regex `from` with whatever `replacer` returns, processing at most `limit`
	/// matches (`0` = no limit).
	///
	/// `replacer` receives the current string and the capture locations of the match; returning `None` keeps
	/// the matched text as it is.
	fn replacen_in_place_regex_cb<'a>(
		&mut self,
		from: &Regex,
		limit: usize,
		replacer: impl FnMut(&str, &CaptureLocations) -> Option<Cow<'a, str>> + 'a,
	) -> bool;
	/// Same as [`StringExt::replacen_in_place_regex_cb`] without a limit on the number of matches.
	fn replace_in_place_regex_cb<'a>(
		&mut self,
		from: &Regex,
		replacer: impl FnMut(&str, &CaptureLocations) -> Option<Cow<'a, str>> + 'a,
	) -> bool;
	/// Appends the non-empty items of `it`, separated by `sep`.
	// NOTE(review): the `String` impl in this file declares `it` as `impl IntoIterator`, this declaration
	// uses `impl Iterator`; callers going through the trait are bound by the stricter bound written here.
	fn extend_join(&mut self, it: impl Iterator<Item = impl AsRef<str>>, sep: &str);
	/// Appends `s`, preceded by `sep` when the string is not empty.
	fn extend_sep(&mut self, sep: &str, s: &str);
	/// Appends the possibly multi-line `val`: the first line is appended as it is, the following lines are
	/// prefixed with `indent` (the `String` implementation skips the prefix for lines it considers blank).
	fn push_indented_lines(&mut self, indent: Indent, val: &str);
	/// Increments a trailing `_<number>` counter or appends `_1` when there is no such counter.
	fn bump_counter(&mut self);
	/// Replaces spaces, brackets, `::` and operator characters with underscores or short letter codes
	/// (e.g. `>=` becomes `GE`).
	fn cleanup_name(&mut self);
}
32
33impl StringExt for String {
34	fn replacen_in_place(&mut self, from: &str, limit: usize, to: &str) -> bool {
35		if from.is_empty() {
36			return false;
37		}
38		let mut idx = 0;
39		let mut count = 0;
40		while let Some(start_idx) = self[idx..].find(from).map(|i| i + idx) {
41			let end_idx = start_idx + from.len();
42			self.replace_range(start_idx..end_idx, to);
43			idx = start_idx + to.len();
44			count += 1;
45			if count == limit {
46				break;
47			}
48		}
49		count != 0
50	}
51
52	fn replace_in_place(&mut self, from: &str, to: &str) -> bool {
53		self.replacen_in_place(from, 0, to)
54	}
55
56	fn replacen_in_place_regex(&mut self, from: &Regex, limit: usize, to: &str) -> bool {
57		let mut idx = 0;
58		if to.chars().any(|c| c == '$') {
59			enum Elem<'a> {
60				CaptureGroup(usize),
61				Literal(&'a str),
62			}
63
64			#[inline(always)]
65			fn compile_captures(rep: &str) -> Vec<Elem> {
66				let mut out = Vec::with_capacity(10);
67				let mut last_idx = 0;
68				for (idx, _) in rep.match_indices('$') {
69					if let Some((mut next_idx, next_char)) = rep[idx..].char_indices().nth(1) {
70						next_idx += idx;
71						if next_char == '$' {
72							out.push(Elem::Literal(&rep[last_idx..next_idx]));
73							last_idx = next_idx + 1;
74							continue;
75						}
76						if let Some(mut num_end_idx) = rep[next_idx..]
77							.char_indices()
78							.take_while(|(_, c)| c.is_ascii_digit())
79							.map(|(i, _)| i)
80							.last()
81						{
82							num_end_idx += next_idx + 1;
83							out.push(Elem::Literal(&rep[last_idx..idx]));
84							out.push(Elem::CaptureGroup(
85								rep[next_idx..num_end_idx].parse().expect("Can't parse as group number"),
86							));
87							last_idx = num_end_idx;
88						}
89					} else {
90						break;
91					}
92				}
93				out.push(Elem::Literal(&rep[last_idx..]));
94				out
95			}
96
97			let rep = compile_captures(to);
98			self.replacen_in_place_regex_cb(from, limit, |s, caps| {
99				let cap_len = rep.iter().fold(0, |acc, x| {
100					acc + match x {
101						Elem::CaptureGroup(n) => {
102							if let Some((start, end)) = caps.get(*n) {
103								end - start
104							} else {
105								0
106							}
107						}
108						Elem::Literal(s) => s.len(),
109					}
110				});
111				let out = rep.iter().fold(String::with_capacity(cap_len), |out, x| {
112					out + match x {
113						Elem::CaptureGroup(n) => {
114							if let Some((start, end)) = caps.get(*n) {
115								&s[start..end]
116							} else {
117								""
118							}
119						}
120						Elem::Literal(s) => s,
121					}
122				});
123				Some(out.into())
124			})
125		} else {
126			let mut count = 0;
127			while let Some((start_idx, end_idx)) = from.find_at(self.as_bytes(), idx).map(|m| (m.start(), m.end())) {
128				if start_idx == end_idx {
129					return false;
130				}
131				self.replace_range(start_idx..end_idx, to);
132				idx = start_idx + to.len();
133				count += 1;
134				if count == limit {
135					break;
136				}
137			}
138			count != 0
139		}
140	}
141
142	fn replace_in_place_regex(&mut self, from: &Regex, to: &str) -> bool {
143		self.replacen_in_place_regex(from, 0, to)
144	}
145
146	fn replacen_in_place_regex_cb<'a>(
147		&mut self,
148		from: &Regex,
149		limit: usize,
150		mut replacer: impl FnMut(&str, &CaptureLocations) -> Option<Cow<'a, str>> + 'a,
151	) -> bool {
152		let mut idx = 0;
153		let mut caps = from.capture_locations();
154		let mut count = 0;
155		while let Some((start_idx, end_idx)) = from
156			.captures_read_at(&mut caps, self.as_bytes(), idx)
157			.map(|m| (m.start(), m.end()))
158		{
159			if start_idx == end_idx {
160				return false;
161			}
162			if let Some(repl) = replacer(self, &caps) {
163				self.replace_range(start_idx..end_idx, &repl);
164				idx = start_idx + repl.len();
165			} else {
166				idx = end_idx;
167			}
168			count += 1;
169			if count == limit {
170				break;
171			}
172		}
173		count != 0
174	}
175
176	fn replace_in_place_regex_cb<'a>(
177		&mut self,
178		from: &Regex,
179		replacer: impl FnMut(&str, &CaptureLocations) -> Option<Cow<'a, str>> + 'a,
180	) -> bool {
181		self.replacen_in_place_regex_cb(from, 0, replacer)
182	}
183
184	fn extend_join(&mut self, it: impl IntoIterator<Item = impl AsRef<str>>, sep: &str) {
185		let mut it = it.into_iter();
186		let first = it.find(|e| !e.as_ref().is_empty());
187		if let Some(first) = first {
188			let first = first.as_ref();
189			if !first.is_empty() {
190				let needed_cap = it.size_hint().1.unwrap_or(8) * (first.len() + sep.len());
191				if needed_cap > self.capacity() {
192					self.reserve(needed_cap - self.capacity());
193				}
194				self.push_str(first);
195				it.for_each(|part| {
196					let part = part.as_ref();
197					if !part.is_empty() {
198						self.push_str(sep);
199						self.push_str(part.as_ref());
200					}
201				})
202			}
203		}
204	}
205
206	fn extend_sep(&mut self, sep: &str, s: &str) {
207		if !self.is_empty() {
208			self.reserve(s.len() + sep.len());
209			self.push_str(sep);
210		}
211		self.push_str(s);
212	}
213
214	fn push_indented_lines(&mut self, indent: Indent, val: &str) {
215		let mut lines = val.lines_with_nl();
216		if let Some(line) = lines.next() {
217			self.push_str(line);
218		}
219		for line in lines {
220			// there is more than just a newline in the buffer
221			if line.len() > 1 {
222				self.extend(iter::repeat(indent.symbol).take(indent.len));
223			}
224			self.push_str(line);
225		}
226	}
227
228	fn bump_counter(&mut self) {
229		let idx = self
230			.rfind(|c: char| !c.is_ascii_digit())
231			.map_or_else(|| self.len(), |idx| idx + 1);
232		match self[idx..].parse::<u32>() {
233			// parsing an empty string yields an error so that makes sure that [idx - 1] doesn't panic
234			Ok(counter) if self.as_bytes()[idx - 1] == b'_' => self.replace_range(idx.., &(counter + 1).to_string()),
235			_ => self.push_str("_1"),
236		}
237	}
238
239	fn cleanup_name(&mut self) {
240		// todo aho-corasick?
241		self.replace_in_place(" ", "_");
242		self.replace_in_place(">=", "GE");
243		self.replace_in_place("<=", "LE");
244		self.replace_in_place("<", "L");
245		self.replace_in_place(">", "G");
246		self.replace_in_place("(", "_");
247		self.replace_in_place(")", "_");
248		self.replace_in_place("*", "X");
249		self.replace_in_place("&", "R");
250		self.replace_in_place(",", "_");
251		self.replace_in_place("[", "_");
252		self.replace_in_place("]", "_");
253		self.replace_in_place("::", "_");
254		self.replace_in_place("+", "A");
255		self.replace_in_place("-", "S");
256		self.replace_in_place("/", "D");
257		self.replace_in_place("==", "EQ");
258		self.replace_in_place("!=", "NE");
259		self.replace_in_place("|", "OR");
260		self.replace_in_place("^", "XOR");
261		self.replace_in_place("~", "NOTB");
262		self.replace_in_place("=", "ST");
263	}
264}
265
/// Iterator over the lines of a string slice where every item keeps its `\n` terminator.
///
/// The text after the last `\n` is yielded as the final item even when it's empty, so an empty string and a
/// string ending with a newline both produce one trailing empty item.
pub struct LinesWithNl<'s> {
	/// The text being split
	string: &'s str,
	/// Cached byte length of `string`
	len: usize,
	/// Byte offset of the next line, set to `len + 1` once the final item was produced
	idx: usize,
}

impl<'s> Iterator for LinesWithNl<'s> {
	type Item = &'s str;

	fn next(&mut self) -> Option<Self::Item> {
		if self.idx > self.len {
			return None;
		}
		let rest = &self.string[self.idx..];
		match rest.find('\n') {
			Some(newline_idx) => {
				self.idx += newline_idx + 1;
				Some(&rest[..=newline_idx])
			}
			None => {
				// last item, move past the end to finish the iteration
				self.idx = self.len + 1;
				Some(rest)
			}
		}
	}
}
290
/// Description of a line indent: `len` repetitions of `symbol`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Indent {
	/// How many times `symbol` is repeated
	pub len: usize,
	/// The character the indent is made of
	pub symbol: char,
}

impl Default for Indent {
	/// No indent at all, with the tab as the indent character.
	fn default() -> Self {
		Indent { symbol: '\t', len: 0 }
	}
}
302
/// Single element of a compiled template, produced by `compile_interpolation()` and rendered by
/// `CompiledInterpolation::interpolate_into()`.
#[derive(Clone, Copy, Debug)]
enum Compiled<'s> {
	/// Text before the first variable of a line that contains variables, also used to detect the indent
	/// applied to multi-line variable values
	IntpLineStart(&'s str),
	/// Text between two variables on the same line
	IntpLiteral(&'s str),
	/// Text after the last variable of a line; rendering ends the line with `\n` unless the resulting line is
	/// whitespace-only, in which case the line is removed
	IntpLineEnd(&'s str),
	/// Whole line without any variables, rendered as it is followed by `\n`
	LiteralLine(&'s str),
	/// Name of the variable to look up in the interpolation parameters
	Var(&'s str),
}
311
312#[derive(Clone, Debug)]
313pub struct CompiledInterpolation<'s> {
314	elems: Vec<Compiled<'s>>,
315}
316
317impl CompiledInterpolation<'_> {
318	#[inline]
319	pub fn interpolate(&self, params: &HashMap<&str, impl AsRef<str>>) -> String {
320		let mut out = String::new();
321		self.interpolate_into(&mut out, params);
322		out
323	}
324
325	pub fn interpolate_into(&self, out: &mut String, params: &HashMap<&str, impl AsRef<str>>) {
326		#[inline(always)]
327		fn remove_trailing_empty_line(out: &mut String) -> bool {
328			let last_line_start = out.rfind('\n').map_or(0, |i| i + 1);
329			if out[last_line_start..].chars().all(char::is_whitespace) {
330				out.drain(last_line_start..);
331				true
332			} else {
333				false
334			}
335		}
336
337		const INVALID_PARAM_NAME: &str = "<parameter not found>";
338
339		let result_len = self.elems.iter().fold(0, |len, elem| {
340			len + match elem {
341				Compiled::IntpLineStart(s) | Compiled::IntpLiteral(s) => s.len(),
342				Compiled::IntpLineEnd(s) | Compiled::LiteralLine(s) => s.len() + 1,
343				Compiled::Var(name) => params
344					.get(name)
345					.map_or_else(|| INVALID_PARAM_NAME.len(), |x| x.as_ref().len()),
346			}
347		});
348		out.reserve(result_len);
349		let mut line_indent = Indent::default();
350		// interpolate vars keeping indent
351		for elem in &self.elems {
352			match elem {
353				Compiled::IntpLineStart(s) => {
354					line_indent = s.detect_indent();
355					out.push_str(s);
356				}
357				Compiled::IntpLiteral(s) => out.push_str(s),
358				Compiled::Var(name) => {
359					out.push_indented_lines(line_indent, params.get(name).map_or(INVALID_PARAM_NAME, |x| x.as_ref()))
360				}
361				Compiled::IntpLineEnd(s) => {
362					out.push_str(s);
363					if !remove_trailing_empty_line(out) {
364						out.push('\n');
365					}
366				}
367				Compiled::LiteralLine(s) => {
368					line_indent = s.detect_indent();
369					out.push_str(s);
370					out.push('\n');
371				}
372			}
373		}
374		if let Some((n, '\n')) = out.char_indices().next_back() {
375			out.drain(n..);
376		}
377	}
378}
379
/// Read-only helpers for string slices; implemented for `str` in this file.
pub trait StrExt {
	/// Converts a C++ style name to the Rust function naming convention, e.g. `SVDValue` to `svd_value`
	fn cpp_name_to_rust_fn_case(&self) -> Cow<str>;
	/// Iterates over the lines of the string, every line keeps its `\n` terminator
	fn lines_with_nl(&self) -> LinesWithNl;
	/// Measures the leading run of spaces and tabs
	fn detect_indent(&self) -> Indent;
	/// Compiles the string as a template with `{{name}}` variables for later interpolation
	fn compile_interpolation(&self) -> CompiledInterpolation;
	/// Byte index of the first non-whitespace character, length of the string if there is none
	fn trim_start_idx(&self) -> usize;
	/// Byte index right after the last non-whitespace character, `0` if there is none
	fn trim_end_idx(&self) -> usize;
	/// For `cv::rapid::Rapid` returns `Rapid`
	fn localname(&self) -> &str;
	/// For `cv::rapid::Rapid` returns `cv::rapid`
	fn namespace(&self) -> &str;
	/// For `crate::rapid::Rapid` and `rapid::Rapid` returns `rapid`
	fn module(&self) -> &str;
	/// Returns the name in the requested `style`: the local name for `CppNameStyle::Declaration`, the string
	/// itself for `CppNameStyle::Reference`
	fn cpp_name_from_fullname(&self, style: CppNameStyle) -> &str;
	/// Returns the first character in upper case together with the rest of the string, `None` if the string is
	/// empty or contains non-ASCII characters
	fn capitalize_first_ascii_letter(&self) -> Option<(char, &str)>;
}
396
397impl StrExt for str {
398	fn cpp_name_to_rust_fn_case(&self) -> Cow<str> {
399		let mut out = String::with_capacity(self.len() + 8);
400		#[derive(Copy, Clone)]
401		enum State {
402			StartOrLastUnderscore,
403			LastLowercase,
404			LastUppercase,
405		}
406		let mut state = State::StartOrLastUnderscore;
407		let mut chars = self.as_bytes().iter().peekable();
408		while let Some(&cur_c) = chars.next() {
409			let (add_c, new_state) = match cur_c {
410				_ if cur_c.is_ascii_uppercase() => {
411					match state {
412						State::StartOrLastUnderscore => {}
413						State::LastLowercase => out.push('_'),
414						State::LastUppercase => {
415							// SVDValue => svd_value
416							if chars.peek().is_some_and(|next_c| next_c.is_ascii_lowercase()) {
417								out.push('_');
418							}
419						}
420					}
421					(cur_c.to_ascii_lowercase(), State::LastUppercase)
422				}
423				b'_' => (b'_', State::StartOrLastUnderscore),
424				_ => (cur_c, State::LastLowercase),
425			};
426			out.push(char::from(add_c));
427			state = new_state;
428		}
429		out.replacen_in_place("pn_p", 1, "pnp");
430		out.replacen_in_place("p3_p", 1, "p3p");
431		out.replacen_in_place("_u_mat", 1, "_umat");
432		out.replacen_in_place("i_d3_d", 1, "id_3d_");
433		out.replacen_in_place("d3_d", 1, "d3d");
434		out.replacen_in_place("2_d", 1, "_2d");
435		out.replacen_in_place("3_d", 1, "_3d");
436		out.replacen_in_place("open_gl", 1, "opengl");
437		out.replacen_in_place("open_cl", 1, "opencl");
438		out.replacen_in_place("open_vx", 1, "openvx");
439		out.replacen_in_place("aruco_3detect", 1, "aruco3_detect");
440		out.into()
441	}
442
443	fn lines_with_nl(&self) -> LinesWithNl {
444		LinesWithNl {
445			string: self,
446			len: self.len(),
447			idx: 0,
448		}
449	}
450
451	fn detect_indent(&self) -> Indent {
452		self
453			.char_indices()
454			.take_while(|&(_, c)| c == ' ' || c == '\t')
455			.last()
456			.map_or_else(Indent::default, |(idx, chr)| Indent {
457				len: idx + 1,
458				symbol: chr,
459			})
460	}
461
462	fn compile_interpolation(&self) -> CompiledInterpolation {
463		static VARS: Lazy<Regex> = Lazy::new(|| Regex::new(r"\{\{\s*([^{}]+?)\s*}}").expect("Can't compile regex"));
464
465		// trim leading newline
466		let tpl = self.strip_prefix('\n').unwrap_or(self);
467
468		// find minimum common indent
469		let mut common_indent_len: Option<usize> = None;
470		for line in tpl.lines_with_nl() {
471			let Indent { len: new_indent, .. } = if let Some(len) = common_indent_len {
472				line[..len.min(line.len())].detect_indent()
473			} else {
474				line.detect_indent()
475			};
476			// only take lines with something else than only whitespace into account
477			if !line[new_indent..].trim_start().is_empty() {
478				common_indent_len = Some(new_indent);
479			}
480		}
481
482		let mut elems = Vec::with_capacity(16);
483		// interpolate vars keeping indent
484		if let Some(common_indent_len) = common_indent_len {
485			for line in tpl.lines() {
486				let line = &line[common_indent_len.min(line.len())..];
487				let mut last_idx = 0;
488				for cap in VARS.captures_iter(line.as_bytes()) {
489					if let (Some(whole), Some(var)) = (cap.get(0), cap.get(1)) {
490						if last_idx == 0 {
491							elems.push(Compiled::IntpLineStart(&line[last_idx..whole.start()]));
492						} else {
493							elems.push(Compiled::IntpLiteral(&line[last_idx..whole.start()]));
494						}
495						last_idx = whole.end();
496						elems.push(Compiled::Var(&line[var.start()..var.end()]));
497					}
498				}
499				if last_idx == 0 {
500					elems.push(Compiled::LiteralLine(&line[last_idx..]));
501				} else {
502					elems.push(Compiled::IntpLineEnd(&line[last_idx..]));
503				}
504			}
505		} else {
506			elems.push(Compiled::LiteralLine(""));
507		}
508
509		CompiledInterpolation { elems }
510	}
511
512	fn trim_start_idx(&self) -> usize {
513		self
514			.char_indices()
515			.find(|(_, c)| !c.is_whitespace())
516			.map_or_else(|| self.len(), |(i, _)| i)
517	}
518
519	fn trim_end_idx(&self) -> usize {
520		self
521			.char_indices()
522			.rfind(|(_, c)| !c.is_whitespace())
523			.map_or(0, |(i, _)| i + 1)
524	}
525
526	fn localname(&self) -> &str {
527		self.rsplit("::").next().unwrap_or(self)
528	}
529
530	fn namespace(&self) -> &str {
531		self.rsplit_once("::").map_or(self, |(left, _right)| left)
532	}
533
534	fn module(&self) -> &str {
535		self
536			.strip_prefix("crate::")
537			.unwrap_or(self)
538			.split("::")
539			.next()
540			.unwrap_or(self)
541	}
542
543	fn cpp_name_from_fullname(&self, style: CppNameStyle) -> &str {
544		match style {
545			CppNameStyle::Declaration => self.localname(),
546			CppNameStyle::Reference => self,
547		}
548	}
549
550	fn capitalize_first_ascii_letter(&self) -> Option<(char, &str)> {
551		// MSRV: replace with `split_at_checked()` when MSRV is 1.80
552		// self.split_at_checked(1).map(|(first_letter, rest)| {
553		// 	let [first_letter]: [u8; 1] = first_letter.as_bytes().try_into().expect("first part of split_at(1)");
554		// 	(char::from(first_letter.to_ascii_uppercase()), rest)
555		// })
556		if self.is_empty() || !self.is_ascii() {
557			return None;
558		}
559		let (first_letter, rest) = self.split_at(1);
560		let [first_letter]: [u8; 1] = first_letter.as_bytes().try_into().expect("first part of split_at(1)");
561		Some((char::from(first_letter.to_ascii_uppercase()), rest))
562	}
563}