1use crate::{
21 buffer::{Buffer, GapBuffer},
22 dot::{Cur, Dot, Range},
23 parse::{self, ParseInput},
24 regex::{self, Haystack, Regex, RevRegex},
25};
26use std::fmt;
27
/// Error returned when parsing an address fails: a [`parse::Error`] whose kind is an
/// [`ErrorKind`].
pub type Error = parse::Error<ErrorKind>;
29
/// The ways in which parsing an address can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorKind {
    /// A `/`-delimited regular expression failed to compile.
    InvalidRegex(regex::Error),
    /// A `+`/`-` suffix parsed to an address form that is not allowed as a suffix.
    InvalidSuffix,
    /// The input could not be interpreted as an address.
    NotAnAddress,
    /// A regular expression was opened with `/` but never closed.
    UnclosedDelimiter,
    /// A character was found where a different one (e.g. a digit after `:`) was required.
    UnexpectedCharacter(char),
    /// The input ended where an address was expected.
    UnexpectedEof,
    /// A line or column of `0` was given; the parser treats both as starting from 1.
    ZeroIndexedLineOrColumn,
}
40
41impl fmt::Display for ErrorKind {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 match self {
44 Self::InvalidRegex(err) => write!(f, "invalid regular expression: {err}"),
45 Self::InvalidSuffix => write!(f, "invalid suffix"),
46 Self::NotAnAddress => write!(f, "not an address"),
47 Self::UnclosedDelimiter => write!(f, "unclosed delimiter"),
48 Self::UnexpectedCharacter(c) => write!(f, "unexpected character {c:?}"),
49 Self::UnexpectedEof => write!(f, "unexpecterd EOF"),
50 Self::ZeroIndexedLineOrColumn => write!(f, "zero indexed line or column"),
51 }
52 }
53}
54
/// An address that can be resolved to a [`Dot`] against something implementing [`Address`].
// NOTE(review): the lint is silenced rather than boxing a variant; presumably the size
// difference between variants is accepted here — confirm before changing the layout.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Addr {
    /// A dot that is used as-is when the address is resolved.
    Explicit(Dot),
    /// A single address, resolved relative to the current dot.
    Simple(SimpleAddr),
    /// A pair of addresses (`from,to`) resolved to the range spanning from the start of
    /// the first to the end of the second.
    Compound(SimpleAddr, SimpleAddr),
}
65
66impl Addr {
67 pub fn from_dot(dot: Dot, b: &Buffer) -> Self {
68 match dot {
69 Dot::Cur { c } => {
70 let (y, x) = c.as_yx(b);
71 Self::Simple(AddrBase::LineAndColumn(y, x).into())
72 }
73
74 Dot::Range { r } => {
75 let (y1, x1) = r.start.as_yx(b);
76 let (y2, x2) = r.end.as_yx(b);
77
78 Self::Compound(
79 AddrBase::LineAndColumn(y1, x1).into(),
80 AddrBase::LineAndColumn(y2, x2).into(),
81 )
82 }
83 }
84 }
85
86 pub fn full() -> Self {
87 Addr::Compound(AddrBase::Bof.into(), AddrBase::Eof.into())
88 }
89
90 pub fn parse(s: &str) -> Result<Self, Error> {
92 Parser::new(&ParseInput::new(s)).parse()
93 }
94
95 pub(crate) fn parse_from_input(input: &ParseInput<'_>) -> Result<Self, Error> {
97 Parser::new(input).parse()
98 }
99}
100
/// A base address followed by zero or more suffix addresses.
///
/// When resolved, the base is evaluated first and each suffix is then evaluated in
/// order against the dot produced by the previous step.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleAddr {
    // The address that is resolved first.
    base: AddrBase,
    // Further addresses applied in order to the result of resolving `base`.
    suffixes: Vec<AddrBase>,
}
106
/// The primitive address forms that a [`SimpleAddr`] is built from.
///
/// The syntax noted on each variant is the form accepted by the parser in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddrBase {
    /// `.`: the current dot.
    Current,
    /// `-+` or `+-`: from the start of the line containing the start of dot to the end
    /// of the line containing the end of dot.
    CurrentLine,
    /// `-`: extend dot backwards to the start of its line.
    Bol,
    /// `+`: extend dot forwards to the end of its line.
    Eol,
    /// `0`: the beginning of the file.
    Bof,
    /// `$`: the end of the file.
    Eof,
    /// `n`: a full line. Stored zero-indexed (the parser subtracts one from the input).
    Line(usize),
    /// `+n` / `-n`: a full line, offset from the line containing the active cursor.
    RelativeLine(isize),
    /// `#n`: an absolute character index.
    Char(usize),
    /// `+#n` / `-#n`: a character offset from the active cursor.
    RelativeChar(isize),
    /// `n:m`: a line and column. Both are stored zero-indexed.
    LineAndColumn(usize, usize),
    /// `/re/` or `+/re/`: the next match of a regex searching forwards from dot.
    Regex(Regex),
    /// `-/re/`: the next match of a regex searching backwards from dot.
    RegexBack(RevRegex),
}
141
142impl From<AddrBase> for SimpleAddr {
143 fn from(base: AddrBase) -> Self {
144 Self {
145 base,
146 suffixes: Vec::new(),
147 }
148 }
149}
150
/// Direction given by an explicit `+` or `-` prefix while parsing an address.
enum Dir {
    /// `+`: forwards.
    Fwd,
    /// `-`: backwards.
    Bck,
}
155
156impl AddrBase {
157 fn is_valid_suffix(&self) -> bool {
158 use AddrBase::*;
159 matches!(
160 self,
161 Bol | Eol | CurrentLine | RelativeLine(_) | RelativeChar(_) | Regex(_) | RegexBack(_)
162 )
163 }
164}
165
/// Parser for addresses, reading characters from a shared [`ParseInput`].
///
/// All parsing methods take `&self` and advance the position held by the input itself.
#[derive(Debug)]
struct Parser<'a> {
    // The input being consumed; it is advanced through a shared reference.
    input: &'a ParseInput<'a>,
}
172
173impl<'a> Parser<'a> {
174 fn new(input: &'a ParseInput<'a>) -> Self {
175 Self { input }
176 }
177
178 fn parse(&self) -> Result<Addr, Error> {
182 let start = match self.parse_simple() {
183 Ok(addr) => Some(addr),
184 Err(e) if self.input.at_bof() && self.input.try_char() == Some(',') => None,
185 Err(e) => return Err(e),
186 };
187
188 if self.input.at_eof() || self.input.char() == ' ' {
189 Ok(Addr::Simple(
192 start.ok_or_else(|| self.error(ErrorKind::NotAnAddress))?,
193 ))
194 } else if self.input.char() == ',' {
195 self.input.advance(); let start = start.unwrap_or(AddrBase::Bof.into());
198 let next_is_eof_or_whitespace = self
199 .input
200 .try_char()
201 .map(|ch| ch.is_whitespace())
202 .unwrap_or(true);
203
204 let end = if next_is_eof_or_whitespace {
205 AddrBase::Eof.into()
206 } else {
207 self.parse_simple()?
208 };
209
210 Ok(Addr::Compound(start, end))
211 } else {
212 Err(self.error(ErrorKind::NotAnAddress))
213 }
214 }
215
216 fn error(&self, kind: ErrorKind) -> Error {
217 Error::new(kind, self.input.text(), self.input.span())
218 }
219
220 fn parse_simple(&self) -> Result<SimpleAddr, Error> {
221 let base = self.parse_base()?;
222 let mut suffixes = Vec::new();
223
224 while !self.input.at_eof() {
225 if !"-+".contains(self.input.char()) {
226 break;
227 }
228 let addr = self.parse_base()?;
229 if !addr.is_valid_suffix() {
230 return Err(self.error(ErrorKind::InvalidSuffix));
231 }
232 suffixes.push(addr);
233 }
234
235 Ok(SimpleAddr { base, suffixes })
236 }
237
238 fn parse_base(&self) -> Result<AddrBase, Error> {
239 if self.input.at_eof() {
240 return Err(self.error(ErrorKind::UnexpectedEof));
241 }
242
243 let dir = match self.input.char() {
244 '-' => {
245 self.input.advance();
246 if self.input.at_eof() {
247 return Ok(AddrBase::Bol);
248 }
249 Some(Dir::Bck)
250 }
251
252 '+' => {
253 self.input.advance();
254 if self.input.at_eof() {
255 return Ok(AddrBase::Eol);
256 }
257 Some(Dir::Fwd)
258 }
259
260 _ => None,
261 };
262
263 match (self.input.char(), dir) {
264 ('.' | '0' | '$', Some(_)) => Err(self.error(ErrorKind::NotAnAddress)),
265
266 ('-', Some(Dir::Fwd)) | ('+', Some(Dir::Bck)) => {
267 self.input.advance();
268 Ok(AddrBase::CurrentLine)
269 }
270
271 ('.', None) => {
272 self.input.advance();
273 Ok(AddrBase::Current)
274 }
275
276 ('0', None) => {
277 self.input.advance();
278 Ok(AddrBase::Bof)
279 }
280
281 ('$', None) => {
282 self.input.advance();
283 Ok(AddrBase::Eof)
284 }
285
286 ('#', dir) => {
287 self.input.advance();
288 if self.input.at_eof() || !self.input.char().is_ascii_digit() {
289 return Err(self.error(ErrorKind::NotAnAddress));
290 }
291
292 let ix = self.try_parse_num()?;
293 match dir {
294 None => Ok(AddrBase::Char(ix)),
295 Some(Dir::Fwd) => Ok(AddrBase::RelativeChar(ix as isize)),
296 Some(Dir::Bck) => Ok(AddrBase::RelativeChar(-(ix as isize))),
297 }
298 }
299
300 (c, dir) if c.is_ascii_digit() => {
301 let line = self.try_parse_num()?;
302 if line == 0 {
303 return Err(self.error(ErrorKind::ZeroIndexedLineOrColumn));
304 }
305
306 match (self.input.try_char(), dir) {
307 (Some(':'), Some(_)) => Err(self.error(ErrorKind::NotAnAddress)),
308
309 (Some(':'), None) => {
310 self.input.advance();
311 if self.input.at_eof() {
312 Err(self.error(ErrorKind::NotAnAddress))
313 } else if !self.input.char().is_ascii_digit() {
314 Err(self.error(ErrorKind::UnexpectedCharacter(self.input.char())))
315 } else {
316 match self.try_parse_num()? {
317 0 => Err(self.error(ErrorKind::ZeroIndexedLineOrColumn)),
318 col => Ok(AddrBase::LineAndColumn(line - 1, col - 1)),
319 }
320 }
321 }
322
323 (_, None) => Ok(AddrBase::Line(line - 1)),
324 (_, Some(Dir::Fwd)) => Ok(AddrBase::RelativeLine(line as isize)),
325 (_, Some(Dir::Bck)) => Ok(AddrBase::RelativeLine(-(line as isize))),
326 }
327 }
328
329 ('/', dir) => self.parse_delimited_regex(dir.unwrap_or(Dir::Fwd)),
330
331 (_, Some(Dir::Fwd)) => Ok(AddrBase::Eol),
332 (_, Some(Dir::Bck)) => Ok(AddrBase::Bol),
333
334 _ => Err(self.error(ErrorKind::NotAnAddress)),
335 }
336 }
337
338 fn try_parse_num(&self) -> Result<usize, Error> {
339 assert!(self.input.char().is_ascii_digit());
340 let mut s = self.input.char().to_string();
341 self.input.advance();
342
343 loop {
344 if self.input.at_eof() || !self.input.char().is_ascii_digit() {
345 break;
346 }
347 s.push(self.input.char());
348 self.input.advance();
349 }
350
351 s.parse().map_err(|_| self.error(ErrorKind::NotAnAddress))
352 }
353
354 fn parse_delimited_regex(&self, dir: Dir) -> Result<AddrBase, Error> {
355 assert_eq!(self.input.char(), '/');
356 let mut s = String::new();
357 let mut prev = '/';
358 self.input.advance(); while !self.input.at_eof() {
361 let ch = self.input.char();
362 if ch == '/' && prev != '\\' {
363 self.input.advance(); return match dir {
365 Dir::Fwd => Ok(AddrBase::Regex(
366 Regex::compile(&s).map_err(|e| self.error(ErrorKind::InvalidRegex(e)))?,
367 )),
368 Dir::Bck => Ok(AddrBase::RegexBack(
369 RevRegex::compile(&s)
370 .map_err(|e| self.error(ErrorKind::InvalidRegex(e)))?,
371 )),
372 };
373 }
374
375 self.input.advance();
376 s.push(ch);
377 prev = ch;
378 }
379
380 Err(self.error(ErrorKind::UnclosedDelimiter))
381 }
382}
383
384pub trait Address: Haystack + Sized {
386 fn current_dot(&self) -> Dot;
390 fn len_bytes(&self) -> usize;
391 fn len_chars(&self) -> usize;
392 fn line_to_char(&self, line_idx: usize) -> Option<usize>;
393 fn char_to_line(&self, char_idx: usize) -> Option<usize>;
394 fn char_to_line_end(&self, char_idx: usize) -> Option<usize>;
395 fn char_to_line_start(&self, char_idx: usize) -> Option<usize>;
396
397 fn max_iter(&self) -> usize {
398 self.len_chars()
399 }
400
401 fn map_addr(&self, a: &Addr) -> Dot {
402 let maybe_dot = match a {
403 Addr::Explicit(d) => Some(*d),
404 Addr::Simple(a) => self.map_simple_addr(a, self.current_dot()),
405 Addr::Compound(from, to) => self.map_compound_addr(from, to),
406 };
407
408 let mut dot = maybe_dot.unwrap_or_default();
409 dot.clamp_idx(self.max_iter());
410
411 dot
412 }
413
414 fn full_line(&self, line_idx: usize) -> Option<Dot> {
415 let from = self.line_to_char(line_idx)?;
416 let to = self.char_to_line_end(from)?.saturating_sub(1);
417
418 Some(Dot::from_char_indices(from, to))
419 }
420
421 fn map_addr_base(&self, addr_base: &AddrBase, cur_dot: Dot) -> Option<Dot> {
422 use AddrBase::*;
423
424 let dot = match addr_base {
425 Current => cur_dot,
426 Bof => Cur { idx: 0 }.into(),
427 Eof => Cur::new(self.max_iter()).into(),
428
429 Bol => {
430 let Range { start, end, .. } = cur_dot.as_range();
431 let from = self.char_to_line_start(start.idx)?;
432 Dot::from_char_indices(from, end.idx)
433 }
434
435 Eol => {
436 let Range { start, end, .. } = cur_dot.as_range();
437 let to = self.char_to_line_end(end.idx)?;
438 Dot::from_char_indices(start.idx, to)
439 }
440
441 CurrentLine => {
442 let Range { start, end, .. } = cur_dot.as_range();
443 let from = self.char_to_line_start(start.idx)?;
444 let to = self.char_to_line_end(end.idx)?;
445 Dot::from_char_indices(from, to)
446 }
447
448 Line(line_idx) => self.full_line(*line_idx)?,
449 RelativeLine(offset) => {
450 let mut line_idx = self.char_to_line(cur_dot.active_cur().idx)?;
451 line_idx = (line_idx as isize + *offset) as usize;
452 self.full_line(line_idx)?
453 }
454
455 Char(idx) => Cur { idx: *idx }.into(),
456 RelativeChar(offset) => {
457 let mut c = cur_dot.active_cur();
458 c.idx = (c.idx as isize + *offset) as usize;
459 c.into()
460 }
461
462 LineAndColumn(line, col) => {
463 let idx = self.line_to_char(*line)?;
464 Cur { idx: idx + *col }.into()
465 }
466
467 Regex(re) => {
468 let from = cur_dot.last_cur().idx;
469 let m = re.find_from(self, from)?;
470 let (byte_from, byte_to) = m.loc();
471 let from = self.byte_to_char(byte_from).unwrap();
472 let to = self.byte_to_char(byte_to).unwrap();
473
474 Dot::from_char_indices(from, to.saturating_sub(1))
475 }
476
477 RegexBack(re) => {
478 let from = cur_dot.first_cur().idx;
479 let m = re.find_rev_from(self, from)?;
480 let (byte_from, byte_to) = m.loc();
481 let from = self.byte_to_char(byte_from).unwrap();
482 let to = self.byte_to_char(byte_to).unwrap();
483
484 Dot::from_char_indices(from, to.saturating_sub(1))
485 }
486 };
487
488 Some(dot)
489 }
490
491 fn map_simple_addr(&self, addr: &SimpleAddr, cur_dot: Dot) -> Option<Dot> {
492 let mut dot = self.map_addr_base(&addr.base, cur_dot)?;
493
494 for suffix in addr.suffixes.iter() {
495 dot = self.map_addr_base(suffix, dot)?;
496 }
497
498 Some(dot)
499 }
500
501 fn map_compound_addr(&self, from: &SimpleAddr, to: &SimpleAddr) -> Option<Dot> {
502 let c1 = self.map_simple_addr(from, self.current_dot())?.first_cur();
503 let c2 = self.map_simple_addr(to, self.current_dot())?.last_cur();
504
505 Some(Range::from_cursors(c1, c2, false).into())
506 }
507}
508
509impl Address for GapBuffer {
510 fn current_dot(&self) -> Dot {
511 Dot::from_char_indices(0, self.len_chars().saturating_sub(1))
512 }
513
514 fn len_bytes(&self) -> usize {
515 self.len()
516 }
517
518 fn len_chars(&self) -> usize {
519 self.len_chars()
520 }
521
522 fn line_to_char(&self, line_idx: usize) -> Option<usize> {
523 self.try_line_to_char(line_idx)
524 }
525
526 fn char_to_line(&self, char_idx: usize) -> Option<usize> {
527 self.try_char_to_line(char_idx)
528 }
529
530 fn char_to_line_end(&self, char_idx: usize) -> Option<usize> {
531 let line_idx = self.try_char_to_line(char_idx)?;
532 match self.try_line_to_char(line_idx + 1) {
533 None => Some(self.len_chars() - 1),
534 Some(idx) => Some(idx),
535 }
536 }
537
538 fn char_to_line_start(&self, char_idx: usize) -> Option<usize> {
539 let line_idx = self.try_char_to_line(char_idx)?;
540 Some(self.line_to_char(line_idx))
541 }
542}
543
544impl Address for Buffer {
545 fn current_dot(&self) -> Dot {
546 self.dot
547 }
548
549 fn len_bytes(&self) -> usize {
550 self.txt.len()
551 }
552
553 fn len_chars(&self) -> usize {
554 self.txt.len_chars()
555 }
556
557 fn line_to_char(&self, line_idx: usize) -> Option<usize> {
558 self.txt.try_line_to_char(line_idx)
559 }
560
561 fn char_to_line(&self, char_idx: usize) -> Option<usize> {
562 self.txt.try_char_to_line(char_idx)
563 }
564
565 fn char_to_line_end(&self, char_idx: usize) -> Option<usize> {
566 let line_idx = self.txt.try_char_to_line(char_idx)?;
567 match self.txt.try_line_to_char(line_idx + 1) {
568 None => Some(self.txt.len_chars() - 1),
569 Some(idx) => Some(idx),
570 }
571 }
572
573 fn char_to_line_start(&self, char_idx: usize) -> Option<usize> {
574 let line_idx = self.txt.try_char_to_line(char_idx)?;
575 Some(self.txt.line_to_char(line_idx))
576 }
577}
578
#[cfg(test)]
mod tests {
    use super::*;
    use super::{Addr::*, AddrBase::*};
    use crate::regex::{Regex, RevRegex};
    use simple_test_case::test_case;

    // Compile a forward regex, panicking on invalid input.
    fn re(s: &str) -> Regex {
        Regex::compile(s).unwrap()
    }

    // Compile a reverse regex, panicking on invalid input.
    fn re_rev(s: &str) -> RevRegex {
        RevRegex::compile(s).unwrap()
    }

    // Simple addresses
    #[test_case(".", Simple(Current.into()); "current dot")]
    #[test_case("-", Simple(Bol.into()); "beginning of line")]
    #[test_case("+", Simple(Eol.into()); "end of line")]
    #[test_case("-+", Simple(CurrentLine.into()); "current line minus plus")]
    #[test_case("+-", Simple(CurrentLine.into()); "current line plus minus")]
    #[test_case("0", Simple(Bof.into()); "beginning of file")]
    #[test_case("$", Simple(Eof.into()); "end of file")]
    #[test_case("3", Simple(Line(2).into()); "single line")]
    #[test_case("+42", Simple(RelativeLine(42).into()); "relative line forward")]
    #[test_case("-12", Simple(RelativeLine(-12).into()); "relative line backward")]
    #[test_case("#3", Simple(Char(3).into()); "char")]
    #[test_case("+#42", Simple(RelativeChar(42).into()); "relative char forward")]
    #[test_case("-#12", Simple(RelativeChar(-12).into()); "relative char backward")]
    #[test_case("3:9", Simple(LineAndColumn(2, 8).into()); "line and column cursor")]
    #[test_case("/foo/", Simple(Regex(re("foo")).into()); "regex")]
    #[test_case("+/baz/", Simple(Regex(re("baz")).into()); "regex explicit forward")]
    #[test_case("-/bar/", Simple(RegexBack(re_rev("bar")).into()); "regex back")]
    // Simple addresses with suffixes
    #[test_case(
        "#5+",
        Simple(SimpleAddr { base: Char(5), suffixes: vec![Eol] });
        "char to eol"
    )]
    #[test_case(
        "#5-",
        Simple(SimpleAddr { base: Char(5), suffixes: vec![Bol] });
        "char to bol"
    )]
    #[test_case(
        "5+#3",
        Simple(SimpleAddr { base: Line(4), suffixes: vec![RelativeChar(3)] });
        "line plus char"
    )]
    #[test_case(
        "5-#3",
        Simple(SimpleAddr { base: Line(4), suffixes: vec![RelativeChar(-3)] });
        "line minus char"
    )]
    // Compound addresses
    #[test_case(",", Compound(Bof.into(), Eof.into()); "full")]
    #[test_case("5,", Compound(Line(4).into(), Eof.into()); "from n")]
    #[test_case("50,", Compound(Line(49).into(), Eof.into()); "from n multi digit")]
    #[test_case("5,9", Compound(Line(4).into(), Line(8).into()); "from n to m")]
    #[test_case("25,90", Compound(Line(24).into(), Line(89).into()); "from n to m multi digit")]
    #[test_case("/foo/,/bar/", Compound(Regex(re("foo")).into(), Regex(re("bar")).into()); "regex range")]
    // Compound addresses with suffixes
    #[test_case(
        "-/\\s/+#1,/\\s/-#1",
        Compound(
            SimpleAddr { base: RegexBack(re_rev("\\s")), suffixes: vec![RelativeChar(1)] },
            SimpleAddr { base: Regex(re("\\s")), suffixes: vec![RelativeChar(-1)] },
        );
        "regex range with suffixes"
    )]
    #[test]
    fn parse_works(s: &str, expected: Addr) {
        let addr = Addr::parse(s).expect("valid input");
        assert_eq!(addr, expected);
    }

    // Each case resolves the address against a fixed three line buffer with dot set
    // to a cursor at index 16, then checks both the resulting dot and its contents.
    #[test_case("0", Dot::default(), "t"; "bof")]
    #[test_case("2", Dot::from_char_indices(15, 26), "and another\n"; "line 2")]
    #[test_case("2:1", Cur { idx: 15 }.into(), "a"; "line 2 col 1")]
    #[test_case("2:2", Cur { idx: 16 }.into(), "n"; "line 2 col 2")]
    #[test_case("-1", Dot::from_char_indices(0, 14), "this is a line\n"; "line 1 relative to 2")]
    #[test_case("/something/", Dot::from_char_indices(33, 41), "something"; "regex forward")]
    #[test_case("-/line/", Dot::from_char_indices(10, 13), "line"; "regex back")]
    #[test_case("-/his/", Dot::from_char_indices(1, 3), "his"; "regex back 2")]
    #[test_case("-/a/,/a/", Dot::from_char_indices(15, 19), "and a"; "regex range")]
    #[test_case("-/\\s/+#1,/\\s/-#1", Dot::from_char_indices(15, 17), "and"; "regex range boundaries")]
    #[test]
    fn map_addr_works(s: &str, expected: Dot, expected_contents: &str) {
        let mut b = Buffer::new_unnamed(
            0,
            "this is a line\nand another\n- [ ] something to do\n",
            Default::default(),
        );
        b.dot = Cur::new(16).into();

        let addr = Addr::parse(s).expect("valid addr");
        b.dot = b.map_addr(&addr);

        assert_eq!(b.dot, expected, ">{}<", b.dot_contents());
        assert_eq!(b.dot_contents(), expected_contents);
    }

    // Numbers that do not fit in a usize must be rejected rather than wrapping.
    #[test_case("99999999999999999999"; "line number overflow")]
    #[test_case("#99999999999999999999"; "char index overflow")]
    #[test_case("+#99999999999999999999"; "relative char forward overflow")]
    #[test_case("-#99999999999999999999"; "relative char back overflow")]
    #[test_case("5:99999999999999999999"; "column number overflow")]
    #[test_case("99999999999999999999,100"; "range start overflow")]
    #[test_case("1,99999999999999999999"; "range end overflow")]
    #[test_case("99999999999999999999:5"; "line in line col overflow")]
    #[test]
    fn giant_address_integers_error(s: &str) {
        let res = Addr::parse(s);
        assert!(res.is_err(), "expected error, got {res:?}");
    }

    // A '#' that is not followed by digits is an error rather than a panic.
    #[test_case("#"; "char address at eof")]
    #[test_case("1,#"; "compound with eof after hash")]
    #[test_case("#,5"; "compound with incomplete char start")]
    #[test_case("+#"; "relative forward at eof")]
    #[test_case("-#"; "relative back at eof")]
    #[test]
    fn incomplete_char_addresses_error(s: &str) {
        let res = Addr::parse(s);
        assert!(res.is_err(), "expected error, got {res:?}");
    }
}