1use crate::diffing::diff_utils::generate_diff;
2use crate::diffing::patching::create_patched_content;
3use crate::models::{StreamYieldItem, UnparsedBlock};
4use regex::Regex;
5use std::collections::HashMap;
6use std::path::Path;
7use std::sync::LazyLock;
8
/// Incremental parser for an LLM output stream that interleaves free text,
/// `File: <path>` headers and `<<<<<<< SEARCH` / `>>>>>>> REPLACE` patch blocks.
pub struct StreamParser<'a> {
    /// Raw stream input that has been fed but not yet consumed by the parser.
    buffer: String,
    /// Path announced by the most recent `File:` header, if we are inside a file section.
    current_file: Option<String>,
    /// Parsed items waiting to be handed out by `Iterator::next`.
    yield_queue: std::collections::VecDeque<StreamYieldItem>,
    /// Caller-supplied original file contents (the session context), keyed by path.
    baseline: &'a HashMap<String, String>,
    /// Latest patched contents per path; consulted before `baseline` when applying patches.
    overlay: HashMap<String, String>,
    /// Contents of files found on disk during path resolution that were not in `baseline`.
    discovered_baseline: HashMap<String, String>,
    /// Whether the last character emitted/consumed ended a line; starts true
    /// (the stream begins at a line boundary).
    last_char_was_newline: bool,
}
24
25impl<'a> StreamParser<'a> {
26 pub fn get_pending_content(&self) -> String {
27 self.buffer.clone()
28 }
29
30 pub fn is_pending_displayable(&self) -> bool {
31 let pending = &self.buffer;
32 if pending.is_empty() {
33 return false;
34 }
35
36 let tail_is_at_line_start = if pending.rfind('\n').is_some() {
37 true
38 } else {
39 self.last_char_was_newline
40 };
41
42 if !tail_is_at_line_start {
43 return true;
44 }
45
46 let last_line = pending.split('\n').next_back().unwrap_or("");
47 let trimmed = last_line.trim_start();
48
49 if !trimmed.is_empty()
52 && ("File:".starts_with(trimmed)
53 || (trimmed.starts_with("File:") && !pending.ends_with('\n')))
54 {
55 return false;
56 }
57
58 if self.current_file.is_some() {
61 if pending.contains("<<<<<<< SEARCH") {
65 return false;
66 }
67
68 if !trimmed.is_empty() && "<<<<<<< SEARCH".starts_with(trimmed) {
73 return false;
74 }
75 }
76
77 true
78 }
79
80 pub fn new(original_contents: &'a HashMap<String, String>) -> Self {
81 Self {
82 buffer: String::new(),
83 current_file: None,
84 yield_queue: std::collections::VecDeque::new(),
85 baseline: original_contents,
86 overlay: HashMap::new(),
87 discovered_baseline: HashMap::new(),
88 last_char_was_newline: true,
90 }
91 }
92
93 pub fn feed(&mut self, chunk: &str) {
96 self.buffer.push_str(chunk);
97 }
98
99 pub fn feed_complete(&mut self, content: &str) {
101 self.feed(content);
102 if !content.ends_with('\n') {
103 self.feed("\n");
104 }
105 }
106
107 pub fn parse_and_resolve(&mut self, chunk: &str, session_root: &Path) -> Vec<StreamYieldItem> {
109 self.feed(chunk);
110 let raw_yields: Vec<_> = self.by_ref().collect();
111 self.process_yields(raw_yields, session_root)
112 }
113
114 pub fn final_resolve(
117 &mut self,
118 session_root: &Path,
119 ) -> (String, Vec<crate::models::DisplayItem>, Vec<String>) {
120 let (_, raw_yields, _) = self.finish("");
122
123 let processed = self.process_yields(raw_yields, session_root);
125
126 let warnings = self.collect_warnings(&processed);
128 let diff = self.build_final_unified_diff();
129 let display_items = processed
130 .into_iter()
131 .filter_map(|y| y.to_display_item(true))
132 .collect();
133
134 (diff, display_items, warnings)
135 }
136
137 fn update_newline_state(&mut self, item: &StreamYieldItem) {
138 match item {
139 StreamYieldItem::Text(s) => self.last_char_was_newline = s.ends_with('\n'),
140 StreamYieldItem::Unparsed(u) => self.last_char_was_newline = u.text.ends_with('\n'),
141 StreamYieldItem::FileHeader(_) => self.last_char_was_newline = true, StreamYieldItem::Patch(p) => self.last_char_was_newline = p.raw_block.ends_with('\n'),
143 StreamYieldItem::DiffBlock(d) => {
144 self.last_char_was_newline = d.unified_diff.ends_with('\n')
145 }
146 StreamYieldItem::Warning(_) => {} StreamYieldItem::IncompleteBlock(b) => self.last_char_was_newline = b.ends_with('\n'),
148 }
149 }
150
151 fn check_header_match(&self, m: regex::Match, start_of_buffer_is_start_of_line: bool) -> bool {
152 m.start() > 0 || start_of_buffer_is_start_of_line
155 }
156}
157
/// Matches a complete `File: <path>` line (optional leading indentation,
/// terminating newline required) and captures the path. `(?m)^` anchors each
/// match to a line start within the buffer; compiled once, lazily.
static FILE_HEADER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^(?P<line>[ \t]*File:[ \t]*(?P<path>.*?)\r?\n)").unwrap());
160
impl<'a> Iterator for StreamParser<'a> {
    type Item = StreamYieldItem;

    /// Produces the next fully-decided item from the buffered stream, or
    /// `None` when the remaining bytes are ambiguous and more input is needed.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain previously parsed items before touching the buffer.
            if let Some(item) = self.yield_queue.pop_front() {
                self.update_newline_state(&item);
                return Some(item);
            }

            if self.buffer.is_empty() {
                return None;
            }

            // Inside a file section: hand the buffer (up to the next header)
            // to the SEARCH/REPLACE block scanner.
            if let Some(llm_file_path) = self.current_file.clone() {
                // Find the first *real* header (one sitting at a line start)
                // so file-chunk parsing never runs past it.
                let mut next_header_idx = self.buffer.len();
                for m in FILE_HEADER_RE.find_iter(&self.buffer) {
                    if self.check_header_match(m, self.last_char_was_newline) {
                        next_header_idx = m.start();
                        break;
                    }
                }

                let chunk_limit = next_header_idx;
                if chunk_limit > 0 || (chunk_limit == 0 && self.last_char_was_newline) {
                    let (chunk_items, consumed_bytes) =
                        self.process_file_chunk(&llm_file_path, &self.buffer[..chunk_limit]);

                    if consumed_bytes > 0 {
                        self.buffer.drain(..consumed_bytes);
                    }

                    if !chunk_items.is_empty() {
                        self.yield_queue.extend(chunk_items);
                        continue;
                    }

                    // Nothing consumed and nothing produced: either a header
                    // follows (leave file mode) or we must wait for input.
                    if consumed_bytes == 0 {
                        if next_header_idx < self.buffer.len() {
                            self.current_file = None;
                            continue;
                        } else {
                            return None;
                        }
                    }
                    // NOTE(review): consumed_bytes > 0 with no items falls
                    // through to the header/text logic below while still in
                    // file mode — presumably intentional; verify against the
                    // scanner's backtracking contract.
                } else {
                    // A header starts at offset 0 mid-line; drop file mode and
                    // let the header matcher below re-examine the buffer.
                    self.current_file = None;
                    continue;
                }
            }

            // Header detection: emit any text before the header, then the
            // header itself, switching into file mode.
            if let Some(caps) = FILE_HEADER_RE.captures(&self.buffer) {
                let mat = caps.get(0).unwrap();
                if self.check_header_match(mat, self.last_char_was_newline) {
                    if mat.start() > 0 {
                        let text = self.buffer[..mat.start()].to_string();
                        self.buffer.drain(..mat.start());
                        let item = StreamYieldItem::Text(text);
                        self.update_newline_state(&item);
                        return Some(item);
                    }

                    // Strip surrounding whitespace plus markdown decoration
                    // (`*`, `` ` ``) from the captured path.
                    let path_str = caps
                        .name("path")
                        .unwrap()
                        .as_str()
                        .trim()
                        .trim_matches(|c| c == '*' || c == '`')
                        .to_string();
                    self.current_file = Some(path_str.clone());
                    self.buffer.drain(..mat.end());
                    let item = StreamYieldItem::FileHeader(crate::models::FileHeader {
                        llm_file_path: path_str,
                    });
                    self.update_newline_state(&item);
                    return Some(item);
                }
            }

            // Plain-text path: emit as much as is unambiguous.
            let text = &self.buffer;
            let mut limit = text.len();

            // Never emit past the first real header.
            for m in FILE_HEADER_RE.find_iter(text) {
                if self.check_header_match(m, self.last_char_was_newline) {
                    limit = m.start();
                    break;
                }
            }

            // Never emit past the line that opens a SEARCH block.
            if let Some(search_idx) = text[..limit].find("<<<<<<< SEARCH") {
                let ls = text[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
                if ls > 0 || self.last_char_was_newline {
                    limit = limit.min(ls);
                }
            }

            // If the tail could still become a marker/header, hold back the
            // last (incomplete) line.
            if self.is_incomplete(&text[..limit]) {
                if let Some(last_newline) = text[..limit].rfind('\n') {
                    limit = last_newline + 1;
                } else {
                    limit = 0;
                }
            }

            if limit > 0 {
                let text_yield = self.buffer[..limit].to_string();
                self.buffer.drain(..limit);
                let item = StreamYieldItem::Text(text_yield);
                self.update_newline_state(&item);
                return Some(item);
            }

            // Everything left is ambiguous; wait for more input.
            return None;
        }
    }
}
293
294impl<'a> StreamParser<'a> {
295 fn is_incomplete(&self, text: &str) -> bool {
296 let last_line = match text.rfind('\n') {
298 Some(idx) => &text[idx + 1..],
299 None => {
300 if !self.last_char_was_newline {
302 return false;
303 }
304 text
305 }
306 };
307
308 if !last_line.is_empty() && last_line.chars().all(|c| c.is_whitespace()) {
312 return true;
313 }
314
315 let trimmed = last_line.trim_start();
316
317 if !trimmed.is_empty()
319 && ("File:".starts_with(trimmed)
320 || (trimmed.starts_with("File:") && !text.ends_with('\n')))
321 {
322 return true;
323 }
324
325 if self.current_file.is_some() && !trimmed.is_empty() {
328 if "<<<<<<< SEARCH".starts_with(trimmed) {
329 return true;
330 }
331 if "=======".starts_with(trimmed) {
332 return true;
333 }
334 if ">>>>>>> REPLACE".starts_with(trimmed) {
335 return true;
336 }
337 }
338
339 if self.current_file.is_some()
341 && let Some(idx) = text.find("<<<<<<< SEARCH")
342 {
343 let line_start = text[..idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
344 if line_start == 0 && !text.contains('\n') && !self.last_char_was_newline {
345 } else {
347 let indent = &text[line_start..idx];
348 if indent.chars().all(|c| c.is_whitespace()) && !text.contains(">>>>>>> REPLACE") {
349 return true;
350 }
351 }
352 }
353
354 false
355 }
356
357 fn process_file_chunk(&self, llm_path: &str, chunk: &str) -> (Vec<StreamYieldItem>, usize) {
358 let mut items = Vec::new();
359 let mut cursor = 0;
360 let search_pattern = "<<<<<<< SEARCH";
361 let sep_pattern = "=======";
362 let replace_pattern = ">>>>>>> REPLACE";
363
364 while cursor < chunk.len() {
365 let search_idx = match chunk[cursor..].find(search_pattern) {
366 Some(i) => cursor + i,
367 None => break,
368 };
369
370 let line_start = chunk[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
372 let indent_slice = &chunk[line_start..search_idx];
373
374 if !indent_slice.chars().all(|c| c.is_whitespace()) {
376 items.push(StreamYieldItem::Text(
378 chunk[cursor..search_idx + 1].to_string(),
379 ));
380 cursor = search_idx + 1;
381 continue;
382 }
383
384 let block_search_start = search_idx + search_pattern.len();
385 let block_search_start_content =
386 block_search_start + consume_line_ending(&chunk[block_search_start..]);
387
388 let (sep_line_start, sep_line_end) =
389 match find_marker_with_indent(chunk, sep_pattern, block_search_start, indent_slice)
390 {
391 Some(pair) => pair,
392 None => {
393 let backtrack_pos = line_start.max(cursor);
394 if backtrack_pos > cursor {
395 items.push(StreamYieldItem::Text(
396 chunk[cursor..backtrack_pos].to_string(),
397 ));
398 }
399 return (items, backtrack_pos);
400 }
401 };
402
403 let block_replace_start_content =
404 sep_line_end + consume_line_ending(&chunk[sep_line_end..]);
405
406 let (replace_line_start, _replace_line_end) =
407 match find_marker_with_indent(chunk, replace_pattern, sep_line_end, indent_slice) {
408 Some(pair) => pair,
409 None => {
410 let backtrack_pos = line_start.max(cursor);
411 if backtrack_pos > cursor {
412 items.push(StreamYieldItem::Text(
413 chunk[cursor..backtrack_pos].to_string(),
414 ));
415 }
416 return (items, backtrack_pos);
417 }
418 };
419
420 if search_idx > cursor {
421 items.push(StreamYieldItem::Text(chunk[cursor..search_idx].to_string()));
422 }
423
424 let final_end = replace_line_start + indent_slice.len() + replace_pattern.len();
425
426 let mut search_content = &chunk[block_search_start_content..sep_line_start];
427 if search_content.ends_with('\r') {
428 search_content = &search_content[..search_content.len() - 1];
429 }
430
431 let mut replace_content = &chunk[block_replace_start_content..replace_line_start];
432 if replace_content.ends_with('\r') {
433 replace_content = &replace_content[..replace_content.len() - 1];
434 }
435
436 items.push(StreamYieldItem::Patch(crate::models::AIPatch {
437 llm_file_path: llm_path.to_string(),
438 search_content: search_content.to_string(),
439 replace_content: replace_content.to_string(),
440 indent: indent_slice.to_string(),
441 raw_block: chunk[search_idx..final_end].to_string(),
442 }));
443
444 cursor = final_end;
445 }
446
447 if cursor < chunk.len() {
448 let tail = &chunk[cursor..];
449 if !self.is_incomplete(tail) {
450 items.push(StreamYieldItem::Text(tail.to_string()));
451 cursor = chunk.len();
452 }
453 }
454
455 (items, cursor)
456 }
457
458 pub fn handle_patch(
459 &mut self,
460 patch: &crate::models::AIPatch,
461 _root: &Path,
462 ) -> (Option<StreamYieldItem>, Vec<String>) {
463 let mut warnings = Vec::new();
464
465 let resolution = self.resolve_path(&patch.llm_file_path, _root, &patch.search_content);
466
467 if let Some(w) = resolution.0 {
468 warnings.push(w.clone());
469 }
470
471 if let Some((path, fallback)) = resolution.1 {
472 if let Some(fb) = fallback {
473 self.overlay
474 .entry(path.clone())
475 .or_insert_with(|| fb.clone());
476 self.discovered_baseline.entry(path.clone()).or_insert(fb);
477 }
478
479 let original = self
480 .overlay
481 .get(&path)
482 .map(|s| s.as_str())
483 .or_else(|| self.baseline.get(&path).map(|s| s.as_str()))
484 .unwrap_or("");
485
486 let mut applied = None;
487
488 if let Some(res) =
490 create_patched_content(original, &patch.search_content, &patch.replace_content)
491 {
492 applied = Some(res);
493 }
494 else if patch.search_content.contains('\r') {
496 let search_normalized = patch.search_content.replace('\r', "");
497 if let Some(res) =
498 create_patched_content(original, &search_normalized, &patch.replace_content)
499 {
500 applied = Some(res);
501 }
502 }
503
504 if let Some(new_content) = applied {
505 let diff = generate_diff(&path, Some(original), Some(&new_content));
506 self.overlay.insert(path.clone(), new_content.clone());
507 (
508 Some(StreamYieldItem::DiffBlock(
509 crate::models::ProcessedDiffBlock {
510 llm_file_path: patch.llm_file_path.clone(),
511 unified_diff: diff,
512 },
513 )),
514 warnings,
515 )
516 } else {
517 warnings.push(format!(
518 "The SEARCH block from the AI could not be found in '{}'. Patch skipped.",
519 path
520 ));
521
522 (
523 Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
524 text: patch.raw_block.clone(),
525 })),
526 warnings,
527 )
528 }
529 } else {
530 warnings.push(format!(
531 "File '{}' from the AI does not match any file in context. Patch skipped.",
532 patch.llm_file_path
533 ));
534
535 (
536 Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
537 text: patch.raw_block.clone(),
538 })),
539 warnings,
540 )
541 }
542 }
543
544 pub fn finish(&mut self, last_chunk: &str) -> (String, Vec<StreamYieldItem>, Vec<String>) {
545 self.feed(last_chunk);
547
548 if self.is_incomplete(&self.buffer)
550 && self.buffer.contains("<<<<<<< SEARCH")
551 && self.buffer.contains(">>>>>>> REPLACE")
552 {
553 self.buffer.push('\n');
554 }
555
556 let mut items: Vec<_> = self.by_ref().collect();
557
558 if !self.buffer.is_empty() {
560 let looks_like_marker = self.is_incomplete(&self.buffer);
561
562 if looks_like_marker {
563 items.push(StreamYieldItem::Unparsed(UnparsedBlock {
564 text: self.buffer.clone(),
565 }));
566 } else {
567 items.push(StreamYieldItem::Text(self.buffer.clone()));
568 }
569 self.buffer.clear();
570 }
571
572 let diff = self.build_final_unified_diff();
573
574 let warnings = self.collect_warnings(&items);
575
576 (diff, items, warnings)
577 }
578
579 pub fn collect_warnings(&self, items: &[StreamYieldItem]) -> Vec<String> {
580 items
581 .iter()
582 .filter_map(|i| match i {
583 StreamYieldItem::Warning(w) => Some(w.text.clone()),
584 _ => None,
585 })
586 .collect()
587 }
588
589 pub fn process_yields(
591 &mut self,
592 items: Vec<StreamYieldItem>,
593 session_root: &Path,
594 ) -> Vec<StreamYieldItem> {
595 let mut processed = Vec::with_capacity(items.len());
596 for item in items {
597 if let StreamYieldItem::Patch(ref patch) = item {
598 let (resolved, warnings) = self.handle_patch(patch, session_root);
599 for w in warnings {
600 processed.push(StreamYieldItem::Warning(crate::models::WarningMessage {
601 text: w,
602 }));
603 }
604 if let Some(res) = resolved {
605 processed.push(res);
606 }
607 } else {
608 processed.push(item);
609 }
610 }
611 processed
612 }
613
614 pub fn build_final_unified_diff(&self) -> String {
615 let mut diffs = String::new();
616 let keys: std::collections::BTreeSet<_> = self
617 .discovered_baseline
618 .keys()
619 .chain(self.overlay.keys())
620 .collect();
621
622 for k in keys {
623 let old = self
624 .discovered_baseline
625 .get(k)
626 .map(|s| s.as_str())
627 .or_else(|| self.baseline.get(k).map(|s| s.as_str()));
628 let new = self.overlay.get(k).map(|s| s.as_str());
629
630 if old != new {
631 let d = generate_diff(k, old, new);
632 diffs.push_str(&d);
633 }
634 }
635 diffs
636 }
637
638 fn resolve_path(
639 &self,
640 llm_path: &str,
641 root: &Path,
642 search_block: &str,
643 ) -> (Option<String>, Option<(String, Option<String>)>) {
644 if self.overlay.contains_key(llm_path) || self.baseline.contains_key(llm_path) {
645 return (None, Some((llm_path.to_string(), None)));
646 }
647 if search_block.trim().is_empty() {
648 return (None, Some((llm_path.to_string(), None)));
649 }
650 let abs_path = root.join(llm_path);
651 if abs_path.exists()
652 && let Ok(canon) = abs_path.canonicalize()
653 && let Ok(root_canon) = root.canonicalize()
654 && canon.starts_with(root_canon)
655 && let Ok(content) = std::fs::read_to_string(&abs_path)
656 {
657 let msg = format!(
658 "File '{}' was not in the session context but was found on disk.",
659 llm_path
660 );
661 return (Some(msg), Some((llm_path.to_string(), Some(content))));
662 }
663 (None, None)
664 }
665}
666
/// Returns the byte length of a line terminator at the start of `s`:
/// 2 for `"\r\n"`, 1 for `"\n"`, and 0 otherwise (a bare `'\r'` is not
/// treated as a line ending).
fn consume_line_ending(s: &str) -> usize {
    match s.as_bytes() {
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] => 1,
        _ => 0,
    }
}
676
/// Searches `chunk` from `start_pos` for a line of the exact form
/// `<expected_indent><marker><optional trailing whitespace>`.
///
/// Returns `Some((line_start, line_end))` for the first such line, where
/// `line_end` is the byte offset of the terminating `'\n'` (or `chunk.len()`
/// when the marker line is the last line). Occurrences whose indentation or
/// trailing content does not match are skipped. Returns `None` if no
/// qualifying line exists.
fn find_marker_with_indent(
    chunk: &str,
    marker: &str,
    start_pos: usize,
    expected_indent: &str,
) -> Option<(usize, usize)> {
    let mut from = start_pos;
    loop {
        // `?` exits with None once the marker no longer occurs.
        let hit = chunk[from..].find(marker)? + from;
        let line_start = match chunk[..hit].rfind('\n') {
            Some(nl) => nl + 1,
            None => 0,
        };
        let marker_end = hit + marker.len();
        let line_end = chunk[marker_end..]
            .find('\n')
            .map_or(chunk.len(), |off| marker_end + off);

        let indent_matches = chunk[line_start..hit] == *expected_indent;
        let tail_is_blank = chunk[marker_end..line_end]
            .chars()
            .all(|c| c.is_whitespace() && c != '\n');

        if indent_matches && tail_is_blank {
            return Some((line_start, line_end));
        }
        // Skip past this occurrence and keep scanning.
        from = marker_end;
    }
}