1use crate::diffing::diff_utils::generate_diff;
2use crate::diffing::patching::create_patched_content;
3use crate::models::{StreamYieldItem, UnparsedBlock};
4use regex::Regex;
5use std::collections::HashMap;
6use std::path::Path;
7use std::sync::LazyLock;
8
/// Streaming parser for LLM output that interleaves prose with
/// `File:` headers and `<<<<<<< SEARCH` / `=======` / `>>>>>>> REPLACE`
/// edit blocks, resolving the blocks into diffs as the stream arrives.
pub struct StreamParser<'a> {
    /// Raw stream text received but not yet consumed by the parser.
    buffer: String,
    /// Path from the most recent `File:` header while inside a file section.
    current_file: Option<String>,
    /// Parsed items waiting to be handed out by `Iterator::next`.
    yield_queue: std::collections::VecDeque<StreamYieldItem>,
    /// Immutable session-context file contents, keyed by path.
    baseline: &'a HashMap<String, String>,
    /// Latest patched content per path, layered over `baseline`.
    overlay: HashMap<String, String>,
    /// Original content of files discovered on disk mid-stream
    /// (paths absent from `baseline`).
    discovered_baseline: HashMap<String, String>,
    /// Whether the output emitted so far ends at a line boundary; markers
    /// are only recognised at line starts, so this gates matching.
    last_char_was_newline: bool,
}
24
25impl<'a> StreamParser<'a> {
26 pub fn get_pending_content(&self) -> String {
27 self.buffer.clone()
28 }
29
30 pub fn is_pending_displayable(&self) -> bool {
31 let pending = &self.buffer;
32 if pending.is_empty() {
33 return false;
34 }
35
36 let tail_is_at_line_start = if pending.rfind('\n').is_some() {
37 true
38 } else {
39 self.last_char_was_newline
40 };
41
42 if !tail_is_at_line_start {
43 return true;
44 }
45
46 let last_line = pending.split('\n').next_back().unwrap_or("");
47 let trimmed = last_line.trim_start();
48
49 if !trimmed.is_empty()
52 && ("File:".starts_with(trimmed)
53 || (trimmed.starts_with("File:") && !pending.ends_with('\n')))
54 {
55 return false;
56 }
57
58 if self.current_file.is_some() {
61 if pending.contains("<<<<<<< SEARCH") {
65 return false;
66 }
67
68 if !trimmed.is_empty() && "<<<<<<< SEARCH".starts_with(trimmed) {
73 return false;
74 }
75 }
76
77 true
78 }
79
80 pub fn new(original_contents: &'a HashMap<String, String>) -> Self {
81 Self {
82 buffer: String::new(),
83 current_file: None,
84 yield_queue: std::collections::VecDeque::new(),
85 baseline: original_contents,
86 overlay: HashMap::new(),
87 discovered_baseline: HashMap::new(),
88 last_char_was_newline: true,
90 }
91 }
92
93 pub fn feed(&mut self, chunk: &str) {
96 self.buffer.push_str(chunk);
97 }
98
99 pub fn feed_complete(&mut self, content: &str) {
101 self.feed(content);
102 if !content.ends_with('\n') {
103 self.feed("\n");
104 }
105 }
106
107 pub fn parse_and_resolve(&mut self, chunk: &str, session_root: &Path) -> Vec<StreamYieldItem> {
109 self.feed(chunk);
110 let raw_yields: Vec<_> = self.by_ref().collect();
111 self.process_yields(raw_yields, session_root)
112 }
113
114 pub fn final_resolve(
117 &mut self,
118 session_root: &Path,
119 ) -> (String, Vec<crate::models::DisplayItem>, Vec<String>) {
120 let (_, raw_yields, _) = self.finish("");
122
123 let processed = self.process_yields(raw_yields, session_root);
125
126 let warnings = self.collect_warnings(&processed);
128 let diff = self.build_final_unified_diff();
129 let display_items = processed
130 .into_iter()
131 .filter_map(|y| y.to_display_item(true))
132 .collect();
133
134 (diff, display_items, warnings)
135 }
136
137 fn update_newline_state(&mut self, item: &StreamYieldItem) {
138 match item {
139 StreamYieldItem::Text(s) => self.last_char_was_newline = s.ends_with('\n'),
140 StreamYieldItem::Unparsed(u) => self.last_char_was_newline = u.text.ends_with('\n'),
141 StreamYieldItem::FileHeader(_) => self.last_char_was_newline = true, StreamYieldItem::Patch(p) => self.last_char_was_newline = p.raw_block.ends_with('\n'),
143 StreamYieldItem::DiffBlock(d) => {
144 self.last_char_was_newline = d.unified_diff.ends_with('\n')
145 }
146 StreamYieldItem::Warning(_) => {} StreamYieldItem::IncompleteBlock(b) => self.last_char_was_newline = b.ends_with('\n'),
148 }
149 }
150
151 fn check_header_match(&self, m: regex::Match, start_of_buffer_is_start_of_line: bool) -> bool {
152 m.start() > 0 || start_of_buffer_is_start_of_line
155 }
156}
157
/// Matches one `File: <path>` header line: optional leading blanks, the
/// literal `File:`, the path (captured as `path`, lazily so trailing
/// whitespace stays out), and the terminating (CR)LF. `(?m)` anchors `^`
/// at every line start so headers can be found mid-buffer.
static FILE_HEADER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^(?P<line>[ \t]*File:[ \t]*(?P<path>.*?)\r?\n)").unwrap());
160
/// Pull-based parsing: each `next()` call consumes as much of `buffer`
/// as can be classified unambiguously and yields one item, or `None`
/// when the remaining bytes need more stream input to classify.
impl<'a> Iterator for StreamParser<'a> {
    type Item = StreamYieldItem;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain anything parsed on a previous pass first.
            if let Some(item) = self.yield_queue.pop_front() {
                self.update_newline_state(&item);
                return Some(item);
            }

            if self.buffer.is_empty() {
                return None;
            }

            // Inside a `File:` section: everything up to the next genuine
            // header belongs to this file and goes to the block scanner.
            if let Some(llm_file_path) = self.current_file.clone() {
                // Find the first header that sits at a real line start;
                // it terminates the current file section.
                let mut next_header_idx = self.buffer.len();
                for m in FILE_HEADER_RE.find_iter(&self.buffer) {
                    if self.check_header_match(m, self.last_char_was_newline) {
                        next_header_idx = m.start();
                        break;
                    }
                }

                let chunk_limit = next_header_idx;
                if chunk_limit > 0 || (chunk_limit == 0 && self.last_char_was_newline) {
                    let (chunk_items, consumed_bytes) =
                        self.process_file_chunk(&llm_file_path, &self.buffer[..chunk_limit]);

                    if consumed_bytes > 0 {
                        self.buffer.drain(..consumed_bytes);
                    }

                    if !chunk_items.is_empty() {
                        self.yield_queue.extend(chunk_items);
                        continue;
                    }

                    // Nothing consumed and nothing produced: either a new
                    // header follows (leave the file section and re-parse)
                    // or we must wait for more input.
                    if consumed_bytes == 0 {
                        if next_header_idx < self.buffer.len() {
                            self.current_file = None;
                            continue;
                        } else {
                            return None;
                        }
                    }
                } else {
                    // Defensive: zero-length limit without the line-start
                    // flag — abandon the file section and re-scan.
                    self.current_file = None;
                    continue;
                }
            }

            // Not (or no longer) inside a file section: look for a header.
            if let Some(caps) = FILE_HEADER_RE.captures(&self.buffer) {
                let mat = caps.get(0).unwrap();
                if self.check_header_match(mat, self.last_char_was_newline) {
                    // Emit plain text preceding the header before the
                    // header itself.
                    if mat.start() > 0 {
                        let text = self.buffer[..mat.start()].to_string();
                        self.buffer.drain(..mat.start());
                        let item = StreamYieldItem::Text(text);
                        self.update_newline_state(&item);
                        return Some(item);
                    }

                    // Strip surrounding whitespace plus markdown adornment
                    // (`*`, backticks) from the captured path.
                    let path_str = caps
                        .name("path")
                        .unwrap()
                        .as_str()
                        .trim()
                        .trim_matches(|c| c == '*' || c == '`')
                        .to_string();
                    self.current_file = Some(path_str.clone());
                    self.buffer.drain(..mat.end());
                    let item = StreamYieldItem::FileHeader(crate::models::FileHeader {
                        llm_file_path: path_str,
                    });
                    self.update_newline_state(&item);
                    return Some(item);
                }
            }

            // Plain prose: emit as much as is provably not the start of a
            // header or SEARCH block.
            let text = &self.buffer;
            let mut limit = text.len();

            // Stop before the first genuine header.
            for m in FILE_HEADER_RE.find_iter(text) {
                if self.check_header_match(m, self.last_char_was_newline) {
                    limit = m.start();
                    break;
                }
            }

            // Stop before the line that opens a SEARCH marker at a line start.
            if let Some(search_idx) = text[..limit].find("<<<<<<< SEARCH") {
                let ls = text[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
                if ls > 0 || self.last_char_was_newline {
                    limit = limit.min(ls);
                }
            }

            // Withhold a trailing partial line that might still grow into
            // a marker once more stream arrives.
            if self.is_incomplete(&text[..limit]) {
                if let Some(last_newline) = text[..limit].rfind('\n') {
                    limit = last_newline + 1;
                } else {
                    limit = 0;
                }
            }

            if limit > 0 {
                let text_yield = self.buffer[..limit].to_string();
                self.buffer.drain(..limit);
                let item = StreamYieldItem::Text(text_yield);
                self.update_newline_state(&item);
                return Some(item);
            }

            return None;
        }
    }
}
293
impl<'a> StreamParser<'a> {
    /// Heuristic: does `text` end in a line that could still grow into a
    /// marker (`File:`, `<<<<<<< SEARCH`, `=======`, `>>>>>>> REPLACE`)
    /// once more stream arrives, or does it contain an opened SEARCH
    /// block that has not been terminated yet?
    fn is_incomplete(&self, text: &str) -> bool {
        // The line under inspection is everything after the last newline;
        // when `text` has no newline at all it only counts as a line if
        // the stream is currently at a line boundary.
        let last_line = match text.rfind('\n') {
            Some(idx) => &text[idx + 1..],
            None => {
                if !self.last_char_was_newline {
                    return false;
                }
                text
            }
        };

        // A whitespace-only tail could be the indent of a coming marker.
        if !last_line.is_empty() && last_line.chars().all(|c| c.is_whitespace()) {
            return true;
        }

        let trimmed = last_line.trim_start();

        // A prefix of `File:`, or a full `File:` line not yet
        // newline-terminated.
        if !trimmed.is_empty()
            && ("File:".starts_with(trimmed)
                || (trimmed.starts_with("File:") && !text.ends_with('\n')))
        {
            return true;
        }

        // Inside a file section, any prefix of the three block markers.
        if self.current_file.is_some() && !trimmed.is_empty() {
            if "<<<<<<< SEARCH".starts_with(trimmed) {
                return true;
            }
            if "=======".starts_with(trimmed) {
                return true;
            }
            if ">>>>>>> REPLACE".starts_with(trimmed) {
                return true;
            }
        }

        // An opened SEARCH block with no matching REPLACE terminator yet.
        if self.current_file.is_some()
            && let Some(idx) = text.find("<<<<<<< SEARCH")
        {
            let line_start = text[..idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
            if line_start == 0 && !text.contains('\n') && !self.last_char_was_newline {
                // Marker glued to the middle of a line: not a real block.
            } else {
                let indent = &text[line_start..idx];
                if indent.chars().all(|c| c.is_whitespace()) && !text.contains(">>>>>>> REPLACE") {
                    return true;
                }
            }
        }

        false
    }

    /// Scans `chunk` (text belonging to the file section `llm_path`) for
    /// SEARCH / separator / REPLACE blocks.
    ///
    /// Returns the parsed items plus the number of bytes of `chunk` that
    /// were fully consumed; unconsumed bytes stay buffered so an
    /// incomplete block can finish on a later call.
    fn process_file_chunk(&self, llm_path: &str, chunk: &str) -> (Vec<StreamYieldItem>, usize) {
        let mut items = Vec::new();
        let mut cursor = 0;
        let search_pattern = "<<<<<<< SEARCH";
        let sep_pattern = "=======";
        let replace_pattern = ">>>>>>> REPLACE";

        while cursor < chunk.len() {
            let search_idx = match chunk[cursor..].find(search_pattern) {
                Some(i) => cursor + i,
                None => break,
            };

            // The marker only counts when everything before it on its line
            // is whitespace; that whitespace becomes the block's indent.
            let line_start = chunk[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
            let indent_slice = &chunk[line_start..search_idx];

            if !indent_slice.chars().all(|c| c.is_whitespace()) {
                // Not a marker: emit up to and including the first byte of
                // the false match ('<' is ASCII, so +1 is a char boundary),
                // then keep scanning.
                items.push(StreamYieldItem::Text(
                    chunk[cursor..search_idx + 1].to_string(),
                ));
                cursor = search_idx + 1;
                continue;
            }

            let block_search_start = search_idx + search_pattern.len();
            let block_search_start_content =
                block_search_start + consume_line_ending(&chunk[block_search_start..]);

            // Locate the `=======` separator with the same indent; if it
            // has not streamed in yet, back off to the marker's line start
            // and report what was consumed so far.
            let (sep_line_start, sep_line_end) =
                match find_marker_with_indent(chunk, sep_pattern, block_search_start, indent_slice)
                {
                    Some(pair) => pair,
                    None => {
                        let backtrack_pos = line_start.max(cursor);
                        if backtrack_pos > cursor {
                            items.push(StreamYieldItem::Text(
                                chunk[cursor..backtrack_pos].to_string(),
                            ));
                        }
                        return (items, backtrack_pos);
                    }
                };

            let block_replace_start_content =
                sep_line_end + consume_line_ending(&chunk[sep_line_end..]);

            // Same backtracking for the closing `>>>>>>> REPLACE` marker.
            let (replace_line_start, _replace_line_end) =
                match find_marker_with_indent(chunk, replace_pattern, sep_line_end, indent_slice) {
                    Some(pair) => pair,
                    None => {
                        let backtrack_pos = line_start.max(cursor);
                        if backtrack_pos > cursor {
                            items.push(StreamYieldItem::Text(
                                chunk[cursor..backtrack_pos].to_string(),
                            ));
                        }
                        return (items, backtrack_pos);
                    }
                };

            // Plain text preceding the block is yielded as-is.
            if search_idx > cursor {
                items.push(StreamYieldItem::Text(chunk[cursor..search_idx].to_string()));
            }

            let final_end = replace_line_start + indent_slice.len() + replace_pattern.len();

            // Trim the trailing CR left behind by CRLF line endings.
            let mut search_content = &chunk[block_search_start_content..sep_line_start];
            if search_content.ends_with('\r') {
                search_content = &search_content[..search_content.len() - 1];
            }

            let mut replace_content = &chunk[block_replace_start_content..replace_line_start];
            if replace_content.ends_with('\r') {
                replace_content = &replace_content[..replace_content.len() - 1];
            }

            items.push(StreamYieldItem::Patch(crate::models::AIPatch {
                llm_file_path: llm_path.to_string(),
                search_content: search_content.to_string(),
                replace_content: replace_content.to_string(),
                indent: indent_slice.to_string(),
                raw_block: chunk[search_idx..final_end].to_string(),
            }));

            cursor = final_end;
        }

        // Flush the remaining tail, withholding any trailing partial line
        // that could still become a marker.
        if cursor < chunk.len() {
            let tail = &chunk[cursor..];
            let mut tail_limit = tail.len();

            if self.is_incomplete(tail) {
                if let Some(last_newline) = tail.rfind('\n') {
                    tail_limit = last_newline + 1;
                } else {
                    tail_limit = 0;
                }
            }

            if tail_limit > 0 {
                items.push(StreamYieldItem::Text(tail[..tail_limit].to_string()));
                cursor += tail_limit;
            }
        }

        (items, cursor)
    }

    /// Applies one parsed SEARCH/REPLACE patch to the session state.
    ///
    /// Returns the item to surface in the stream (a diff block on
    /// success, the raw unparsed block on failure) plus any warnings.
    ///
    /// NOTE(review): `_root` is actually used (forwarded to
    /// `resolve_path`); the underscore prefix is misleading and could be
    /// dropped.
    pub fn handle_patch(
        &mut self,
        patch: &crate::models::AIPatch,
        _root: &Path,
    ) -> (Option<StreamYieldItem>, Vec<String>) {
        let mut warnings = Vec::new();

        // (optional warning, optional (resolved path, on-disk fallback)).
        let resolution = self.resolve_path(&patch.llm_file_path, _root, &patch.search_content);

        if let Some(w) = resolution.0 {
            warnings.push(w.clone());
        }

        if let Some((path, fallback)) = resolution.1 {
            // A file found on disk seeds both the overlay and the
            // discovered baseline, without overwriting existing entries.
            if let Some(fb) = fallback {
                self.overlay
                    .entry(path.clone())
                    .or_insert_with(|| fb.clone());
                self.discovered_baseline.entry(path.clone()).or_insert(fb);
            }

            // Patch against the latest known content: overlay first, then
            // the session baseline, then empty (brand-new file).
            let original = self
                .overlay
                .get(&path)
                .map(|s| s.as_str())
                .or_else(|| self.baseline.get(&path).map(|s| s.as_str()))
                .unwrap_or("");

            let mut applied = None;

            if let Some(res) =
                create_patched_content(original, &patch.search_content, &patch.replace_content)
            {
                applied = Some(res);
            }
            // Retry with CRs stripped in case the model emitted CRLF.
            else if patch.search_content.contains('\r') {
                let search_normalized = patch.search_content.replace('\r', "");
                if let Some(res) =
                    create_patched_content(original, &search_normalized, &patch.replace_content)
                {
                    applied = Some(res);
                }
            }

            if let Some(new_content) = applied {
                let diff = generate_diff(&path, Some(original), Some(&new_content));
                self.overlay.insert(path.clone(), new_content.clone());
                (
                    Some(StreamYieldItem::DiffBlock(
                        crate::models::ProcessedDiffBlock {
                            llm_file_path: patch.llm_file_path.clone(),
                            unified_diff: diff,
                        },
                    )),
                    warnings,
                )
            } else {
                warnings.push(format!(
                    "The SEARCH block from the AI could not be found in '{}'. Patch skipped.",
                    path
                ));

                (
                    Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
                        text: patch.raw_block.clone(),
                    })),
                    warnings,
                )
            }
        } else {
            warnings.push(format!(
                "File '{}' from the AI does not match any file in context. Patch skipped.",
                patch.llm_file_path
            ));

            (
                Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
                    text: patch.raw_block.clone(),
                })),
                warnings,
            )
        }
    }

    /// Flushes the stream after the last chunk: force-parses any complete
    /// block still in the buffer, then emits the remainder as either an
    /// unparsed block (if it looks like a broken marker) or plain text.
    pub fn finish(&mut self, last_chunk: &str) -> (String, Vec<StreamYieldItem>, Vec<String>) {
        self.feed(last_chunk);

        let mut items: Vec<_> = self.by_ref().collect();

        // A block holding both markers but no trailing newline only needs
        // one more '\n' to parse; supply it and re-drain the iterator.
        if self.is_incomplete(&self.buffer)
            && self.buffer.contains("<<<<<<< SEARCH")
            && self.buffer.contains(">>>>>>> REPLACE")
        {
            self.buffer.push('\n');
            items.extend(self.by_ref());
        }

        if !self.buffer.is_empty() {
            let looks_like_marker = self.is_incomplete(&self.buffer);

            if looks_like_marker {
                items.push(StreamYieldItem::Unparsed(UnparsedBlock {
                    text: self.buffer.clone(),
                }));
            } else {
                items.push(StreamYieldItem::Text(self.buffer.clone()));
            }
            self.buffer.clear();
        }

        let diff = self.build_final_unified_diff();

        // Only `Warning` items already present in `items` are collected
        // here; any `Patch` items are still unresolved at this point.
        let warnings = self.collect_warnings(&items);

        (diff, items, warnings)
    }

    /// Extracts the text of every `Warning` item in `items`.
    pub fn collect_warnings(&self, items: &[StreamYieldItem]) -> Vec<String> {
        items
            .iter()
            .filter_map(|i| match i {
                StreamYieldItem::Warning(w) => Some(w.text.clone()),
                _ => None,
            })
            .collect()
    }

    /// Resolves every `Patch` item in `items` against the session state,
    /// replacing it with its outcome (diff block or unparsed block) and
    /// inserting a `Warning` item for each warning produced; all other
    /// items pass through unchanged, in order.
    pub fn process_yields(
        &mut self,
        items: Vec<StreamYieldItem>,
        session_root: &Path,
    ) -> Vec<StreamYieldItem> {
        let mut processed = Vec::with_capacity(items.len());
        for item in items {
            if let StreamYieldItem::Patch(ref patch) = item {
                let (resolved, warnings) = self.handle_patch(patch, session_root);
                for w in warnings {
                    processed.push(StreamYieldItem::Warning(crate::models::WarningMessage {
                        text: w,
                    }));
                }
                if let Some(res) = resolved {
                    processed.push(res);
                }
            } else {
                processed.push(item);
            }
        }
        processed
    }

    /// Builds one unified diff covering every file touched so far, in
    /// deterministic (sorted) path order.
    pub fn build_final_unified_diff(&self) -> String {
        let mut diffs = String::new();
        // BTreeSet gives a stable, sorted union of touched paths.
        let keys: std::collections::BTreeSet<_> = self
            .discovered_baseline
            .keys()
            .chain(self.overlay.keys())
            .collect();

        for k in keys {
            // "Old" prefers on-disk content discovered mid-stream over the
            // session baseline; "new" is the patched overlay entry.
            let old = self
                .discovered_baseline
                .get(k)
                .map(|s| s.as_str())
                .or_else(|| self.baseline.get(k).map(|s| s.as_str()));
            let new = self.overlay.get(k).map(|s| s.as_str());

            if old != new {
                let d = generate_diff(k, old, new);
                diffs.push_str(&d);
            }
        }
        diffs
    }

    /// Maps an LLM-supplied path onto a session file.
    ///
    /// Returns `(optional warning, optional (path, optional fallback
    /// content))`: paths already known to the overlay or baseline resolve
    /// directly; otherwise, a path that exists on disk under `root`
    /// (canonicalised so it cannot escape the session root) resolves with
    /// its file content as the fallback and a warning.
    fn resolve_path(
        &self,
        llm_path: &str,
        root: &Path,
        search_block: &str,
    ) -> (Option<String>, Option<(String, Option<String>)>) {
        if self.overlay.contains_key(llm_path) || self.baseline.contains_key(llm_path) {
            return (None, Some((llm_path.to_string(), None)));
        }
        // An empty search block is accepted without consulting the disk —
        // presumably a file-creation patch; TODO confirm with callers.
        if search_block.trim().is_empty() {
            return (None, Some((llm_path.to_string(), None)));
        }
        let abs_path = root.join(llm_path);
        if abs_path.exists()
            && let Ok(canon) = abs_path.canonicalize()
            && let Ok(root_canon) = root.canonicalize()
            && canon.starts_with(root_canon)
            && let Ok(content) = std::fs::read_to_string(&abs_path)
        {
            let msg = format!(
                "File '{}' was not in the session context but was found on disk.",
                llm_path
            );
            return (Some(msg), Some((llm_path.to_string(), Some(content))));
        }
        (None, None)
    }
}
677
/// Byte length of a line terminator at the start of `s`: 2 for CRLF,
/// 1 for a bare LF, 0 otherwise (including a lone CR).
fn consume_line_ending(s: &str) -> usize {
    match s.as_bytes() {
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] => 1,
        _ => 0,
    }
}
687
/// Finds the first occurrence of `marker` at or after `start_pos` that
/// occupies its own line: preceded on that line by exactly
/// `expected_indent`, and followed only by non-newline whitespace.
///
/// Returns `(line_start, line_end)` byte offsets, where `line_end` is
/// the index of the terminating `'\n'` (or `chunk.len()` if none).
fn find_marker_with_indent(
    chunk: &str,
    marker: &str,
    start_pos: usize,
    expected_indent: &str,
) -> Option<(usize, usize)> {
    let mut from = start_pos;
    while let Some(rel) = chunk[from..].find(marker) {
        let hit = from + rel;
        // Beginning of the line that contains this candidate.
        let bol = match chunk[..hit].rfind('\n') {
            Some(nl) => nl + 1,
            None => 0,
        };
        let tail_start = hit + marker.len();

        if &chunk[bol..hit] == expected_indent {
            let eol = chunk[tail_start..]
                .find('\n')
                .map_or(chunk.len(), |off| tail_start + off);
            // Trailing spaces/tabs/CR are tolerated; any other character
            // means this was not a marker line.
            if chunk[tail_start..eol]
                .chars()
                .all(|c| c.is_whitespace() && c != '\n')
            {
                return Some((bol, eol));
            }
        }
        // Resume just past the candidate marker text.
        from = tail_start;
    }
    None
}