1use crate::diffing::diff_utils::generate_diff;
2use crate::diffing::patching::create_patched_content;
3use crate::models::{StreamYieldItem, UnparsedBlock};
4use regex::Regex;
5use std::collections::HashMap;
6use std::path::Path;
7use std::sync::LazyLock;
8
/// Incremental parser for an LLM output stream containing `File:` headers and
/// `<<<<<<< SEARCH` / `=======` / `>>>>>>> REPLACE` patch blocks.
pub struct StreamParser<'a> {
    /// Raw stream text received via `feed` that has not yet been parsed.
    buffer: String,
    /// Path from the most recently seen `File:` header, while its content is
    /// being parsed; `None` outside any file section.
    current_file: Option<String>,
    /// Items already parsed but not yet returned by `Iterator::next`.
    yield_queue: std::collections::VecDeque<StreamYieldItem>,
    /// Original file contents supplied by the caller; never modified here.
    baseline: &'a HashMap<String, String>,
    /// Latest patched content per path; takes precedence over `baseline`.
    overlay: HashMap<String, String>,
    /// Contents of files not in `baseline` that were discovered on disk
    /// during path resolution (see `resolve_path`).
    discovered_baseline: HashMap<String, String>,
}
21
22impl<'a> StreamParser<'a> {
23 pub fn get_pending_content(&self) -> String {
24 self.buffer.clone()
25 }
26
27 pub fn new(original_contents: &'a HashMap<String, String>) -> Self {
28 Self {
29 buffer: String::new(),
30 current_file: None,
31 yield_queue: std::collections::VecDeque::new(),
32 baseline: original_contents,
33 overlay: HashMap::new(),
34 discovered_baseline: HashMap::new(),
35 }
36 }
37
38 pub fn feed(&mut self, chunk: &str) {
41 self.buffer.push_str(chunk);
42 }
43
44 pub fn parse_and_resolve(&mut self, chunk: &str, session_root: &Path) -> Vec<StreamYieldItem> {
46 self.feed(chunk);
47 let raw_yields: Vec<_> = self.by_ref().collect();
48 self.process_yields(raw_yields, session_root)
49 }
50
51 pub fn final_resolve(
54 &mut self,
55 session_root: &Path,
56 ) -> (String, Vec<crate::models::DisplayItem>, Vec<String>) {
57 let (_, raw_yields, _) = self.finish("");
59
60 let processed = self.process_yields(raw_yields, session_root);
62
63 let warnings = self.collect_warnings(&processed);
65 let diff = self.build_final_unified_diff();
66 let display_items = processed
67 .into_iter()
68 .filter_map(|y| y.to_display_item(true))
69 .collect();
70
71 (diff, display_items, warnings)
72 }
73}
74
/// Matches a whole `File: <path>` header line (optionally indented with
/// spaces/tabs); the `path` capture holds everything after `File:` up to the
/// line break, and the `line` capture spans the entire line including it.
static FILE_HEADER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^(?P<line>[ \t]*File:[ \t]*(?P<path>.*?)\r?\n)").unwrap());
77
impl<'a> Iterator for StreamParser<'a> {
    type Item = StreamYieldItem;

    /// Pulls the next parsed item out of the buffered stream, or returns
    /// `None` when no complete item can be produced from the bytes seen so
    /// far (more input may still arrive via `feed`).
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain items produced by an earlier pass first.
            if let Some(item) = self.yield_queue.pop_front() {
                return Some(item);
            }

            if self.buffer.is_empty() {
                return None;
            }

            // Inside a `File:` section: everything up to the next header
            // belongs to the current file and may contain patch blocks.
            if let Some(llm_file_path) = self.current_file.clone() {
                let next_header_idx = FILE_HEADER_RE
                    .find(&self.buffer)
                    .map(|m| m.start())
                    .unwrap_or(self.buffer.len());

                if next_header_idx > 0 {
                    let (chunk_items, consumed_bytes) =
                        self.process_file_chunk(&llm_file_path, &self.buffer[..next_header_idx]);
                    self.buffer.drain(..consumed_bytes);

                    if !chunk_items.is_empty() {
                        self.yield_queue.extend(chunk_items);
                        continue;
                    }

                    // Bytes were consumed without yielding items; re-scan.
                    if consumed_bytes > 0 {
                        continue;
                    }

                    // Nothing consumed and no later header in the buffer:
                    // wait for more input before deciding what this tail is.
                    if next_header_idx == self.buffer.len() {
                        return None;
                    }
                }

                // A new header follows; leave the current file section.
                if next_header_idx < self.buffer.len() {
                    self.current_file = None;
                    continue;
                }
            }

            // Not inside a file: look for the next `File:` header.
            if let Some(caps) = FILE_HEADER_RE.captures(&self.buffer) {
                let mat = caps.get(0).unwrap();
                // Yield any plain text preceding the header first.
                if mat.start() > 0 {
                    let text = self.buffer[..mat.start()].to_string();
                    self.buffer.drain(..mat.start());
                    return Some(StreamYieldItem::Text(text));
                }

                // Strip surrounding whitespace plus markdown bold/backtick
                // decoration from the captured path.
                let path_str = caps
                    .name("path")
                    .unwrap()
                    .as_str()
                    .trim()
                    .trim_matches(|c| c == '*' || c == '`')
                    .to_string();
                self.current_file = Some(path_str.clone());
                self.buffer.drain(..mat.end());
                return Some(StreamYieldItem::FileHeader(crate::models::FileHeader {
                    llm_file_path: path_str,
                }));
            }

            // No header found: emit as much plain text as is "stable", i.e.
            // cannot still grow into a header or patch marker.
            let text = &self.buffer;
            let mut stable_len = text.len();

            if self.is_incomplete(text) {
                if let Some(m) = FILE_HEADER_RE.find(text) {
                    stable_len = m.start();
                } else if let Some(search_idx) = text.find("<<<<<<< SEARCH") {
                    // Hold back from the start of the line with the marker.
                    stable_len = text[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
                } else if let Some(last_newline) = text.rfind('\n') {
                    let last_line = &text[last_newline + 1..];
                    if self.is_incomplete(last_line) {
                        stable_len = last_newline + 1;
                    }
                } else {
                    stable_len = 0;
                }
            }

            if stable_len > 0 {
                let text_yield = self.buffer[..stable_len].to_string();
                self.buffer.drain(..stable_len);
                return Some(StreamYieldItem::Text(text_yield));
            }

            return None;
        }
    }
}
177
178impl<'a> StreamParser<'a> {
179 fn is_incomplete(&self, text: &str) -> bool {
180 if let Some(idx) = text.find("<<<<<<< SEARCH") {
182 let line_start = text[..idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
183 let indent = &text[line_start..idx];
184 if indent.chars().all(|c| c.is_whitespace()) && !text.contains(">>>>>>> REPLACE") {
185 return true;
186 }
187 }
188
189 if let Some(last_line) = text.split('\n').next_back() {
191 let trimmed = last_line.trim_start();
192 if !trimmed.is_empty() {
193 if "File:".starts_with(trimmed) && trimmed.len() < "File:".len() {
196 return true;
197 }
198 if trimmed.starts_with("File:") && !text.ends_with('\n') {
199 return true;
200 }
201
202 for marker in ["<<<<<<< SEARCH", "=======", ">>>>>>> REPLACE"] {
204 if marker.starts_with(trimmed) && marker.len() > trimmed.len() {
205 return true;
206 }
207 }
208 }
209 }
210 false
211 }
212
    /// Splits `chunk` (text belonging to `llm_path`) into plain-text and
    /// patch items.
    ///
    /// Returns the items found plus the number of bytes consumed; bytes past
    /// the returned count are left buffered because they may be the start of
    /// a patch block that is still streaming in.
    fn process_file_chunk(&self, llm_path: &str, chunk: &str) -> (Vec<StreamYieldItem>, usize) {
        let mut items = Vec::new();
        let mut cursor = 0;
        let search_pattern = "<<<<<<< SEARCH";
        let sep_pattern = "=======";
        let replace_pattern = ">>>>>>> REPLACE";

        while cursor < chunk.len() {
            let search_idx = match chunk[cursor..].find(search_pattern) {
                Some(i) => cursor + i,
                None => break,
            };

            // Indentation of the line holding the SEARCH marker.
            let line_start = chunk[..search_idx].rfind('\n').map(|i| i + 1).unwrap_or(0);
            let indent_slice = &chunk[line_start..search_idx];

            // Non-whitespace before the marker means it is not a real block
            // delimiter; emit text through the first '<' (ASCII, so +1 is a
            // valid boundary) and keep scanning past it.
            if !indent_slice.chars().all(|c| c.is_whitespace()) {
                items.push(StreamYieldItem::Text(
                    chunk[cursor..search_idx + 1].to_string(),
                ));
                cursor = search_idx + 1;
                continue;
            }

            let block_search_start = search_idx + search_pattern.len();
            let block_search_start_content =
                block_search_start + consume_line_ending(&chunk[block_search_start..]);

            // Locate the "=======" separator with matching indentation; if
            // missing, the block is incomplete — stop just before the marker.
            let (sep_line_start, sep_line_end) =
                match find_marker_with_indent(chunk, sep_pattern, block_search_start, indent_slice)
                {
                    Some(pair) => pair,
                    None => {
                        if search_idx > cursor {
                            items
                                .push(StreamYieldItem::Text(chunk[cursor..search_idx].to_string()));
                        }
                        return (items, search_idx);
                    }
                };

            let block_replace_start_content =
                sep_line_end + consume_line_ending(&chunk[sep_line_end..]);

            // Same for the closing REPLACE marker.
            let (replace_line_start, _replace_line_end) =
                match find_marker_with_indent(chunk, replace_pattern, sep_line_end, indent_slice) {
                    Some(pair) => pair,
                    None => {
                        if search_idx > cursor {
                            items
                                .push(StreamYieldItem::Text(chunk[cursor..search_idx].to_string()));
                        }
                        return (items, search_idx);
                    }
                };

            // Text between the previous cursor and this block is plain text.
            if search_idx > cursor {
                items.push(StreamYieldItem::Text(chunk[cursor..search_idx].to_string()));
            }

            let final_end = replace_line_start + indent_slice.len() + replace_pattern.len();

            let search_content = &chunk[block_search_start_content..sep_line_start];
            let replace_content = &chunk[block_replace_start_content..replace_line_start];

            items.push(StreamYieldItem::Patch(crate::models::AIPatch {
                llm_file_path: llm_path.to_string(),
                search_content: search_content.to_string(),
                replace_content: replace_content.to_string(),
                indent: indent_slice.to_string(),
                raw_block: chunk[search_idx..final_end].to_string(),
            }));

            cursor = final_end;
        }

        // Emit the tail only when it cannot be the prefix of another block.
        if cursor < chunk.len() {
            let tail = &chunk[cursor..];
            if !self.is_incomplete(tail) {
                items.push(StreamYieldItem::Text(tail.to_string()));
                cursor = chunk.len();
            }
        }

        (items, cursor)
    }
302
303 pub fn handle_patch(
304 &mut self,
305 patch: &crate::models::AIPatch,
306 _root: &Path,
307 ) -> (Option<StreamYieldItem>, Vec<String>) {
308 let mut warnings = Vec::new();
309
310 let resolution = self.resolve_path(&patch.llm_file_path, _root, &patch.search_content);
311
312 if let Some(w) = resolution.0 {
313 warnings.push(w.clone());
314 }
315
316 if let Some((path, fallback)) = resolution.1 {
317 if let Some(fb) = fallback {
318 self.overlay
319 .entry(path.clone())
320 .or_insert_with(|| fb.clone());
321 self.discovered_baseline.entry(path.clone()).or_insert(fb);
322 }
323
324 let original = self
325 .overlay
326 .get(&path)
327 .map(|s| s.as_str())
328 .or_else(|| self.baseline.get(&path).map(|s| s.as_str()))
329 .unwrap_or("");
330
331 if let Some(new_content) =
332 create_patched_content(original, &patch.search_content, &patch.replace_content)
333 {
334 let diff = generate_diff(&path, Some(original), Some(&new_content));
335 self.overlay.insert(path.clone(), new_content.clone());
336 (
337 Some(StreamYieldItem::DiffBlock(
338 crate::models::ProcessedDiffBlock {
339 llm_file_path: patch.llm_file_path.clone(),
340 unified_diff: diff,
341 },
342 )),
343 warnings,
344 )
345 } else {
346 warnings.push(format!(
347 "The SEARCH block from the AI could not be found in '{}'. Patch skipped.",
348 path
349 ));
350
351 (
352 Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
353 text: patch.raw_block.clone(),
354 })),
355 warnings,
356 )
357 }
358 } else {
359 warnings.push(format!(
360 "File '{}' from the AI does not match any file in context. Patch skipped.",
361 patch.llm_file_path
362 ));
363
364 (
365 Some(StreamYieldItem::Unparsed(crate::models::UnparsedBlock {
366 text: patch.raw_block.clone(),
367 })),
368 warnings,
369 )
370 }
371 }
372
373 pub fn finish(&mut self, last_chunk: &str) -> (String, Vec<StreamYieldItem>, Vec<String>) {
374 self.feed(last_chunk);
376
377 if self.is_incomplete(&self.buffer)
379 && self.buffer.contains("<<<<<<< SEARCH")
380 && self.buffer.contains(">>>>>>> REPLACE")
381 {
382 self.buffer.push('\n');
383 }
384
385 let mut items: Vec<_> = self.by_ref().collect();
386
387 if !self.buffer.is_empty() {
389 let looks_like_marker = self.is_incomplete(&self.buffer);
390
391 if looks_like_marker {
392 items.push(StreamYieldItem::Unparsed(UnparsedBlock {
393 text: self.buffer.clone(),
394 }));
395 } else {
396 items.push(StreamYieldItem::Text(self.buffer.clone()));
397 }
398 self.buffer.clear();
399 }
400
401 let diff = self.build_final_unified_diff();
402
403 let warnings = self.collect_warnings(&items);
404
405 (diff, items, warnings)
406 }
407
408 pub fn collect_warnings(&self, items: &[StreamYieldItem]) -> Vec<String> {
409 items
410 .iter()
411 .filter_map(|i| match i {
412 StreamYieldItem::Warning(w) => Some(w.text.clone()),
413 _ => None,
414 })
415 .collect()
416 }
417
418 pub fn process_yields(
420 &mut self,
421 items: Vec<StreamYieldItem>,
422 session_root: &Path,
423 ) -> Vec<StreamYieldItem> {
424 let mut processed = Vec::with_capacity(items.len());
425 for item in items {
426 if let StreamYieldItem::Patch(ref patch) = item {
427 let (resolved, warnings) = self.handle_patch(patch, session_root);
428 for w in warnings {
429 processed.push(StreamYieldItem::Warning(crate::models::WarningMessage {
430 text: w,
431 }));
432 }
433 if let Some(res) = resolved {
434 processed.push(res);
435 }
436 } else {
437 processed.push(item);
438 }
439 }
440 processed
441 }
442
443 pub fn build_final_unified_diff(&self) -> String {
444 let mut diffs = String::new();
445 let keys: std::collections::BTreeSet<_> = self
446 .discovered_baseline
447 .keys()
448 .chain(self.overlay.keys())
449 .collect();
450
451 for k in keys {
452 let old = self
453 .discovered_baseline
454 .get(k)
455 .map(|s| s.as_str())
456 .or_else(|| self.baseline.get(k).map(|s| s.as_str()));
457 let new = self.overlay.get(k).map(|s| s.as_str());
458
459 if old != new {
460 let d = generate_diff(k, old, new);
461 diffs.push_str(&d);
462 }
463 }
464 diffs
465 }
466
467 fn resolve_path(
468 &self,
469 llm_path: &str,
470 root: &Path,
471 search_block: &str,
472 ) -> (Option<String>, Option<(String, Option<String>)>) {
473 if self.overlay.contains_key(llm_path) || self.baseline.contains_key(llm_path) {
474 return (None, Some((llm_path.to_string(), None)));
475 }
476 if search_block.trim().is_empty() {
477 return (None, Some((llm_path.to_string(), None)));
478 }
479 let abs_path = root.join(llm_path);
480 if abs_path.exists()
481 && let Ok(canon) = abs_path.canonicalize()
482 && let Ok(root_canon) = root.canonicalize()
483 && canon.starts_with(root_canon)
484 && let Ok(content) = std::fs::read_to_string(&abs_path)
485 {
486 let msg = format!(
487 "File '{}' was not in the session context but was found on disk.",
488 llm_path
489 );
490 return (Some(msg), Some((llm_path.to_string(), Some(content))));
491 }
492 (None, None)
493 }
494}
495
/// Returns the byte length of a line terminator at the start of `s`:
/// 2 for "\r\n", 1 for "\n", 0 otherwise (including a lone '\r').
fn consume_line_ending(s: &str) -> usize {
    match s.as_bytes() {
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] => 1,
        _ => 0,
    }
}
505
/// Scans `chunk` from `start_pos` for a line consisting of exactly
/// `expected_indent`, then `marker`, then nothing but non-newline whitespace.
///
/// Returns `(line_start, line_end)`: the byte offset of the start of that
/// line and of its terminating '\n' (or `chunk.len()` if unterminated).
fn find_marker_with_indent(
    chunk: &str,
    marker: &str,
    start_pos: usize,
    expected_indent: &str,
) -> Option<(usize, usize)> {
    let mut pos = start_pos;
    while let Some(rel) = chunk[pos..].find(marker) {
        let marker_start = pos + rel;
        let marker_end = marker_start + marker.len();

        // Start of the line containing this candidate occurrence.
        let line_start = match chunk[..marker_start].rfind('\n') {
            Some(nl) => nl + 1,
            None => 0,
        };

        if &chunk[line_start..marker_start] == expected_indent {
            // End of the line: the next '\n', or the end of the input.
            let line_end = chunk[marker_end..]
                .find('\n')
                .map_or(chunk.len(), |off| marker_end + off);
            let trailing = &chunk[marker_end..line_end];
            if trailing.chars().all(|c| c.is_whitespace() && c != '\n') {
                return Some((line_start, line_end));
            }
        }

        // Not a valid marker line; resume scanning after this occurrence.
        pos = marker_end;
    }
    None
}