1use anyhow::{Context, Result};
2use ropey::{Rope, RopeSlice};
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use super::history::{Action, History};
7use super::EncodingConfig;
8use crate::debug_log;
9
/// A text buffer stored in a `Rope`, with an optional backing file, an
/// undo/redo history, and separate encodings for reading and saving.
pub struct RopeBuffer {
    /// The buffer's text.
    rope: Rope,
    /// Path the buffer was loaded from or last saved to; `None` for a buffer
    /// created with `new`.
    file_path: Option<PathBuf>,
    /// Set by edits (including undo/redo); cleared by a successful save or reload.
    modified: bool,
    /// Recorded edit actions consumed by `undo` and `redo`.
    history: History,
    /// Raised while `undo` / `redo` runs; the editing methods skip history
    /// recording while it is set.
    in_undo_redo: bool,
    /// Encoding the file contents were decoded with.
    read_encoding: &'static encoding_rs::Encoding,
    /// Encoding used to encode the buffer when it is written to disk.
    save_encoding: &'static encoding_rs::Encoding,
}
19
20impl RopeBuffer {
21 pub fn new() -> Self {
22 let system_enc = Self::get_system_ansi_encoding();
25
26 if cfg!(debug_assertions) {
28 eprintln!("[DEBUG] RopeBuffer::new()");
29 eprintln!("[DEBUG] System default encoding: {}", system_enc.name());
30 }
31
32 Self {
33 rope: Rope::new(),
34 file_path: None,
35 modified: false,
36 history: History::default(),
37 in_undo_redo: false,
38 read_encoding: system_enc,
39 save_encoding: system_enc,
40 }
41 }
42
43 pub fn get_system_ansi_encoding() -> &'static encoding_rs::Encoding {
45 #[cfg(target_os = "windows")]
51 {
52 use winapi::um::consoleapi::{GetConsoleCP, GetConsoleOutputCP};
53 use winapi::um::winnls::GetACP;
54
55 let console_input_cp = unsafe { GetConsoleCP() };
57 let console_output_cp = unsafe { GetConsoleOutputCP() };
58 let system_acp = unsafe { GetACP() };
59
60 if cfg!(debug_assertions) {
61 eprintln!("[DEBUG] Detecting system encoding on Windows:");
62 eprintln!(
63 "[DEBUG] Console Input CP (GetConsoleCP): {}",
64 console_input_cp
65 );
66 eprintln!(
67 "[DEBUG] Console Output CP (GetConsoleOutputCP): {}",
68 console_output_cp
69 );
70 eprintln!("[DEBUG] System ANSI CP (GetACP): {}", system_acp);
71 }
72
73 let cp = if console_output_cp != 0 {
75 if cfg!(debug_assertions) {
76 eprintln!("[DEBUG] Using Console Output CP: {}", console_output_cp);
77 }
78 console_output_cp
79 } else {
80 if cfg!(debug_assertions) {
81 eprintln!(
82 "[DEBUG] Console CP is 0, using System ANSI CP: {}",
83 system_acp
84 );
85 }
86 system_acp
87 };
88
89 let encoding = match cp {
90 65001 => {
91 if cfg!(debug_assertions) {
92 eprintln!("[DEBUG] Using UTF-8 (CP 65001)");
93 }
94 encoding_rs::UTF_8
95 }
96 936 => {
97 if cfg!(debug_assertions) {
98 eprintln!("[DEBUG] Using GBK (CP 936)");
99 }
100 encoding_rs::GBK
101 }
102 950 => {
103 if cfg!(debug_assertions) {
105 eprintln!("[DEBUG] Using Big5 (CP 950)");
106 }
107 if let Some(enc) = encoding_rs::Encoding::for_label(b"big5") {
108 enc
109 } else {
110 encoding_rs::UTF_8
111 }
112 }
113 932 => {
114 if cfg!(debug_assertions) {
115 eprintln!("[DEBUG] Using Shift_JIS (CP 932)");
116 }
117 encoding_rs::SHIFT_JIS
118 }
119 949 => {
120 if cfg!(debug_assertions) {
122 eprintln!("[DEBUG] Using EUC-KR (CP 949)");
123 }
124 if let Some(enc) = encoding_rs::Encoding::for_label(b"euc-kr") {
125 enc
126 } else {
127 encoding_rs::UTF_8
128 }
129 }
130 1252 => {
131 if cfg!(debug_assertions) {
132 eprintln!("[DEBUG] Using Windows-1252 (CP 1252)");
133 }
134 encoding_rs::WINDOWS_1252
135 }
136 _ => {
137 if cfg!(debug_assertions) {
138 eprintln!("[DEBUG] Unknown code page, using UTF-8 as fallback");
139 }
140 encoding_rs::UTF_8
141 }
142 };
143
144 encoding
145 }
146
147 #[cfg(not(target_os = "windows"))]
148 {
149 use std::env;
150
151 let locale_vars = ["LC_ALL", "LC_CTYPE", "LANG"];
154
155 if cfg!(debug_assertions) {
156 eprintln!("[DEBUG] Detecting system encoding on Unix-like system:");
157 }
158
159 for var in &locale_vars {
160 if let Ok(locale) = env::var(var) {
161 if cfg!(debug_assertions) {
162 eprintln!("[DEBUG] {} = {}", var, locale);
163 }
164
165 if let Some(charset_start) = locale.find('.') {
168 let charset = &locale[charset_start + 1..];
169
170 if cfg!(debug_assertions) {
171 eprintln!("[DEBUG] Detected charset: {}", charset);
172 }
173
174 match charset.to_uppercase().as_str() {
175 "UTF-8" => {
176 if cfg!(debug_assertions) {
177 eprintln!("[DEBUG] Using UTF-8");
178 }
179 return encoding_rs::UTF_8;
180 }
181 "GBK" | "GB2312" | "GB18030" => {
182 if cfg!(debug_assertions) {
183 eprintln!("[DEBUG] Using GBK");
184 }
185 return encoding_rs::GBK;
186 }
187 "BIG5" => {
188 if let Some(enc) = encoding_rs::Encoding::for_label(b"big5") {
189 if cfg!(debug_assertions) {
190 eprintln!("[DEBUG] Using Big5");
191 }
192 return enc;
193 }
194 }
195 "SHIFT_JIS" | "SJIS" => {
196 if cfg!(debug_assertions) {
197 eprintln!("[DEBUG] Using Shift_JIS");
198 }
199 return encoding_rs::SHIFT_JIS;
200 }
201 "EUC-KR" => {
202 if let Some(enc) = encoding_rs::Encoding::for_label(b"euc-kr") {
203 if cfg!(debug_assertions) {
204 eprintln!("[DEBUG] Using EUC-KR");
205 }
206 return enc;
207 }
208 }
209 _ => {
210 if cfg!(debug_assertions) {
211 eprintln!("[DEBUG] Unknown charset, continuing...");
212 }
213 } }
215 }
216 }
217 }
218
219 if cfg!(debug_assertions) {
221 eprintln!("[DEBUG] No valid locale found, using UTF-8 as fallback");
222 }
223 encoding_rs::UTF_8
224 }
225 }
226
227 fn detect_unicode(bytes: &[u8]) -> Option<(&'static encoding_rs::Encoding, usize)> {
229 if bytes.len() >= 3 && bytes[0..3] == [0xEF, 0xBB, 0xBF] {
230 Some((encoding_rs::UTF_8, 3))
232 } else if bytes.len() >= 2 && bytes[0..2] == [0xFF, 0xFE] {
233 Some((encoding_rs::UTF_16LE, 2))
235 } else if bytes.len() >= 2 && bytes[0..2] == [0xFE, 0xFF] {
236 Some((encoding_rs::UTF_16BE, 2))
238 } else {
239 let (_, _, had_errors) = encoding_rs::UTF_8.decode(bytes);
241 if !had_errors {
242 Some((encoding_rs::UTF_8, 0))
244 } else {
245 None
246 }
247 }
248 }
249
250 pub fn from_file_with_encoding(path: &Path, encoding_config: &EncodingConfig) -> Result<Self> {
259 let (rope, detected_encoding, modified) = if path.exists() {
261 let bytes = fs::read(path)
262 .with_context(|| format!("Failed to read file: {}", path.display()))?;
263
264 let (read_encoding, bom_length, detected_encoding_info) =
267 if let Some((bom_encoding, bom_len)) = Self::detect_unicode(&bytes) {
268 let detected_info = if bom_len > 0 {
270 format!("BOM detected: {}", bom_encoding.name())
271 } else {
272 "UTF-8 detected (no BOM)".to_string()
273 };
274 (bom_encoding, bom_len, Some((detected_info, bom_encoding)))
275 } else if let Some(specified_enc) = encoding_config.read_encoding {
276 (specified_enc, 0, None)
278 } else {
279 let system_enc = Self::get_system_ansi_encoding();
281 (system_enc, 0, None)
282 };
283
284 debug_log!(" File: {}", path.display());
287 if let Some((detected_info, detected_enc)) = &detected_encoding_info {
288 debug_log!(" Detected: {}", detected_info);
289 if let Some(specified_enc) = encoding_config.read_encoding {
290 if detected_enc.name() != specified_enc.name() {
291 debug_log!(" User specified: {} (bypassed)", specified_enc.name());
292 }
293 }
294 } else if let Some(specified_enc) = encoding_config.read_encoding {
295 debug_log!(" User specified: {}", specified_enc.name());
296 } else {
297 debug_log!(" System default: {}", read_encoding.name());
298 }
299 debug_log!(" Using decoding: {}", read_encoding.name());
300 let (decoded, _, had_errors) = read_encoding.decode(&bytes[bom_length..]);
304 if had_errors {
305 eprintln!(
306 "[WARN] Encoding errors detected in file: {}",
307 path.display()
308 );
309 }
310
311 (Rope::from_str(&decoded), read_encoding, false)
312 } else {
313 let encoding_to_use = encoding_config
316 .read_encoding
317 .unwrap_or_else(|| Self::get_system_ansi_encoding());
318
319 if cfg!(debug_assertions) {
320 eprintln!("[DEBUG] File does not exist, creating new buffer");
321 if encoding_config.read_encoding.is_some() {
322 eprintln!(
323 "[DEBUG] Using user-specified encoding: {}",
324 encoding_to_use.name()
325 );
326 } else {
327 eprintln!(
328 "[DEBUG] Using system default encoding: {}",
329 encoding_to_use.name()
330 );
331 }
332 }
333
334 (Rope::new(), encoding_to_use, true)
335 };
336
337 let save_encoding = encoding_config
339 .save_encoding
340 .or(encoding_config.read_encoding)
341 .unwrap_or(detected_encoding);
342
343 debug_log!(" Using encoding: {}", save_encoding.name());
346 Ok(Self {
349 rope,
350 file_path: Some(path.to_path_buf()),
351 modified,
352 history: History::default(),
353 in_undo_redo: false,
354 read_encoding: detected_encoding,
355 save_encoding,
356 })
357 }
358
359 pub fn insert_char(&mut self, pos: usize, ch: char) {
360 let pos = pos.min(self.rope.len_chars());
361
362 if !self.in_undo_redo {
364 self.history.push(Action::Insert {
365 pos,
366 text: ch.to_string(),
367 });
368 }
369
370 self.rope.insert_char(pos, ch);
371 self.modified = true;
372 }
373
374 pub fn insert(&mut self, pos: usize, text: &str) {
375 let pos = pos.min(self.rope.len_chars());
376
377 if !self.in_undo_redo {
379 self.history.push(Action::Insert {
380 pos,
381 text: text.to_string(),
382 });
383 }
384
385 self.rope.insert(pos, text);
386 self.modified = true;
387 }
388
389 pub fn delete_char(&mut self, pos: usize) {
390 if pos < self.rope.len_chars() {
391 let deleted_char = self.rope.char(pos).to_string();
393
394 if !self.in_undo_redo {
396 self.history.push(Action::Delete {
397 pos,
398 text: deleted_char,
399 });
400 }
401
402 self.rope.remove(pos..pos + 1);
403 self.modified = true;
404 }
405 }
406
407 pub fn delete_range(&mut self, start: usize, end: usize) {
408 if start < end && start < self.rope.len_chars() {
409 let end = end.min(self.rope.len_chars());
410
411 let deleted_text = self.rope.slice(start..end).to_string();
413
414 if !self.in_undo_redo {
416 self.history.push(Action::DeleteRange {
417 start,
418 end,
419 text: deleted_text,
420 });
421 }
422
423 self.rope.remove(start..end);
424 self.modified = true;
425 }
426 }
427
428 pub fn delete_line(&mut self, row: usize) {
429 if row < self.line_count() {
430 let start = self.rope.line_to_char(row);
431 let end = if row + 1 < self.line_count() {
432 self.rope.line_to_char(row + 1)
433 } else {
434 self.rope.len_chars()
435 };
436
437 let deleted_line = self.rope.slice(start..end).to_string();
439
440 if !self.in_undo_redo {
442 self.history.push(Action::DeleteRange {
443 start,
444 end,
445 text: deleted_line,
446 });
447 }
448
449 self.rope.remove(start..end);
450 self.modified = true;
451 }
452 }
453
    /// Returns the number of lines in the buffer, as reported by the rope's
    /// `len_lines`.
    pub fn line_count(&self) -> usize {
        self.rope.len_lines()
    }
457
458 pub fn line(&self, idx: usize) -> Option<RopeSlice<'_>> {
459 if idx < self.line_count() {
460 Some(self.rope.line(idx))
461 } else {
462 None
463 }
464 }
465
466 pub fn line_to_char(&self, line_idx: usize) -> usize {
467 self.rope.line_to_char(line_idx.min(self.line_count()))
468 }
469
470 pub fn char_to_line(&self, char_idx: usize) -> usize {
471 self.rope.char_to_line(char_idx.min(self.rope.len_chars()))
472 }
473
474 pub fn save(&mut self) -> Result<()> {
475 if let Some(path) = &self.file_path.clone() {
476 if cfg!(debug_assertions) {
477 eprintln!("[DEBUG] Saving file: {}", path.display());
478 eprintln!("[DEBUG] save_encoding: {}", self.save_encoding.name());
479 }
480
481 let contents = self.rope.to_string();
482 let (encoded, _, had_errors) = self.save_encoding.encode(&contents);
484 if had_errors {
485 eprintln!(
486 "[WARN] Encoding errors occurred while saving file: {}",
487 path.display()
488 );
489 }
490 std::fs::write(path, encoded)?;
491 self.modified = false;
492
493 if cfg!(debug_assertions) {
494 eprintln!(
495 "[DEBUG] File saved successfully with {} encoding",
496 self.save_encoding.name()
497 );
498 }
499
500 Ok(())
501 } else {
502 anyhow::bail!("No file path set")
503 }
504 }
505
506 #[allow(dead_code)]
507 pub fn save_to(&mut self, path: &Path) -> Result<()> {
508 let contents = self.rope.to_string();
509 let (encoded, _, had_errors) = self.save_encoding.encode(&contents);
511 if had_errors {
512 eprintln!(
513 "[WARN] Encoding errors occurred while saving file: {}",
514 path.display()
515 );
516 }
517 std::fs::write(path, encoded)?;
518 self.modified = false;
519 self.file_path = Some(path.to_path_buf());
520 Ok(())
521 }
522
523 #[allow(dead_code)]
524 pub fn save_as(&mut self, path: &Path) -> Result<()> {
525 let contents = self.rope.to_string();
526 let (encoded, _, had_errors) = self.save_encoding.encode(&contents);
528 if had_errors {
529 eprintln!(
530 "[WARN] Encoding errors occurred while saving file: {}",
531 path.display()
532 );
533 }
534 fs::write(path, encoded)
535 .with_context(|| format!("Failed to write file: {}", path.display()))?;
536 self.file_path = Some(path.to_path_buf());
537 self.modified = false;
538 Ok(())
539 }
540
    /// Returns the buffer's modified flag: set by edits, cleared by a
    /// successful save or reload.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
544
545 #[allow(dead_code)]
546 pub fn file_path(&self) -> Option<&Path> {
547 self.file_path.as_deref()
548 }
549
550 pub fn file_name(&self) -> String {
551 self.file_path
552 .as_ref()
553 .and_then(|p| p.file_name())
554 .and_then(|n| n.to_str())
555 .unwrap_or("[No Name]")
556 .to_string()
557 }
558
    /// Returns the buffer length in chars, as reported by the rope's
    /// `len_chars`.
    pub fn len_chars(&self) -> usize {
        self.rope.len_chars()
    }
562
563 pub fn get_line_content(&self, line_idx: usize) -> String {
564 if let Some(line) = self.line(line_idx) {
565 line.to_string()
566 } else {
567 String::new()
568 }
569 }
570
571 pub fn get_line_full(&self, line_idx: usize) -> String {
573 let line_start = self.line_to_char(line_idx);
574 let line_end = if line_idx + 1 < self.line_count() {
575 self.line_to_char(line_idx + 1)
576 } else {
577 self.rope.len_chars()
578 };
579 self.rope.slice(line_start..line_end).to_string()
580 }
581
582 pub fn undo(&mut self) -> Option<usize> {
584 if let Some(action) = self.history.undo() {
585 self.in_undo_redo = true;
586
587 let result_pos = match action {
588 Action::Insert { pos, text } => {
589 let char_count = text.chars().count();
591 self.rope.remove(pos..pos + char_count);
592 self.modified = true;
593 Some(pos)
594 }
595 Action::Delete { pos, text } => {
596 self.rope.insert(pos, &text);
598 self.modified = true;
599 Some(pos)
600 }
601 Action::DeleteRange { start, text, .. } => {
602 self.rope.insert(start, &text);
604 self.modified = true;
605 Some(start)
606 }
607 };
608
609 self.in_undo_redo = false;
610 result_pos
611 } else {
612 None
613 }
614 }
615
616 pub fn redo(&mut self) -> Option<usize> {
617 if let Some(action) = self.history.redo() {
618 self.in_undo_redo = true;
619
620 let result_pos = match action {
621 Action::Insert { pos, text } => {
622 self.rope.insert(pos, &text);
624 self.modified = true;
625 Some(pos + text.chars().count())
626 }
627 Action::Delete { pos, text } => {
628 let char_count = text.chars().count();
630 self.rope.remove(pos..pos + char_count);
631 self.modified = true;
632 Some(pos)
633 }
634 Action::DeleteRange { start, end, .. } => {
635 self.rope.remove(start..end);
637 self.modified = true;
638 Some(start)
639 }
640 };
641
642 self.in_undo_redo = false;
643 result_pos
644 } else {
645 None
646 }
647 }
648
    /// Forwards to the history's `can_undo`.
    #[allow(dead_code)]
    pub fn can_undo(&self) -> bool {
        self.history.can_undo()
    }
653
    /// Forwards to the history's `can_redo`.
    #[allow(dead_code)]
    pub fn can_redo(&self) -> bool {
        self.history.can_redo()
    }
658
    /// Records `encoding` as the buffer's read encoding.
    ///
    /// Only the stored value changes; the text already in the buffer is not
    /// re-decoded and the modified flag is untouched.
    pub fn set_read_encoding(&mut self, encoding: &'static encoding_rs::Encoding) {
        self.read_encoding = encoding;
    }
663
664 pub fn set_save_encoding(&mut self, encoding: &'static encoding_rs::Encoding) {
666 self.save_encoding = encoding;
667 self.modified = true;
669 }
670
    /// Returns the encoding that will be used when the buffer is saved.
    #[allow(dead_code)]
    pub fn save_encoding(&self) -> &'static encoding_rs::Encoding {
        self.save_encoding
    }
676
677 pub fn reload_with_encoding(&mut self, encoding: &'static encoding_rs::Encoding) -> Result<()> {
679 if let Some(path) = &self.file_path.clone() {
680 let encoding_config = EncodingConfig {
681 read_encoding: Some(encoding),
682 save_encoding: Some(encoding),
683 };
684 let new_buffer = Self::from_file_with_encoding(path, &encoding_config)?;
685
686 self.rope = new_buffer.rope;
688 self.read_encoding = new_buffer.read_encoding;
689 self.save_encoding = new_buffer.save_encoding;
690 self.modified = false;
691 self.history.clear(); Ok(())
694 } else {
695 anyhow::bail!("No file to reload")
696 }
697 }
698
699 pub fn change_encoding(&mut self, encoding: &'static encoding_rs::Encoding) {
701 self.read_encoding = encoding;
702 self.save_encoding = encoding;
703 }
705
    /// Returns `true` when the buffer has a file path set.
    pub fn has_file_path(&self) -> bool {
        self.file_path.is_some()
    }
710}
711
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Text used by the CJK round-trip tests.
    const GREETING: &str = "Hello, 世界!";

    /// Creates a temporary directory and a path for `name` inside it.
    /// The directory guard must be kept alive for as long as the path is used.
    fn scratch_file(name: &str) -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(name);
        (dir, path)
    }

    /// Configuration that leaves encoding selection to auto-detection.
    fn auto_detect() -> EncodingConfig {
        EncodingConfig {
            read_encoding: None,
            save_encoding: None,
        }
    }

    /// BOM-less UTF-8 content is detected as UTF-8.
    #[test]
    fn test_utf8_file_detection() {
        let (_dir, file_path) = scratch_file("test_utf8.txt");
        fs::write(&file_path, GREETING).unwrap();

        let buffer = RopeBuffer::from_file_with_encoding(&file_path, &auto_detect()).unwrap();

        assert_eq!(buffer.save_encoding().name(), "UTF-8");
    }

    /// A UTF-8 BOM selects UTF-8.
    #[test]
    fn test_utf8_bom_detection() {
        let (_dir, file_path) = scratch_file("test_utf8_bom.txt");
        let mut content = vec![0xEF, 0xBB, 0xBF];
        content.extend_from_slice(GREETING.as_bytes());
        fs::write(&file_path, content).unwrap();

        let buffer = RopeBuffer::from_file_with_encoding(&file_path, &auto_detect()).unwrap();

        assert_eq!(buffer.save_encoding().name(), "UTF-8");
    }

    /// A UTF-16LE BOM selects UTF-16LE.
    #[test]
    fn test_utf16le_bom_detection() {
        let (_dir, file_path) = scratch_file("test_utf16le.txt");
        let mut content = vec![0xFF, 0xFE];
        let utf16_bytes: Vec<u8> = "Hello"
            .encode_utf16()
            .flat_map(|unit| unit.to_le_bytes())
            .collect();
        content.extend_from_slice(&utf16_bytes);
        fs::write(&file_path, content).unwrap();

        let buffer = RopeBuffer::from_file_with_encoding(&file_path, &auto_detect()).unwrap();

        assert_eq!(buffer.save_encoding().name(), "UTF-16LE");
    }

    /// Text saved as GBK decodes back to the original.
    #[test]
    fn test_gbk_encoding_save() {
        let (_dir, file_path) = scratch_file("test_gbk.txt");

        let mut buffer = RopeBuffer::new();
        buffer.set_save_encoding(encoding_rs::GBK);
        buffer.insert(0, GREETING);
        buffer.save_to(&file_path).unwrap();

        let saved_bytes = fs::read(&file_path).unwrap();
        let (decoded, _, _) = encoding_rs::GBK.decode(&saved_bytes);
        assert_eq!(decoded, GREETING);
    }

    /// A UTF-8 file opened with a GBK read override can be re-saved as GBK.
    #[test]
    fn test_encoding_override() {
        let (_dir, file_path) = scratch_file("test_override.txt");
        fs::write(&file_path, GREETING).unwrap();

        let config = EncodingConfig {
            read_encoding: Some(encoding_rs::GBK),
            save_encoding: None,
        };
        let mut buffer = RopeBuffer::from_file_with_encoding(&file_path, &config).unwrap();
        buffer.set_save_encoding(encoding_rs::GBK);
        buffer.save_to(&file_path).unwrap();

        let saved_bytes = fs::read(&file_path).unwrap();
        let (decoded, _, _) = encoding_rs::GBK.decode(&saved_bytes);
        assert_eq!(decoded, GREETING);
    }

    /// Text saved as Windows-1252 decodes back to the original.
    #[test]
    fn test_ansi_encoding_save() {
        let (_dir, file_path) = scratch_file("test_ansi.txt");
        let text = "Hello, world! ©";

        let mut buffer = RopeBuffer::new();
        buffer.set_save_encoding(encoding_rs::WINDOWS_1252);
        buffer.insert(0, text);
        buffer.save_to(&file_path).unwrap();

        let saved_bytes = fs::read(&file_path).unwrap();
        let (decoded, _, _) = encoding_rs::WINDOWS_1252.decode(&saved_bytes);
        assert_eq!(decoded, text);
    }

    /// Text saved as Big5 still contains its ASCII part after decoding.
    #[test]
    fn test_big5_encoding_save() {
        let (_dir, file_path) = scratch_file("test_big5.txt");
        let big5_encoding = encoding_rs::Encoding::for_label(b"big5").unwrap();

        let mut buffer = RopeBuffer::new();
        buffer.set_save_encoding(big5_encoding);
        buffer.insert(0, GREETING);
        buffer.save_to(&file_path).unwrap();

        let saved_bytes = fs::read(&file_path).unwrap();
        let (decoded, _, _) = big5_encoding.decode(&saved_bytes);
        assert!(decoded.contains("Hello"));
    }
}
871
872impl Default for RopeBuffer {
873 fn default() -> Self {
874 Self::new()
875 }
876}