json_repair_rs/utils/string_utils.rs
1use std::io;
2use std::io::{BufReader, Read};
3
4// use std::collections::HashMap;
5// use std::fs::File;
6// use std::io::{Seek, SeekFrom, Read, Write};
7// use std::path::Path;
8//
9// pub struct StringFileWrapper {
10// file: File,
11// length: usize,
12// buffers: HashMap<usize, String>,
13// buffer_length: usize,
14// }
15//
16// impl StringFileWrapper {
17// pub fn new(file: File, chunk_length: usize) -> Self {
18// let buffer_length = if chunk_length < 2 { 1_000_000 } else { chunk_length };
19// StringFileWrapper {
20// file,
21// length: 0,
22// buffers: HashMap::new(),
23// buffer_length,
24// }
25// }
26//
27// pub fn get_buffer(&mut self, index: usize) -> &str {
28// if !self.buffers.contains_key(&index) {
29// let mut buffer = vec![0; self.buffer_length];
30// self.file.seek(SeekFrom::Start((index * self.buffer_length) as u64)).unwrap();
31// let bytes_read = self.file.read(&mut buffer).unwrap();
32// let buffer_str = String::from_utf8_lossy(&buffer[..bytes_read]).into_owned();
33// self.buffers.insert(index, buffer_str);
34//
35// // Save memory by keeping max 2MB buffer chunks and min 2 chunks
36// if self.buffers.len() > std::cmp::max(2, 2_000_000 / self.buffer_length) {
37// let oldest_key = *self.buffers.keys().next().unwrap();
38// if oldest_key != index {
39// self.buffers.remove(&oldest_key);
40// }
41// }
42// }
43// &self.buffers[&index]
44// }
45//
46// pub fn len(&mut self) -> usize {
47// if self.length == 0 {
48// let current_position = self.file.seek(SeekFrom::Current(0)).unwrap();
49// self.length = self.file.seek(SeekFrom::End(0)).unwrap() as usize;
50// self.file.seek(SeekFrom::Start(current_position)).unwrap();
51// }
52// self.length
53// }
54//
55// pub fn get(&mut self, index: usize) -> Option<char> {
56// let buffer_index = index / self.buffer_length;
57// let buffer = self.get_buffer(buffer_index);
58// buffer.chars().nth(index % self.buffer_length)
59// }
60//
61// pub fn get_slice(&mut self, start: usize, end: usize) -> String {
62// let buffer_start = start / self.buffer_length;
63// let buffer_end = end / self.buffer_length;
64//
65// if buffer_start == buffer_end {
66// let buffer = self.get_buffer(buffer_start);
67// buffer[(start % self.buffer_length)..(end % self.buffer_length)].to_string()
68// } else {
69// let start_slice = self.get_buffer(buffer_start)[(start % self.buffer_length)..].to_string();
70// let end_slice = self.get_buffer(buffer_end)[..(end % self.buffer_length)].to_string();
71// let middle_slices: String = (buffer_start + 1..buffer_end)
72// .map(|i| self.get_buffer(i).to_string())
73// .collect();
74// start_slice + &middle_slices + &end_slice
75// }
76// }
77//
78// pub fn set(&mut self, index: usize, value: &str) {
79// let current_position = self.file.seek(SeekFrom::Current(0)).unwrap();
80// self.file.seek(SeekFrom::Start(index as u64)).unwrap();
81// self.file.write_all(value.as_bytes()).unwrap();
82// self.file.seek(SeekFrom::Start(current_position)).unwrap();
83// }
84// }
85//
86// fn main() {
87// let file = File::open("example.txt").unwrap();
88// let mut wrapper = StringFileWrapper::new(file, 1_000_000);
89//
90// println!("Length: {}", wrapper.len());
91// println!("Character at index 10: {:?}", wrapper.get(10));
92// println!("Slice from 10 to 20: {}", wrapper.get_slice(10, 20));
93//
94// wrapper.set(10, "new_value");
95// }
/// Wraps a string or file-like reader as an in-memory character source.
///
/// The entire input is read into `content` when the wrapper is created.
/// `index` is a cursor counting how many characters (Unicode scalar values,
/// not bytes) have been consumed through [`StringFileWrapper::read_char`].
pub struct StringFileWrapper {
    /// Complete decoded text of the wrapped input.
    pub(crate) content: String,
    /// Position of the next character to read, measured in characters.
    pub(crate) index: usize,
}

impl StringFileWrapper {
    /// Reads everything from `reader` and positions the cursor at the start.
    ///
    /// The second argument (a chunk length) is accepted for call-site
    /// compatibility only; the input is always loaded in full and the value
    /// is ignored.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if reading fails, which includes the
    /// case where the input is not valid UTF-8.
    pub fn new<R: Read>(reader: R, _chunk_length: usize) -> io::Result<Self> {
        let mut content = String::new();
        BufReader::new(reader).read_to_string(&mut content)?;
        Ok(Self { content, index: 0 })
    }

    /// Returns the character under the cursor and advances the cursor by one.
    ///
    /// Returns `None`, leaving the cursor untouched, once every character has
    /// been consumed.
    pub fn read_char(&mut self) -> Option<char> {
        // `peek_char` already yields `None` past the last character, so no
        // separate bounds check is needed (the byte length would be the wrong
        // bound for a character index anyway).
        let ch = self.peek_char()?;
        self.index += 1;
        Some(ch)
    }

    /// Returns the character under the cursor without consuming it, or `None`
    /// when the cursor is past the last character.
    ///
    /// NOTE: the lookup walks `content` from the beginning on every call
    /// because `index` is a character count rather than a byte offset.
    pub fn peek_char(&self) -> Option<char> {
        self.content.chars().nth(self.index)
    }

    /// Returns the length of the content in bytes.
    ///
    /// This differs from the number of characters whenever the content holds
    /// multi-byte UTF-8 sequences, so it is not an upper bound that `index`
    /// will ever reach for such content.
    pub fn len(&self) -> usize {
        self.content.len()
    }

    /// Returns `true` when the wrapped content is empty.
    pub fn is_empty(&self) -> bool {
        self.content.is_empty()
    }
}
128