1use chrono::{DateTime, Utc};
4use line_ending::LineEnding;
5use std::fs;
6use std::path::Path;
7
8use crate::data::{RecordSection, SectionRole};
9use crate::types::Sentence;
10
11pub fn platform_newline() -> &'static str {
13 LineEnding::from_current_platform().as_str()
14}
15
/// Collapses every run of whitespace in `text` into a single ASCII space and
/// strips leading and trailing whitespace.
///
/// Whitespace is whatever `char::is_whitespace` accepts, so tabs, newlines
/// and Unicode spaces are all treated alike. Input that contains nothing but
/// whitespace yields an empty string.
pub fn normalize_inline_whitespace<T: AsRef<str>>(text: T) -> String {
    let mut collapsed = String::new();
    for word in text.as_ref().split_whitespace() {
        // Separator goes *between* words only, so no trimming is needed.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
34
35pub fn sentences(text: &str) -> Vec<Sentence> {
38 let mut results = Vec::new();
39 let normalized = LineEnding::normalize(text);
40 let paragraph_sep = LineEnding::LF.as_str().repeat(2);
41 for block in normalized.split(¶graph_sep) {
42 if block.trim().is_empty() {
43 continue;
44 }
45 let normalized = normalize_inline_whitespace(block);
46 if normalized.is_empty() {
47 continue;
48 }
49 push_block_sentences(&normalized, &mut results);
50 }
51
52 results
53}
54
55pub fn make_section(role: SectionRole, heading: Option<&str>, text: &str) -> RecordSection {
58 RecordSection {
59 role,
60 heading: heading.map(|h| h.to_string()),
61 text: text.to_string(),
62 sentences: sentences(text),
63 }
64}
65
66fn push_block_sentences(block: &str, results: &mut Vec<Sentence>) {
67 let chars: Vec<char> = block.chars().collect();
68 let mut buffer = String::new();
69
70 for (idx, ch) in chars.iter().enumerate() {
71 buffer.push(*ch);
72 if is_sentence_boundary(&chars, idx) {
73 let trimmed = buffer.trim();
74 if !trimmed.is_empty() {
75 results.push(trimmed.to_string());
76 }
77 buffer.clear();
78 }
79 }
80
81 let trailing = buffer.trim();
82 if !trailing.is_empty() {
83 results.push(trailing.to_string());
84 }
85}
86
87fn is_sentence_boundary(chars: &[char], idx: usize) -> bool {
88 match chars[idx] {
89 '.' => is_dot_boundary(chars, idx),
90 '!' | '?' => true,
91 _ => false,
92 }
93}
94
95fn is_dot_boundary(chars: &[char], idx: usize) -> bool {
96 if is_decimal_middle(chars, idx) || is_ticker_middle(chars, idx) {
97 return false;
98 }
99 if idx + 1 < chars.len() && chars[idx + 1] == '.' {
100 return false;
101 }
102 true
103}
104
/// Returns true when position `idx` has an ASCII digit immediately before
/// and immediately after it (e.g. the dot in `3.14`).
///
/// The character at `idx` itself is not inspected. Positions at either end of
/// `chars` have no neighbour on one side and therefore return false.
fn is_decimal_middle(chars: &[char], idx: usize) -> bool {
    if idx == 0 || idx + 1 >= chars.len() {
        return false;
    }
    let before = chars[idx - 1];
    let after = chars[idx + 1];
    before.is_ascii_digit() && after.is_ascii_digit()
}
111
112fn is_ticker_middle(chars: &[char], idx: usize) -> bool {
113 idx > 0
114 && idx + 1 < chars.len()
115 && is_ticker_char(chars[idx - 1])
116 && is_ticker_char(chars[idx + 1])
117}
118
/// Returns true for the characters allowed next to a dot inside a ticker
/// symbol: ASCII uppercase letters and ASCII digits.
fn is_ticker_char(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | '0'..='9')
}
122
/// Returns true when `path` has the extension `txt`, compared without regard
/// to ASCII case.
///
/// Paths with no extension, or with an extension that is not valid UTF-8,
/// return false. Only the path string is examined; the file system is not
/// touched.
pub fn is_text_file(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("txt"),
        None => false,
    }
}
134
135pub fn file_mtime(path: &Path) -> Option<DateTime<Utc>> {
137 let metadata = fs::metadata(path).ok()?;
138 let modified = metadata.modified().ok()?;
139 Some(system_time_to_utc(modified))
140}
141
142pub fn file_times(path: &Path) -> (DateTime<Utc>, DateTime<Utc>) {
144 let metadata = fs::metadata(path).ok();
145 let updated_at = metadata
146 .as_ref()
147 .and_then(|meta| meta.modified().ok())
148 .map(system_time_to_utc)
149 .unwrap_or_else(Utc::now);
150 let created_at = metadata
151 .and_then(|meta| meta.created().ok())
152 .map(system_time_to_utc)
153 .unwrap_or(updated_at);
154 (created_at, updated_at)
155}
156
157fn system_time_to_utc(time: std::time::SystemTime) -> DateTime<Utc> {
158 DateTime::<Utc>::from(time)
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    // Newline, space and tab runs must each collapse to one space.
    #[test]
    fn normalize_inline_whitespace_collapses_runs() {
        let newline = platform_newline();
        let raw = format!("Alpha{0}{0} Beta\tGamma", newline);
        let collapsed = normalize_inline_whitespace(&raw);
        assert_eq!(collapsed, "Alpha Beta Gamma");
    }

    // Blank input yields nothing; unpunctuated input yields one sentence.
    #[test]
    fn sentences_falls_back_to_full_text_when_needed() {
        let blank = format!(" {}", platform_newline());
        assert!(sentences(&blank).is_empty());

        let unpunctuated = "Single block without punctuation";
        let expected = vec![String::from("Single block without punctuation")];
        assert_eq!(sentences(unpunctuated), expected);
    }

    // `make_section` copies role and heading and splits the body text.
    #[test]
    fn make_section_populates_sentences() {
        let built = make_section(SectionRole::Context, Some("Summary"), "Line one. Line two!");
        assert_eq!(built.heading.as_deref(), Some("Summary"));
        assert_eq!(built.sentences.len(), 2);
        assert_eq!(built.role, SectionRole::Context);
    }

    // A dot between digits is part of the number, not a sentence end.
    #[test]
    fn sentences_keep_decimal_values_together() {
        let split = sentences("Price closed at 3.14. Outlook improved.");
        assert_eq!(split, vec!["Price closed at 3.14.", "Outlook improved."]);
    }

    // A dot between uppercase ticker characters stays inside the sentence.
    #[test]
    fn sentences_keep_dot_tickers_together() {
        let split = sentences("BRK.B rallied while RDS.A lagged.");
        assert_eq!(split, vec!["BRK.B rallied while RDS.A lagged."]);
    }

    // Both helpers must cope with a real file and with a path that is absent.
    #[test]
    fn file_time_helpers_handle_existing_and_missing_paths() {
        use tempfile::tempdir;

        let dir = tempdir().unwrap();
        let present = dir.path().join("exists.txt");
        std::fs::write(&present, "hello").unwrap();

        assert!(file_mtime(&present).is_some());
        let (created_at, updated_at) = file_times(&present);
        assert!(updated_at >= created_at);

        let absent = dir.path().join("missing.txt");
        assert!(file_mtime(&absent).is_none());
        let (absent_created, absent_updated) = file_times(&absent);
        assert!(absent_updated >= absent_created);
    }

    // A blank line separates sentences even without terminal punctuation.
    #[test]
    fn sentences_treat_blank_line_as_boundary() {
        let newline = platform_newline();
        let raw = format!(
            "First line without punctuation{0}{0}Second line with more context.",
            newline
        );
        let expected = vec![
            String::from("First line without punctuation"),
            String::from("Second line with more context."),
        ];
        assert_eq!(sentences(&raw), expected);
    }

    // Only the last dot of an ellipsis ends the sentence.
    #[test]
    fn sentences_keep_ellipsis_together() {
        let split = sentences("Wait... really? Yes.");
        assert_eq!(split, vec!["Wait...", "really?", "Yes."]);
    }

    // The extension check ignores ASCII case and rejects everything else.
    #[test]
    fn is_text_file_matches_txt_case_insensitively() {
        use std::path::PathBuf;

        for accepted in ["hello.txt", "hello.TXT", "hello.Txt"] {
            assert!(
                is_text_file(&PathBuf::from(accepted)),
                "{} should be treated as a text file",
                accepted
            );
        }
        for rejected in ["hello.md", "hello"] {
            assert!(
                !is_text_file(&PathBuf::from(rejected)),
                "{} should not be treated as a text file",
                rejected
            );
        }
    }
}