use regex::Regex;
pub fn norm_new_lines(s: &str) -> String {
s.replace("\r\n", "\n").replace('\r', "\n")
}
pub fn first_word(s: &str) -> &str {
s.split_whitespace().next().unwrap_or(s)
}
pub fn similar(a: &str, b: &str) -> f64 {
if a.is_empty() || b.is_empty() {
return 0.0;
}
let a_lower = a.to_lowercase();
let b_lower = b.to_lowercase();
if a_lower == b_lower {
return 100.0;
}
let max_len = a_lower.len().max(b_lower.len());
if max_len == 0 {
return 100.0;
}
let distance = levenshtein(&a_lower, &b_lower);
((max_len - distance) as f64 / max_len as f64) * 100.0
}
#[allow(clippy::needless_range_loop)]
pub fn levenshtein(a: &str, b: &str) -> usize {
let len_a = a.len();
let len_b = b.len();
if len_a == 0 {
return len_b;
}
if len_b == 0 {
return len_a;
}
let mut matrix = vec![vec![0usize; len_b + 1]; len_a + 1];
for i in 0..=len_a {
matrix[i][0] = i;
}
for j in 0..=len_b {
matrix[0][j] = j;
}
for i in 1..=len_a {
for j in 1..=len_b {
let cost = if a.as_bytes()[i - 1] == b.as_bytes()[j - 1] {
0
} else {
1
};
matrix[i][j] = (matrix[i - 1][j] + 1)
.min(matrix[i][j - 1] + 1)
.min(matrix[i - 1][j - 1] + cost);
}
}
matrix[len_a][len_b]
}
pub fn truncate(s: &str, max_len: usize) -> String {
if s.len() <= max_len {
s.to_string()
} else {
format!("{}...", &s[..max_len.saturating_sub(3)])
}
}
pub fn ucfirst(s: &str) -> String {
let mut chars = s.chars();
match chars.next() {
Some(first) => first.to_uppercase().chain(chars).collect(),
None => String::new(),
}
}
pub fn lcfirst(s: &str) -> String {
let mut chars = s.chars();
match chars.next() {
Some(first) => first.to_lowercase().chain(chars).collect(),
None => String::new(),
}
}
pub fn substr(input: &str, start: usize, length: usize) -> String {
let runes: Vec<char> = input.chars().collect();
if start >= runes.len() {
return String::new();
}
let end = (start + length).min(runes.len());
runes[start..end].iter().collect()
}
pub fn is_multiline(text: &str) -> bool {
let text = norm_new_lines(text);
text.lines().count() > 1
}
pub fn split_text_into_chunks(text: &str, max_len: usize) -> Vec<String> {
let text = text.trim();
if max_len == 0 {
return vec![text.to_string()];
}
let mut chunks = Vec::new();
let mut runes: Vec<char> = text.chars().collect();
while runes.len() > max_len {
let window = &runes[..max_len];
let mut split_index = None;
for i in (0..window.len()).rev() {
if window[i] == '\n' {
split_index = Some(i);
break;
}
}
if split_index.is_none() {
for i in (0..window.len()).rev() {
if window[i] == ' ' {
split_index = Some(i);
break;
}
}
}
let split_index = split_index.unwrap_or(max_len);
let chunk: String = runes[..split_index].iter().collect();
let chunk = chunk.trim();
if !chunk.is_empty() {
chunks.push(chunk.to_string());
}
let remainder: String = runes[split_index..].iter().collect();
runes = remainder.trim().chars().collect();
}
let remainder: String = runes.iter().collect();
let remainder = remainder.trim();
if !remainder.is_empty() {
chunks.push(remainder.to_string());
}
chunks
}
const EMOJI_STRIP_PREFIXES: &[&str] = &["WRK ", "UA ", "US ", "CY ", "HOB ", "SRB ", "PL "];
pub fn emoji_prefix(emoji: &str, s: &str) -> String {
let mut s = s.to_string();
for prefix in EMOJI_STRIP_PREFIXES {
s = s.trim_start_matches(prefix).to_string();
}
if emoji.is_empty() {
return s;
}
format!("{} {}", emoji, s)
}
pub fn has_image(msg: &str) -> bool {
Regex::new(r"!\[.*?\]\(.*?\)").unwrap().is_match(msg)
}
pub fn strip_chat_timestamp(s: &str) -> String {
Regex::new(r"^`\d{2}:\d{2}` ")
.unwrap()
.replace(s, "")
.to_string()
}
pub fn extract_markdown_links(content: &str) -> Vec<(String, String)> {
let re = Regex::new(r"\[([^\]]*)\]\(([^)]+)\)").unwrap();
re.captures_iter(content)
.filter_map(|cap| {
let text = cap.get(1)?.as_str().to_string();
let path = cap.get(2)?.as_str().to_string();
if path.starts_with("http://") || path.starts_with("https://") {
return None;
}
Some((text, path))
})
.collect()
}
pub fn extract_headings(content: &str) -> Vec<String> {
let re = Regex::new(r"(?m)^(#{1,6})\s+(.+)$").unwrap();
re.captures_iter(content)
.filter_map(|cap| cap.get(2).map(|m| m.as_str().trim().to_string()))
.collect()
}
pub const MIN_SEARCH_SIMILARITY: i32 = 70;
pub fn today_chat_header() -> String {
use chrono::Local;
let now = Local::now();
format!("#### {} {}", now.format("%d %B,"), now.format("%A"))
}
pub fn today_journal_path() -> String {
use chrono::Local;
let now = Local::now();
format!("journal/{}.{}.md", now.format("%Y.%m"), now.format("%B"))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_norm_newlines() {
assert_eq!(norm_new_lines("a\r\nb\r\nc"), "a\nb\nc");
assert_eq!(norm_new_lines("a\rb\rc"), "a\nb\nc");
}
#[test]
fn test_similar() {
assert!(similar("hello", "helo") > 70.0);
assert!(similar("test", "test") > 99.0);
assert_eq!(similar("", ""), 0.0);
}
#[test]
fn test_levenshtein() {
assert_eq!(levenshtein("kitten", "sitting"), 3);
assert_eq!(levenshtein("test", "test"), 0);
}
#[test]
fn test_truncate() {
assert_eq!(truncate("hello", 10), "hello");
assert_eq!(truncate("hello world", 8), "hello...");
}
#[test]
fn test_extract_links() {
let md =
"See [Rust](brain/Rust.md) and [Go](brain/Go.md) but not [ext](https://example.com)";
let links = extract_markdown_links(md);
assert_eq!(links.len(), 2);
assert_eq!(links[0].0, "Rust");
assert_eq!(links[0].1, "brain/Rust.md");
}
#[test]
fn test_extract_headings() {
let md = "# Title\n## Section\n### Sub\nsome text";
let headings = extract_headings(md);
assert_eq!(headings, vec!["Title", "Section", "Sub"]);
}
#[test]
fn test_ucfirst() {
assert_eq!(ucfirst("hello"), "Hello");
assert_eq!(ucfirst(""), "");
assert_eq!(ucfirst("Already"), "Already");
assert_eq!(ucfirst("über"), "Über");
}
#[test]
fn test_lcfirst() {
assert_eq!(lcfirst("Hello"), "hello");
assert_eq!(lcfirst(""), "");
assert_eq!(lcfirst("lower"), "lower");
}
#[test]
fn test_substr() {
assert_eq!(substr("Hello", 0, 3), "Hel");
assert_eq!(substr("Hello", 2, 3), "llo");
assert_eq!(substr("Hello", 3, 10), "lo");
assert_eq!(substr("Hello", 10, 2), "");
assert_eq!(substr("", 0, 5), "");
assert_eq!(substr("안녕하세요", 0, 2), "안녕");
}
#[test]
fn test_is_multiline() {
assert!(is_multiline("line one\nline two"));
assert!(!is_multiline("single line"));
assert!(is_multiline("a\r\nb"));
assert!(!is_multiline(""));
}
#[test]
fn test_split_text_into_chunks() {
let chunks = split_text_into_chunks("Hello", 5);
assert_eq!(chunks, vec!["Hello"]);
let chunks = split_text_into_chunks("This is a test to check the splitting of text", 10);
for chunk in &chunks {
assert!(
chunk.len() <= 10,
"chunk too long: '{}' ({})",
chunk,
chunk.len()
);
}
let chunks = split_text_into_chunks("Line one\nLine two\nLine three", 15);
assert_eq!(chunks, vec!["Line one", "Line two", "Line three"]);
let chunks = split_text_into_chunks("Hello world", 0);
assert_eq!(chunks, vec!["Hello world"]);
}
#[test]
fn test_emoji_prefix() {
assert_eq!(emoji_prefix("📝", "WRK Task"), "📝 Task");
assert_eq!(emoji_prefix("✅", "Task"), "✅ Task");
assert_eq!(emoji_prefix("", "Hello"), "Hello");
assert_eq!(emoji_prefix("🎉", "UA Celebration"), "🎉 Celebration");
}
#[test]
fn test_has_image() {
assert!(has_image("look: "));
assert!(!has_image("just text"));
}
}