1use crate::common::SenseEntry;
2use std::str::FromStr;
3
4pub fn parse_sense_line(line: &str) -> Result<SenseEntry, &'static str> {
6 let fields: Vec<&str> = line.split_whitespace().collect();
7 if fields.len() < 4 {
8 return Err("Invalid sense index line format: insufficient fields");
9 }
10
11 let sense_key = fields[0].to_string();
12 let synset_offset = u64::from_str(fields[1]).map_err(|_| "Invalid synset offset")?;
13 let sense_number = u32::from_str(fields[2]).map_err(|_| "Invalid sense number")?;
14 let tag_cnt = u32::from_str(fields[3]).map_err(|_| "Invalid tag count")?;
15
16 Ok(SenseEntry {
17 sense_key,
18 synset_offset,
19 sense_number,
20 tag_cnt,
21 })
22}
23
/// Splits a sense key of the form `lemma%lex_sense` into `(lemma, lex_sense)`.
///
/// Both halves are returned as owned strings and either may be empty.
///
/// # Errors
///
/// Returns a static message unless the key contains exactly one `%`. The same
/// message is used whether the separator is absent or appears more than once.
pub fn parse_sense_key(sense_key: &str) -> Result<(String, String), &'static str> {
    let mut halves = sense_key.split('%');

    // Exactly two pieces means exactly one separator; a third piece (or a
    // missing second one) is rejected.
    match (halves.next(), halves.next(), halves.next()) {
        (Some(lemma), Some(lex_sense), None) => Ok((lemma.to_owned(), lex_sense.to_owned())),
        _ => Err("Invalid sense key format: missing '%' separator"),
    }
}
36
/// Parses a `lex_sense` string of five colon-separated parts into the tuple
/// `(ss_type, lex_filenum, lex_id, head_word, head_id)`.
///
/// The first three parts must be valid `u8` numbers. `head_word` is returned
/// verbatim (possibly empty). An empty `head_id` is reported as `0`; otherwise
/// it must also be a valid `u8`.
///
/// # Errors
///
/// Returns a static message when the input does not have exactly five parts,
/// or naming the first numeric part that fails to parse.
pub fn parse_lex_sense(lex_sense: &str) -> Result<(u8, u8, u8, String, u8), &'static str> {
    let pieces: Vec<&str> = lex_sense.split(':').collect();

    match pieces.as_slice() {
        // Tuple elements are evaluated left to right, so the first malformed
        // numeric part determines the error that is returned.
        &[ss_type, lex_filenum, lex_id, head_word, head_id] => Ok((
            u8::from_str(ss_type).map_err(|_| "Invalid ss_type")?,
            u8::from_str(lex_filenum).map_err(|_| "Invalid lex_filenum")?,
            u8::from_str(lex_id).map_err(|_| "Invalid lex_id")?,
            head_word.to_owned(),
            match head_id {
                // A blank head_id is treated as zero rather than an error.
                "" => 0,
                digits => u8::from_str(digits).map_err(|_| "Invalid head_id")?,
            },
        )),
        _ => Err("Invalid lex_sense format: should have 5 colon-separated parts"),
    }
}
56
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed index line yields all four fields, with the zero-padded
    /// offset read as a plain number.
    #[test]
    fn test_parse_sense_line() {
        let entry = parse_sense_line("abandonment%1:04:03:: 00204439 1 3").unwrap();

        assert_eq!(
            (
                entry.sense_key.as_str(),
                entry.synset_offset,
                entry.sense_number,
                entry.tag_cnt,
            ),
            ("abandonment%1:04:03::", 204439, 1, 3)
        );
    }

    /// The sense key is split at its `%` into lemma and lex_sense.
    #[test]
    fn test_parse_sense_key() {
        let parsed = parse_sense_key("abandonment%1:04:03::").unwrap();

        assert_eq!(
            parsed,
            ("abandonment".to_string(), "1:04:03::".to_string())
        );
    }

    /// Empty head word and head id come back as `""` and `0`.
    #[test]
    fn test_parse_lex_sense() {
        let parsed = parse_lex_sense("1:04:03::").unwrap();

        assert_eq!(parsed, (1, 4, 3, String::new(), 0));
    }

    /// A populated head word is preserved and a `00` head id parses to `0`.
    #[test]
    fn test_parse_lex_sense_with_head() {
        let parsed = parse_lex_sense("5:00:00:discomposed:00").unwrap();

        assert_eq!(parsed, (5, 0, 0, "discomposed".to_string(), 0));
    }
}