swift_mt_message/parser/
field_extractor.rs

//! # Field Extractor
//!
//! Extracts individual fields from SWIFT message text.
4
5/// Extract field content from SWIFT message text
6///
7/// Returns the field content and the number of characters consumed
8pub fn extract_field_content(input: &str, tag: &str) -> Option<(String, usize)> {
9    let field_marker = format!(":{}:", tag);
10
11    // Find the field marker
12    let field_start = input.find(&field_marker)?;
13
14    // Start of content is after the field marker
15    let content_start = field_start + field_marker.len();
16
17    // Find the next field marker or end marker
18    let remaining = &input[content_start..];
19
20    // Look for the next field (starts with `:` and has format `:XX:` or `:XXX:`)
21    let content_end = find_next_field_boundary(remaining);
22
23    // Extract content
24    let (raw_content, has_trailing_newline) = if let Some(end) = content_end {
25        // end points to the newline before the next field
26        // Check if there's actually a newline at that position
27        let has_newline = remaining.as_bytes().get(end) == Some(&b'\n');
28        (remaining[..end].to_string(), has_newline)
29    } else {
30        // No next field found, take everything until end marker
31        // Look for block end markers in order of specificity:
32        // 1. "\n-}" - end of block with closing brace
33        // 2. "\n-\n" - trailer separator
34        // 3. "\n-" at end of string - simple block end
35        // 4. "-}" - end marker without newline
36        if let Some(end_pos) = remaining.find("\n-}") {
37            (remaining[..end_pos].to_string(), true)
38        } else if let Some(end_pos) = remaining.find("\n-\n") {
39            (remaining[..end_pos].to_string(), true)
40        } else if let Some(end_pos) = remaining.find("\n-") {
41            // Only treat "\n-" as end marker if it's at the end of the string
42            // or followed by "}" (to avoid matching "-" in field content like bullet points)
43            let after_marker = end_pos + 2; // position after "\n-"
44            if after_marker >= remaining.len() || remaining[after_marker..].starts_with('}') {
45                (remaining[..end_pos].to_string(), true)
46            } else {
47                // Take all remaining content
48                (remaining.to_string(), false)
49            }
50        } else if let Some(end_pos) = remaining.find("-}") {
51            (remaining[..end_pos].to_string(), false)
52        } else {
53            // Take all remaining content
54            (remaining.to_string(), false)
55        }
56    };
57
58    // Calculate consumed characters BEFORE trimming (to include newlines)
59    let raw_content_len = raw_content.len();
60
61    // Clean up the content (remove trailing newlines)
62    let content = raw_content.trim_end_matches('\n').trim_end_matches('\r');
63
64    // Calculate consumed characters including the newline after the content if present
65    let consumed = field_start
66        + field_marker.len()
67        + raw_content_len
68        + if has_trailing_newline { 1 } else { 0 };
69
70    Some((content.to_string(), consumed))
71}
72
73/// Find the boundary of the next field
74fn find_next_field_boundary(input: &str) -> Option<usize> {
75    let mut chars = input.char_indices();
76
77    while let Some((i, ch)) = chars.next() {
78        if ch == '\n' {
79            // Check if next character starts a field
80            if let Some((_, ':')) = chars.next() {
81                // This might be a field marker, verify the pattern
82                let rest = &input[i + 1..];
83                if is_field_marker(rest) {
84                    return Some(i);
85                }
86            }
87        }
88    }
89
90    None
91}
92
/// Check if the text starts with a valid field marker pattern.
///
/// A marker is a `:`, a tag, and a closing `:`. The tag (the text between the
/// first two colons) must be 2 to 4 ASCII alphanumeric characters. Non-ASCII
/// letters or digits are rejected, so byte length and character count agree.
fn is_field_marker(input: &str) -> bool {
    input
        .strip_prefix(':')
        // The tag is everything up to (not including) the closing colon.
        .and_then(|rest| rest.find(':').map(|close| &rest[..close]))
        .map_or(false, |tag| {
            (2..=4).contains(&tag.len()) && tag.bytes().all(|b| b.is_ascii_alphanumeric())
        })
}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// A single-line field stops at the next marker; the newline is consumed.
    #[test]
    fn test_extract_simple_field() {
        let message = ":20:REF123\n:21:RELREF\n-";
        let extracted = extract_field_content(message, "20");
        // ":20:REF123\n" is 11 bytes, newline included.
        assert_eq!(extracted, Some(("REF123".to_string(), 11)));
    }

    /// Continuation lines belong to the field until the next marker.
    #[test]
    fn test_extract_multiline_field() {
        let message = ":70:LINE1\nLINE2\nLINE3\n:71A:SHA\n-";
        let text = extract_field_content(message, "70").map(|(content, _)| content);
        assert_eq!(text.as_deref(), Some("LINE1\nLINE2\nLINE3"));
    }

    /// Tags carrying a variant letter are matched as a whole.
    #[test]
    fn test_extract_field_with_variant() {
        let message = ":50K:JOHN DOE\n123 MAIN ST\n:59:BENEFICIARY\n-";
        let text = extract_field_content(message, "50K").map(|(content, _)| content);
        assert_eq!(text.as_deref(), Some("JOHN DOE\n123 MAIN ST"));
    }

    /// The last field ends at the block terminator rather than at a marker.
    #[test]
    fn test_extract_last_field() {
        let message = ":20:REF123\n:71A:SHA\n-";
        let text = extract_field_content(message, "71A").map(|(content, _)| content);
        assert_eq!(text.as_deref(), Some("SHA"));
    }

    /// A tag that never occurs yields `None`.
    #[test]
    fn test_field_not_found() {
        let message = ":20:REF123\n:21:RELREF\n-";
        assert_eq!(extract_field_content(message, "32A"), None);
    }

    /// Marker detection accepts well-formed tags and rejects malformed ones.
    #[test]
    fn test_field_marker_detection() {
        for valid in [":20:", ":32A:", ":50K:"].iter() {
            assert!(is_field_marker(valid), "{} should be a marker", valid);
        }

        let invalid = [
            ":12345:", // Too long
            ":X:",     // Too short
            "20:",     // No starting colon
        ];
        for candidate in invalid.iter() {
            assert!(!is_field_marker(candidate), "{} should be rejected", candidate);
        }
    }
}