swift_mt_message/parser/
sequence_parser.rs

1//! Generic sequence parser for SWIFT MT messages with multiple sequences
2//!
3//! Many SWIFT MT messages have multiple sequences:
4//! - MT101: Sequence A (General Info), Sequence B (Transactions)
5//! - MT104: Sequence A (General Info), Sequence B (Transactions), Sequence C (Settlement)
6//! - MT107: Similar structure with multiple sequences
7//!
8//! This module provides generic parsing capabilities for such messages.
9
10use crate::errors::Result;
11use std::collections::HashMap;
12
/// Type alias for field storage to reduce complexity
///
/// Maps a field tag to every occurrence of that tag, each stored as a
/// `(value, position)` pair. The parsing functions in this module sort on the
/// position to put the fields of all tags back into a single ordered list.
pub type FieldMap = HashMap<String, Vec<(String, usize)>>;
15
/// Configuration for sequence parsing
///
/// Describes how `split_into_sequences` finds the boundaries between sequence A,
/// sequence B and (optionally) sequence C of a message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SequenceConfig {
    /// Field that marks the start of sequence B (usually "21").
    ///
    /// The values "20", "23" and "61" additionally switch on message-specific
    /// handling in `split_into_sequences`.
    pub sequence_b_marker: String,
    /// Fields whose appearance signals the start of sequence C (if any)
    pub sequence_c_fields: Vec<String>,
    /// Whether sequence C exists for this message type.
    ///
    /// When `false`, `sequence_c_fields` is not consulted.
    pub has_sequence_c: bool,
}
26
27impl Default for SequenceConfig {
28    fn default() -> Self {
29        Self {
30            sequence_b_marker: "21".to_string(),
31            sequence_c_fields: vec![],
32            has_sequence_c: false,
33        }
34    }
35}
36
37/// Parsed sequences from a SWIFT message
38#[derive(Debug)]
39pub struct ParsedSequences {
40    /// Sequence A fields (general information)
41    pub sequence_a: FieldMap,
42    /// Sequence B fields (repetitive items like transactions)
43    pub sequence_b: FieldMap,
44    /// Sequence C fields (optional settlement/summary information)
45    pub sequence_c: FieldMap,
46}
47
48/// Split fields into sequences based on configuration
49pub fn split_into_sequences(fields: &FieldMap, config: &SequenceConfig) -> Result<ParsedSequences> {
50    let mut seq_a = HashMap::new();
51    let mut seq_b = HashMap::new();
52    let mut seq_c = HashMap::new();
53
54    // Get all fields sorted by position
55    let mut all_fields: Vec<(&str, &(String, usize))> = Vec::new();
56    for (tag, values) in fields {
57        for value in values {
58            all_fields.push((tag.as_str(), value));
59        }
60    }
61    all_fields.sort_by_key(|(_, (_, pos))| *pos);
62
63    // Find sequence boundaries
64    let mut first_b_marker_pos = None;
65    let mut _last_b_marker_pos = None;
66
67    // Special handling for MT935 which uses "23" or "25" as sequence markers
68    let secondary_marker = if config.sequence_b_marker == "23" {
69        Some("25")
70    } else {
71        None
72    };
73
74    // Fields that belong to sequence A even if they appear after sequence B
75    let sequence_a_fields = ["72", "77E", "79"];
76
77    for (tag, (_, pos)) in &all_fields {
78        if is_sequence_b_marker(tag, &config.sequence_b_marker)
79            || (secondary_marker.is_some() && *tag == secondary_marker.unwrap())
80        {
81            if first_b_marker_pos.is_none() {
82                first_b_marker_pos = Some(*pos);
83            }
84            _last_b_marker_pos = Some(*pos);
85        }
86    }
87
88    // Simpler approach: find all sequence B boundaries
89    // Sequence B starts at first field 21 and includes all fields until sequence C
90    // Special handling for MT204 where field 20 appears in both sequences
91    let is_mt204 = config.sequence_b_marker == "20"
92        && all_fields.iter().filter(|(tag, _)| *tag == "20").count() > 1;
93
94    let sequence_b_start_idx = if is_mt204 {
95        // For MT204: skip the first field 20 (which is in sequence A)
96        let mut found_first_20 = false;
97        all_fields.iter().position(|(tag, _)| {
98            if *tag == "20" {
99                if found_first_20 {
100                    true // This is the second field 20, which starts sequence B
101                } else {
102                    found_first_20 = true;
103                    false // Skip the first field 20
104                }
105            } else {
106                false
107            }
108        })
109    } else {
110        all_fields.iter().position(|(tag, _)| {
111            is_sequence_b_marker(tag, &config.sequence_b_marker)
112                || (secondary_marker.is_some() && *tag == secondary_marker.unwrap())
113        })
114    };
115
116    // Find where sequence C would start (if it exists)
117    // This is tricky: sequence C fields appear after ALL transactions
118    // We need to find the last occurrence of transaction-ending fields
119    let mut sequence_c_start_idx: Option<usize> = None;
120
121    if config.has_sequence_c && sequence_b_start_idx.is_some() {
122        // Special handling for MT940/MT942 where sequence B contains statement lines
123        // and sequence C contains closing balance and summary fields
124        if config.sequence_b_marker == "61" {
125            // For MT940/MT942, look for the first occurrence of a sequence C field
126            // that is NOT field 86 (since 86 can appear in both sequences)
127            let seq_c_markers = config
128                .sequence_c_fields
129                .iter()
130                .filter(|f| *f != "86")
131                .collect::<Vec<_>>();
132
133            if let Some(seq_b_start) = sequence_b_start_idx {
134                for (i, (tag, _)) in all_fields.iter().enumerate().skip(seq_b_start) {
135                    let base_tag = tag.trim_end_matches(char::is_alphabetic);
136                    if seq_c_markers.iter().any(|marker| base_tag == *marker) {
137                        sequence_c_start_idx = Some(i);
138                        break;
139                    }
140                }
141            }
142        } else {
143            // Look for sequence C fields that appear after transaction patterns
144            // Transaction patterns typically end with fields like 59, 70, 71A
145            let transaction_end_fields = ["59", "70", "71A", "77B", "36"];
146
147            // Find the last occurrence of any transaction-ending field
148            let mut last_trans_end_idx: Option<usize> = None;
149            for (i, (tag, _)) in all_fields.iter().enumerate() {
150                let base_tag = tag.trim_end_matches(char::is_alphabetic);
151                if transaction_end_fields.contains(&base_tag) {
152                    last_trans_end_idx = Some(i);
153                }
154            }
155
156            // Look for sequence C fields after the last transaction end
157            if let Some(last_end) = last_trans_end_idx {
158                for (i, (tag, _)) in all_fields.iter().enumerate().skip(last_end + 1) {
159                    if config.sequence_c_fields.contains(&tag.to_string()) {
160                        sequence_c_start_idx = Some(i);
161                        break;
162                    }
163                }
164            } else {
165                // If no transaction-ending fields found, look for sequence C fields
166                // after the sequence B start
167                if let Some(seq_b_start) = sequence_b_start_idx {
168                    for (i, (tag, _)) in all_fields.iter().enumerate().skip(seq_b_start) {
169                        if config.sequence_c_fields.contains(&tag.to_string()) {
170                            sequence_c_start_idx = Some(i);
171                            break;
172                        }
173                    }
174                }
175            }
176        }
177    }
178
179    // Distribute fields to sequences based on boundaries
180    for (i, (tag, (value, pos))) in all_fields.iter().enumerate() {
181        // Check if this field should always be in sequence A
182        if sequence_a_fields.contains(tag) {
183            seq_a
184                .entry(tag.to_string())
185                .or_insert_with(Vec::new)
186                .push((value.clone(), *pos));
187            continue;
188        }
189
190        if let Some(seq_b_start) = sequence_b_start_idx {
191            if i < seq_b_start {
192                // Before sequence B = Sequence A
193                seq_a
194                    .entry(tag.to_string())
195                    .or_insert_with(Vec::new)
196                    .push((value.clone(), *pos));
197            } else if let Some(seq_c_start) = sequence_c_start_idx {
198                if i >= seq_c_start {
199                    // After sequence C start = Sequence C
200                    seq_c
201                        .entry(tag.to_string())
202                        .or_insert_with(Vec::new)
203                        .push((value.clone(), *pos));
204                } else {
205                    // Between sequence B start and C start = Sequence B
206                    seq_b
207                        .entry(tag.to_string())
208                        .or_insert_with(Vec::new)
209                        .push((value.clone(), *pos));
210                }
211            } else {
212                // No sequence C, everything after sequence B start is sequence B
213                seq_b
214                    .entry(tag.to_string())
215                    .or_insert_with(Vec::new)
216                    .push((value.clone(), *pos));
217            }
218        } else {
219            // No sequence B found, everything is sequence A
220            seq_a
221                .entry(tag.to_string())
222                .or_insert_with(Vec::new)
223                .push((value.clone(), *pos));
224        }
225    }
226
227    Ok(ParsedSequences {
228        sequence_a: seq_a,
229        sequence_b: seq_b,
230        sequence_c: seq_c,
231    })
232}
233
234/// Parse repetitive sequence items (like transactions)
235pub fn parse_repetitive_sequence<T>(fields: &FieldMap, marker_field: &str) -> Result<Vec<FieldMap>>
236where
237    T: crate::SwiftMessageBody,
238{
239    let mut items = Vec::new();
240
241    // Get all fields sorted by position
242    let mut all_fields: Vec<(String, String, usize)> = Vec::new();
243    for (tag, values) in fields {
244        for (value, pos) in values {
245            all_fields.push((tag.clone(), value.clone(), *pos));
246        }
247    }
248    all_fields.sort_by_key(|(_, _, pos)| *pos);
249
250    // Group fields by item (each starting with marker field)
251    let mut current_item_fields: HashMap<String, Vec<(String, usize)>> = HashMap::new();
252    let mut in_item = false;
253
254    for (tag, value, pos) in all_fields {
255        // Check if this is the start of a new item
256        if is_sequence_b_marker(&tag, marker_field) {
257            // Save previous item if exists
258            if in_item && !current_item_fields.is_empty() {
259                items.push(current_item_fields.clone());
260                current_item_fields.clear();
261            }
262            in_item = true;
263        }
264
265        // Add field to current item if we're in one
266        if in_item {
267            current_item_fields
268                .entry(tag)
269                .or_default()
270                .push((value, pos));
271        }
272    }
273
274    // Save the last item
275    if in_item && !current_item_fields.is_empty() {
276        items.push(current_item_fields);
277    }
278
279    Ok(items)
280}
281
/// Check if a field tag is a sequence B marker
///
/// The comparison is exact: with marker "21", the tag "21" matches while
/// lettered variants such as "21R" or "21C" do not.
fn is_sequence_b_marker(tag: &str, marker: &str) -> bool {
    tag == marker
}
296
297/// Get sequence configuration for a specific message type
298pub fn get_sequence_config(message_type: &str) -> SequenceConfig {
299    match message_type {
300        "MT101" => SequenceConfig {
301            sequence_b_marker: "21".to_string(),
302            sequence_c_fields: vec![],
303            has_sequence_c: false,
304        },
305        "MT104" => SequenceConfig {
306            sequence_b_marker: "21".to_string(),
307            sequence_c_fields: vec![
308                "32B".to_string(),
309                "19".to_string(),
310                "71F".to_string(),
311                "71G".to_string(),
312                "53".to_string(),
313            ],
314            has_sequence_c: true,
315        },
316        "MT107" => SequenceConfig {
317            sequence_b_marker: "21".to_string(),
318            sequence_c_fields: vec![],
319            has_sequence_c: false,
320        },
321        "MT204" => SequenceConfig {
322            // Note: MT204 has special handling since field 20 appears in both sequences
323            // The first 20 is for sequence A, subsequent 20s are for sequence B transactions
324            sequence_b_marker: "20".to_string(),
325            sequence_c_fields: vec![],
326            has_sequence_c: false,
327        },
328        _ => SequenceConfig::default(),
329    }
330}