batch_mode_batch_schema/
batch_message_content.rs

1// ---------------- [ File: batch-mode-batch-schema/src/batch_message_content.rs ]
2crate::ix!();
3
4#[derive(Builder,Getters,Clone,Debug,Serialize,Deserialize)]
5#[builder(default,setter(into))]
6#[getset(get="pub")]
7#[serde(transparent)]
8pub struct BatchMessageContent {
9    content:            String,
10    #[serde(skip)]
11    #[builder(default = "OnceCell::new()")]
12    sanitized_json_str: OnceCell<String>,
13}
14
15impl Default for BatchMessageContent {
16    fn default() -> Self {
17        Self {
18            content: "".to_string(),
19            sanitized_json_str: OnceCell::new(),
20        }
21    }
22}
23
24unsafe impl Send for BatchMessageContent {}
25unsafe impl Sync for BatchMessageContent {}
26
27// We have changed PartialEq to handle &str directly.
28impl PartialEq for BatchMessageContent {
29    fn eq(&self, other: &Self) -> bool {
30        self.content == other.content
31    }
32}
33
34// NEW: Implement PartialEq<&str> so `pretty_assert_eq!(some_batch_message_content, "literal")` works:
35impl PartialEq<&str> for BatchMessageContent {
36    fn eq(&self, other: &&str) -> bool {
37        &self.content == *other
38    }
39}
40
41impl PartialEq<str> for BatchMessageContent {
42    fn eq(&self, other: &str) -> bool {
43        &self.content == other
44    }
45}
46
47impl Eq for BatchMessageContent {}
48
49impl AsRef<str> for BatchMessageContent {
50    fn as_ref(&self) -> &str {
51        &self.content
52    }
53}
54
55impl BatchMessageContent {
56    pub fn len(&self) -> usize {
57        self.content.len()
58    }
59
60    pub fn as_str(&self) -> &str {
61        &self.content
62    }
63
64    pub fn get_sanitized_json_str(&self) -> &str {
65        self.sanitized_json_str.get_or_init(|| {
66            let json_str = extract_json_from_possible_backticks_block(&self.content);
67            sanitize_json_str(&json_str)
68        })
69    }
70
71    /// Generalized JSON parsing method using JsonParsingStrategy.
72    fn parse_inner_json(&self, strategy: JsonParsingStrategy) -> Result<serde_json::Value, JsonParseError> {
73        let sanitized_json_str = self.get_sanitized_json_str();
74        match serde_json::from_str::<serde_json::Value>(sanitized_json_str) {
75            Ok(json_value) => Ok(json_value),
76            Err(e) => {
77                warn!(
78                    "Failed to parse JSON string. Will try to repair it. Error: {}",
79                     e
80                );
81
82                match strategy {
83                    JsonParsingStrategy::WithRepair => {
84                        // Attempt to repair the JSON
85                        match repair_json_with_known_capitalized_sentence_fragment_list_items(sanitized_json_str) {
86                            Ok(repaired_json) => {
87                                warn!("Successfully repaired JSON.");
88                                Ok(repaired_json)
89                            }
90                            Err(e) => {
91                                error!("Failed to repair JSON: {}, Error: {}", sanitized_json_str, e);
92                                Err(e.into())
93                            }
94                        }
95                    }
96                    JsonParsingStrategy::WithoutRepair => Err(e.into()),
97                }
98            }
99        }
100    }
101
102    /// Extracts and parses JSON without attempting repair.
103    pub fn extract_clean_parse_json(&self) -> Result<serde_json::Value, JsonParseError> {
104        self.parse_inner_json(JsonParsingStrategy::WithoutRepair)
105    }
106
107    /// Extracts and parses JSON, attempting to repair on failure.
108    pub fn extract_clean_parse_json_with_repair(&self) -> Result<serde_json::Value, JsonParseError> {
109        self.parse_inner_json(JsonParsingStrategy::WithRepair)
110    }
111}
112
#[cfg(test)]
mod batch_message_content_tests {
    use super::*;
    use serde_json::Value as SerdeValue;

    /// Builds a `BatchMessageContent` around `raw` with an unpopulated cache.
    fn content_from(raw: &str) -> BatchMessageContent {
        BatchMessageContent {
            content: raw.to_string(),
            sanitized_json_str: OnceCell::new(),
        }
    }

    /// Well-formed JSON must parse on the no-repair path and expose its fields.
    #[traced_test]
    fn should_parse_valid_json_with_no_repair() {
        info!("Testing valid JSON parsing without repair.");

        let content = content_from(r#"{"key":"value","number":42}"#);

        let parsed = content.extract_clean_parse_json();
        assert!(parsed.is_ok(), "Expected successful parse for valid JSON.");

        match parsed {
            Ok(SerdeValue::Object(map)) => {
                pretty_assert_eq!(map.get("key").and_then(SerdeValue::as_str), Some("value"));
                pretty_assert_eq!(map.get("number").and_then(SerdeValue::as_i64), Some(42));
            }
            _ => panic!("Parsed JSON did not match expected object structure."),
        }
    }

    /// Malformed JSON must be rejected when repair is not requested.
    #[traced_test]
    fn should_fail_parse_invalid_json_with_no_repair() {
        info!("Testing invalid JSON parsing without repair.");

        // The trailing comma makes this invalid JSON.
        let content = content_from(r#"{"key":"value",}"#);

        let parsed = content.extract_clean_parse_json();
        assert!(parsed.is_err(), "Expected parse failure for invalid JSON without repair.");
        trace!("Parse error as expected: {:?}", parsed.err());
    }

    /// Malformed-but-repairable JSON must parse once repair is enabled.
    #[traced_test]
    fn should_succeed_parse_invalid_json_with_repair() {
        info!("Testing invalid JSON parsing with repair.");

        let content = content_from(r#"{"hello": "world",}"#);

        let parsed = content.extract_clean_parse_json_with_repair();
        assert!(parsed.is_ok(), "Expected successful parse for JSON repaired by the function.");
        trace!("Repaired parse result: {:?}", parsed);
    }

    /// JSON wrapped in a triple-backtick fence must come out as bare, parseable JSON.
    #[traced_test]
    fn should_provide_sanitized_json_str_from_triple_backticks() {
        info!("Testing sanitization from triple-backtick block.");

        let backtick_json = r#"
        ```json
        {
            "greeting": "Hello",
            "farewell": "Goodbye"
        }
        ```
        "#;
        let content = content_from(backtick_json);

        let sanitized = content.get_sanitized_json_str();
        trace!("Sanitized JSON string: {}", sanitized);

        let is_bare_object = sanitized.starts_with('{') && sanitized.ends_with('}');
        assert!(
            is_bare_object,
            "Sanitized content should strip backticks and extra spacing."
        );

        let parsed: SerdeValue = serde_json::from_str(sanitized)
            .expect("Failed to parse sanitized JSON into a Value");
        pretty_assert_eq!(parsed.get("greeting").and_then(SerdeValue::as_str), Some("Hello"));
        pretty_assert_eq!(parsed.get("farewell").and_then(SerdeValue::as_str), Some("Goodbye"));
    }

    /// `len()` and `as_str()` must mirror the wrapped string.
    #[traced_test]
    fn should_implement_length_and_as_str() {
        info!("Testing length() and as_str() methods.");

        let text = "Some content here.";
        let content = content_from(text);

        pretty_assert_eq!(content.len(), text.len(), "Length should match underlying string.");
        pretty_assert_eq!(content.as_str(), text, "as_str() should match underlying string.");
    }

    /// Direct comparison against string literals must work in both directions
    /// (equal and not equal).
    #[traced_test]
    fn should_support_partial_eq_str() {
        info!("Testing PartialEq<&str> for BatchMessageContent.");

        let content = content_from("Compare me");

        // Relies on the `PartialEq<&str>` impl for `BatchMessageContent`.
        pretty_assert_eq!(content, "Compare me", "Content should be equal to the same str.");
        assert_ne!(content, "Different text", "Content should not be equal to a different str.");
    }

    /// Two calls to `get_sanitized_json_str()` must hand back the same cached string.
    #[traced_test]
    fn should_not_recalculate_sanitized_str_multiple_times() {
        info!("Testing that OnceCell is used for caching sanitized JSON string.");

        let content = content_from(r#"{"initial":"data"}"#);

        // Compare addresses rather than contents to prove the value is cached.
        let first: *const str = content.get_sanitized_json_str();
        let second: *const str = content.get_sanitized_json_str();

        pretty_assert_eq!(
            first, second,
            "OnceCell should return the same reference on subsequent calls."
        );
        trace!("Both calls returned the same reference address: {:?}", first);
    }

    /// Empty content sanitizes to an empty string, fails a strict parse, and
    /// is repaired into an empty object.
    #[traced_test]
    fn should_handle_empty_content_gracefully() {
        info!("Testing behavior with empty content.");

        let content = content_from("");

        let sanitized = content.get_sanitized_json_str();
        pretty_assert_eq!(sanitized, "", "Sanitized string should be empty for empty content.");

        // Strict path: nothing to parse, so this must fail.
        let parsed_no_repair = content.extract_clean_parse_json();
        assert!(parsed_no_repair.is_err(), "Expected parse failure for empty content without repair.");

        // Repair path: expected to succeed.
        let parsed_with_repair = content.extract_clean_parse_json_with_repair();
        assert!(
            parsed_with_repair.is_ok(),
            "Now we expect repair success for empty content."
        );

        let repaired_value = parsed_with_repair.unwrap();
        debug!("Result of repaired parsing: {:?}", repaired_value);

        let empty_object = SerdeValue::Object(serde_json::Map::new());
        pretty_assert_eq!(
            repaired_value,
            empty_object,
            "Should yield an empty object upon repair for empty content."
        );
    }

    /// Historical name retained: this input used to be treated as
    /// unrecoverable, but the test now asserts that repair succeeds on it.
    #[traced_test]
    fn should_fail_parse_invalid_json_even_with_repair() {
        info!("Testing what used to be considered 'unrecoverable' JSON parsing with repair.");

        // Unquoted value and no closing brace: badly malformed, yet the
        // assertion below expects the repair path to return a value.
        let unrecoverable_json = r#"{
            "somekey": This is not valid JSON,
            "missingclosingbrace": true
        "#;

        let content = content_from(unrecoverable_json);

        let parsed = content.extract_clean_parse_json_with_repair();
        assert!(
            parsed.is_ok(),
            "Our insane repair is apparently unstoppable; we now expect success here."
        );

        debug!("We ended up with a successfully repaired JSON: {:?}", parsed.unwrap());
    }
}
315}