ant_quic/compliance_validator/
rfc_parser.rs

//! RFC Parser Module
//!
//! Parses IETF RFC documents and extracts compliance requirements.
4use super::{ComplianceRequirement, RequirementCategory, RequirementLevel, ValidationError};
5use regex::Regex;
6use std::fs;
7use std::path::Path;
8
9/// Parser for RFC documents
10pub struct RfcParser {
11    /// Regex patterns for requirement extraction
12    must_pattern: Regex,
13    must_not_pattern: Regex,
14    should_pattern: Regex,
15    should_not_pattern: Regex,
16    may_pattern: Regex,
17}
18
19impl Default for RfcParser {
20    fn default() -> Self {
21        Self::new()
22    }
23}
24
25impl RfcParser {
26    /// Create a new RFC parser
27    pub fn new() -> Self {
28        Self {
29            // RFC 2119 keywords - match whole words with word boundaries
30            must_pattern: Regex::new(r"\b(MUST|SHALL|REQUIRED)\b").unwrap(),
31            must_not_pattern: Regex::new(r"\b(MUST NOT|SHALL NOT)\b").unwrap(),
32            should_pattern: Regex::new(r"\b(SHOULD|RECOMMENDED)\b").unwrap(),
33            should_not_pattern: Regex::new(r"\b(SHOULD NOT|NOT RECOMMENDED)\b").unwrap(),
34            may_pattern: Regex::new(r"\b(MAY|OPTIONAL)\b").unwrap(),
35        }
36    }
37
38    /// Parse an RFC file and extract requirements
39    pub fn parse_file(&self, path: &Path) -> Result<Vec<ComplianceRequirement>, ValidationError> {
40        let content = fs::read_to_string(path)?;
41        let spec_id = self.extract_spec_id(path)?;
42
43        Ok(self.parse_content(&content, &spec_id))
44    }
45
46    /// Parse RFC content and extract requirements
47    pub fn parse_content(&self, content: &str, spec_id: &str) -> Vec<ComplianceRequirement> {
48        let mut requirements = Vec::new();
49
50        // Split into sections
51        let sections = self.split_into_sections(content);
52
53        for (section_num, section_content) in sections {
54            // Extract requirements from each section
55            let section_reqs =
56                self.extract_requirements_from_section(spec_id, &section_num, &section_content);
57            requirements.extend(section_reqs);
58        }
59
60        requirements
61    }
62
63    /// Split RFC content into sections
64    fn split_into_sections(&self, content: &str) -> Vec<(String, String)> {
65        let mut sections = Vec::new();
66        let section_regex = Regex::new(r"(?m)^(\d+(?:\.\d+)*)\s+(.+)$").unwrap();
67
68        let mut current_section = String::new();
69        let mut current_content = String::new();
70
71        for line in content.lines() {
72            if let Some(captures) = section_regex.captures(line) {
73                // Found new section
74                if !current_section.is_empty() {
75                    sections.push((current_section.clone(), current_content.clone()));
76                }
77                current_section = captures[1].to_string();
78                current_content = String::new();
79            } else {
80                current_content.push_str(line);
81                current_content.push('\n');
82            }
83        }
84
85        // Add last section
86        if !current_section.is_empty() {
87            sections.push((current_section, current_content));
88        }
89
90        sections
91    }
92
93    /// Extract requirements from a section
94    fn extract_requirements_from_section(
95        &self,
96        spec_id: &str,
97        section: &str,
98        content: &str,
99    ) -> Vec<ComplianceRequirement> {
100        let mut requirements = Vec::new();
101
102        // Split into sentences for better requirement extraction
103        let sentences = self.split_into_sentences(content);
104
105        for sentence in sentences {
106            if let Some(req) = self.extract_requirement_from_sentence(spec_id, section, &sentence) {
107                requirements.push(req);
108            }
109        }
110
111        requirements
112    }
113
114    /// Split text into sentences
115    fn split_into_sentences(&self, text: &str) -> Vec<String> {
116        // Simple sentence splitter - can be improved
117        let sentence_regex = Regex::new(r"[.!?]+\s+").unwrap();
118        sentence_regex
119            .split(text)
120            .map(|s| s.trim().to_string())
121            .filter(|s| !s.is_empty())
122            .collect()
123    }
124
125    /// Extract requirement from a sentence
126    fn extract_requirement_from_sentence(
127        &self,
128        spec_id: &str,
129        section: &str,
130        sentence: &str,
131    ) -> Option<ComplianceRequirement> {
132        // Check for requirement keywords
133        let level = if self.must_not_pattern.is_match(sentence) {
134            RequirementLevel::MustNot
135        } else if self.should_not_pattern.is_match(sentence) {
136            RequirementLevel::ShouldNot
137        } else if self.must_pattern.is_match(sentence) {
138            RequirementLevel::Must
139        } else if self.should_pattern.is_match(sentence) {
140            RequirementLevel::Should
141        } else if self.may_pattern.is_match(sentence) {
142            RequirementLevel::May
143        } else {
144            return None;
145        };
146
147        // Categorize the requirement
148        let category = self.categorize_requirement(sentence);
149
150        Some(ComplianceRequirement {
151            spec_id: spec_id.to_string(),
152            section: section.to_string(),
153            level,
154            description: sentence.to_string(),
155            category,
156        })
157    }
158
159    /// Categorize requirement based on content
160    fn categorize_requirement(&self, description: &str) -> RequirementCategory {
161        let lower = description.to_lowercase();
162
163        if lower.contains("transport parameter") || lower.contains("transport_parameter") {
164            RequirementCategory::TransportParameters
165        } else if lower.contains("frame")
166            || lower.contains("encoding")
167            || lower.contains("decoding")
168        {
169            RequirementCategory::FrameFormat
170        } else if lower.contains("nat")
171            || lower.contains("traversal")
172            || lower.contains("hole punch")
173        {
174            RequirementCategory::NatTraversal
175        } else if lower.contains("address") && lower.contains("discovery") {
176            RequirementCategory::AddressDiscovery
177        } else if lower.contains("error") || lower.contains("close") || lower.contains("reset") {
178            RequirementCategory::ErrorHandling
179        } else if lower.contains("crypto")
180            || lower.contains("security")
181            || lower.contains("authentication")
182        {
183            RequirementCategory::Security
184        } else if lower.contains("connection")
185            || lower.contains("handshake")
186            || lower.contains("establishment")
187        {
188            RequirementCategory::ConnectionEstablishment
189        } else if lower.contains("performance")
190            || lower.contains("throughput")
191            || lower.contains("latency")
192        {
193            RequirementCategory::Performance
194        } else {
195            RequirementCategory::Transport
196        }
197    }
198
199    /// Extract spec ID from file path
200    fn extract_spec_id(&self, path: &Path) -> Result<String, ValidationError> {
201        let filename = path
202            .file_stem()
203            .and_then(|s| s.to_str())
204            .ok_or_else(|| ValidationError::RfcParseError("Invalid file path".to_string()))?;
205
206        // Extract RFC number or draft name
207        if filename.starts_with("rfc") {
208            Ok(filename.to_uppercase())
209        } else if filename.contains("draft") {
210            Ok(filename.to_string())
211        } else {
212            Ok(format!("spec-{filename}"))
213        }
214    }
215}
216
/// Parser for specific QUIC specifications.
///
/// Wraps a generic [`RfcParser`] and, for each supported document, appends a
/// fixed set of hand-written requirements to whatever the generic parser
/// extracted from the text.
pub struct QuicRfcParser {
    /// Generic keyword-based parser used for the actual text extraction.
    parser: RfcParser,
}
221
222impl Default for QuicRfcParser {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
228impl QuicRfcParser {
229    pub fn new() -> Self {
230        Self {
231            parser: RfcParser::new(),
232        }
233    }
234
235    /// Parse RFC 9000 (QUIC Transport Protocol)
236    pub fn parse_rfc9000(&self, content: &str) -> Vec<ComplianceRequirement> {
237        let mut requirements = self.parser.parse_content(content, "RFC9000");
238
239        // Add specific known requirements that might need special handling
240        self.add_rfc9000_specific_requirements(&mut requirements);
241
242        requirements
243    }
244
245    /// Parse draft-ietf-quic-address-discovery
246    pub fn parse_address_discovery_draft(&self, content: &str) -> Vec<ComplianceRequirement> {
247        let mut requirements = self
248            .parser
249            .parse_content(content, "draft-ietf-quic-address-discovery-00");
250
251        // Add specific requirements for address discovery
252        self.add_address_discovery_requirements(&mut requirements);
253
254        requirements
255    }
256
257    /// Parse draft-seemann-quic-nat-traversal
258    pub fn parse_nat_traversal_draft(&self, content: &str) -> Vec<ComplianceRequirement> {
259        let mut requirements = self
260            .parser
261            .parse_content(content, "draft-seemann-quic-nat-traversal-02");
262
263        // Add specific requirements for NAT traversal
264        self.add_nat_traversal_requirements(&mut requirements);
265
266        requirements
267    }
268
269    /// Add RFC 9000 specific requirements
270    fn add_rfc9000_specific_requirements(&self, requirements: &mut Vec<ComplianceRequirement>) {
271        // Add critical requirements that might be missed by simple pattern matching
272        requirements.push(ComplianceRequirement {
273            spec_id: "RFC9000".to_string(),
274            section: "4.1".to_string(),
275            level: RequirementLevel::Must,
276            description: "Endpoints MUST validate transport parameters during handshake"
277                .to_string(),
278            category: RequirementCategory::TransportParameters,
279        });
280
281        requirements.push(ComplianceRequirement {
282            spec_id: "RFC9000".to_string(),
283            section: "12.4".to_string(),
284            level: RequirementLevel::Must,
285            description:
286                "An endpoint MUST NOT send data on a stream without available flow control credit"
287                    .to_string(),
288            category: RequirementCategory::Transport,
289        });
290    }
291
292    /// Add address discovery specific requirements
293    fn add_address_discovery_requirements(&self, requirements: &mut Vec<ComplianceRequirement>) {
294        requirements.push(ComplianceRequirement {
295            spec_id: "draft-ietf-quic-address-discovery-00".to_string(),
296            section: "3.1".to_string(),
297            level: RequirementLevel::Must,
298            description:
299                "OBSERVED_ADDRESS frames MUST include monotonically increasing sequence numbers"
300                    .to_string(),
301            category: RequirementCategory::AddressDiscovery,
302        });
303
304        requirements.push(ComplianceRequirement {
305            spec_id: "draft-ietf-quic-address-discovery-00".to_string(),
306            section: "3.2".to_string(),
307            level: RequirementLevel::Must,
308            description:
309                "The IP version MUST be determined by the least significant bit of the frame type"
310                    .to_string(),
311            category: RequirementCategory::AddressDiscovery,
312        });
313    }
314
315    /// Add NAT traversal specific requirements
316    fn add_nat_traversal_requirements(&self, requirements: &mut Vec<ComplianceRequirement>) {
317        requirements.push(ComplianceRequirement {
318            spec_id: "draft-seemann-quic-nat-traversal-02".to_string(),
319            section: "4.1".to_string(),
320            level: RequirementLevel::Must,
321            description: "Clients MUST send empty NAT traversal transport parameter".to_string(),
322            category: RequirementCategory::NatTraversal,
323        });
324
325        requirements.push(ComplianceRequirement {
326            spec_id: "draft-seemann-quic-nat-traversal-02".to_string(),
327            section: "4.1".to_string(),
328            level: RequirementLevel::Must,
329            description: "Servers MUST send concurrency limit in NAT traversal transport parameter"
330                .to_string(),
331            category: RequirementCategory::NatTraversal,
332        });
333    }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// Each keyword pattern recognises its canonical RFC 2119 phrase.
    #[test]
    fn test_rfc_parser_creation() {
        let parser = RfcParser::new();
        assert!(parser.must_pattern.is_match("MUST implement"));
        assert!(parser.must_not_pattern.is_match("MUST NOT send"));
        assert!(parser.should_pattern.is_match("SHOULD use"));
        assert!(parser.should_not_pattern.is_match("SHOULD NOT ignore"));
        assert!(parser.may_pattern.is_match("MAY include"));
    }

    /// A sentence with a keyword yields a requirement with level and category.
    #[test]
    fn test_requirement_extraction() {
        let parser = RfcParser::new();
        let sentence = "Endpoints MUST validate all received transport parameters.";

        let req = parser.extract_requirement_from_sentence("RFC9000", "4.1", sentence);
        assert!(req.is_some());

        let req = req.unwrap();
        assert_eq!(req.level, RequirementLevel::Must);
        assert_eq!(req.category, RequirementCategory::TransportParameters);
    }

    /// Descriptions are mapped to the expected categories.
    #[test]
    fn test_categorization() {
        let parser = RfcParser::new();

        assert_eq!(
            parser.categorize_requirement("transport parameter validation"),
            RequirementCategory::TransportParameters
        );

        assert_eq!(
            parser.categorize_requirement("frame encoding rules"),
            RequirementCategory::FrameFormat
        );

        assert_eq!(
            parser.categorize_requirement("NAT traversal mechanism"),
            RequirementCategory::NatTraversal
        );
    }

    /// Terminators followed by whitespace are removed; a final one is kept.
    #[test]
    fn test_sentence_splitting() {
        let parser = RfcParser::new();
        let text = "This is sentence one. This is sentence two! And sentence three?";

        let sentences = parser.split_into_sentences(text);
        assert_eq!(sentences.len(), 3);
        assert_eq!(sentences[0], "This is sentence one");
        assert_eq!(sentences[1], "This is sentence two");
        assert_eq!(sentences[2], "And sentence three?");
    }

    /// Numbered headings start a new section and the body text is retained.
    #[test]
    fn test_section_splitting() {
        let parser = RfcParser::new();
        let text = "1 Introduction\nEndpoints MUST do A.\n1.1 Details\nEndpoints MAY do B.\n";

        let sections = parser.split_into_sections(text);
        assert_eq!(sections.len(), 2);
        assert_eq!(sections[0].0, "1");
        assert!(sections[0].1.contains("Endpoints MUST do A."));
        assert_eq!(sections[1].0, "1.1");
        assert!(sections[1].1.contains("Endpoints MAY do B."));
    }

    /// Parsed requirements and the hard-coded ones both appear in the result.
    #[test]
    fn test_quic_rfc_parser() {
        let parser = QuicRfcParser::new();
        // The heading places both sentences in section 1, which none of the
        // hard-coded RFC 9000 entries use, so the parsed entries can be told
        // apart from the appended ones.
        let content = "1 Overview\nEndpoints MUST validate parameters. They SHOULD log errors.";

        let requirements = parser.parse_rfc9000(content);

        let parsed: Vec<_> = requirements.iter().filter(|r| r.section == "1").collect();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].level, RequirementLevel::Must);
        assert_eq!(parsed[1].level, RequirementLevel::Should);

        // The hard-coded requirements are appended on top of the parsed ones.
        assert!(requirements.len() > parsed.len());
    }
}
403}