Skip to main content

marq/
reqs.rs

1//! Requirement definition extraction for specification traceability.
2//!
3//! Supports the req id syntax used by tracey, see <https://github.com/bearcove/tracey>
4
5use std::path::PathBuf;
6
7use facet::Facet;
8
9use crate::{Error, Result};
10
/// Byte offset and length in source content.
///
/// Both fields are expressed in bytes. The derived `Default` is the empty
/// span at offset 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Facet)]
pub struct SourceSpan {
    /// Byte offset from start of content
    pub offset: usize,
    /// Length in bytes
    pub length: usize,
}
19
/// An inline code span (backtick-delimited) found in the markdown source.
///
/// `content` holds only the text between the delimiters, while `span`
/// covers the delimiters as well.
#[derive(Debug, Clone, PartialEq, Eq, Facet)]
pub struct InlineCodeSpan {
    /// The text content inside the backticks
    pub content: String,
    /// Source span covering the entire code span including backtick delimiters
    pub span: SourceSpan,
}
28
/// Structured rule identifier with optional version.
///
/// The textual form is `base` for version 1 and `base+N` otherwise (see the
/// `Display` impl and [`parse_rule_id`]). The derived ordering compares
/// `base` first and `version` second, following field declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Facet)]
pub struct RuleId {
    /// Base identifier without version suffix.
    pub base: String,
    /// Version number (unversioned IDs are version 1).
    pub version: u32,
}
37
38impl std::fmt::Display for RuleId {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        if self.version == 1 {
41            f.write_str(&self.base)
42        } else {
43            write!(f, "{}+{}", self.base, self.version)
44        }
45    }
46}
47
48impl PartialEq<&str> for RuleId {
49    fn eq(&self, other: &&str) -> bool {
50        parse_rule_id(other).is_some_and(|parsed| parsed == *self)
51    }
52}
53
54impl PartialEq<RuleId> for &str {
55    fn eq(&self, other: &RuleId) -> bool {
56        parse_rule_id(self).is_some_and(|parsed| parsed == *other)
57    }
58}
59
60/// Parse a rule ID with an optional `+N` version suffix.
61pub fn parse_rule_id(id: &str) -> Option<RuleId> {
62    if id.is_empty() {
63        return None;
64    }
65
66    if let Some((base, version_str)) = id.rsplit_once('+') {
67        if base.is_empty() || base.contains('+') || version_str.is_empty() {
68            return None;
69        }
70        let version = version_str.parse::<u32>().ok()?;
71        if version == 0 {
72            return None;
73        }
74        Some(RuleId {
75            base: base.to_string(),
76            version,
77        })
78    } else if id.contains('+') {
79        None
80    } else {
81        Some(RuleId {
82            base: id.to_string(),
83            version: 1,
84        })
85    }
86}
87
/// RFC 2119 keyword found in requirement text.
///
/// Synonymous spellings are folded into one variant each; the spellings
/// listed on every variant are the ones `detect_rfc2119_keywords` maps to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Facet)]
#[repr(u8)]
pub enum Rfc2119Keyword {
    /// MUST, SHALL, REQUIRED
    Must,
    /// MUST NOT, SHALL NOT
    MustNot,
    /// SHOULD, RECOMMENDED
    Should,
    /// SHOULD NOT, NOT RECOMMENDED
    ShouldNot,
    /// MAY, OPTIONAL
    May,
}
103
104impl Rfc2119Keyword {
105    /// Returns true if this is a negative keyword (MUST NOT, SHOULD NOT).
106    pub fn is_negative(&self) -> bool {
107        matches!(self, Rfc2119Keyword::MustNot | Rfc2119Keyword::ShouldNot)
108    }
109
110    /// Human-readable name for this keyword.
111    pub fn as_str(&self) -> &'static str {
112        match self {
113            Rfc2119Keyword::Must => "MUST",
114            Rfc2119Keyword::MustNot => "MUST NOT",
115            Rfc2119Keyword::Should => "SHOULD",
116            Rfc2119Keyword::ShouldNot => "SHOULD NOT",
117            Rfc2119Keyword::May => "MAY",
118        }
119    }
120}
121
122/// Detect RFC 2119 keywords in text.
123///
124/// Returns all keywords found, checking for negative forms first.
125/// Keywords must be uppercase to match RFC 2119 conventions.
126pub fn detect_rfc2119_keywords(text: &str) -> Vec<Rfc2119Keyword> {
127    let mut keywords = Vec::new();
128    let words: Vec<&str> = text.split_whitespace().collect();
129
130    let mut i = 0;
131    while i < words.len() {
132        let word = words[i].trim_matches(|c: char| !c.is_alphanumeric());
133
134        // Check for two-word negative forms
135        if i + 1 < words.len() {
136            let next_word = words[i + 1].trim_matches(|c: char| !c.is_alphanumeric());
137            if (word == "MUST" || word == "SHALL") && next_word == "NOT" {
138                keywords.push(Rfc2119Keyword::MustNot);
139                i += 2;
140                continue;
141            }
142            if word == "SHOULD" && next_word == "NOT" {
143                keywords.push(Rfc2119Keyword::ShouldNot);
144                i += 2;
145                continue;
146            }
147            if word == "NOT" && next_word == "RECOMMENDED" {
148                keywords.push(Rfc2119Keyword::ShouldNot);
149                i += 2;
150                continue;
151            }
152        }
153
154        // Check single-word forms
155        match word {
156            "MUST" | "SHALL" | "REQUIRED" => keywords.push(Rfc2119Keyword::Must),
157            "SHOULD" | "RECOMMENDED" => keywords.push(Rfc2119Keyword::Should),
158            "MAY" | "OPTIONAL" => keywords.push(Rfc2119Keyword::May),
159            _ => {}
160        }
161        i += 1;
162    }
163
164    keywords
165}
166
/// Lifecycle status of a requirement.
///
/// Requirements progress through these states as the specification evolves.
/// `Stable` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Facet)]
#[repr(u8)]
pub enum ReqStatus {
    /// Requirement is proposed but not yet finalized
    Draft,
    /// Requirement is active and enforced
    #[default]
    Stable,
    /// Requirement is being phased out
    Deprecated,
    /// Requirement has been removed (kept for historical reference)
    Removed,
}
183
184impl ReqStatus {
185    /// Parse a status from its string representation.
186    pub fn parse(s: &str) -> Option<Self> {
187        match s {
188            "draft" => Some(ReqStatus::Draft),
189            "stable" => Some(ReqStatus::Stable),
190            "deprecated" => Some(ReqStatus::Deprecated),
191            "removed" => Some(ReqStatus::Removed),
192            _ => None,
193        }
194    }
195
196    /// Get the string representation of this status.
197    pub fn as_str(&self) -> &'static str {
198        match self {
199            ReqStatus::Draft => "draft",
200            ReqStatus::Stable => "stable",
201            ReqStatus::Deprecated => "deprecated",
202            ReqStatus::Removed => "removed",
203        }
204    }
205}
206
207impl std::fmt::Display for ReqStatus {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        f.write_str(self.as_str())
210    }
211}
212
/// RFC 2119 requirement level for a requirement.
///
/// See <https://www.ietf.org/rfc/rfc2119.txt> for the specification.
/// `Must` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Facet)]
#[repr(u8)]
pub enum ReqLevel {
    /// Absolute requirement (MUST, SHALL, REQUIRED)
    #[default]
    Must,
    /// Recommended but not required (SHOULD, RECOMMENDED)
    Should,
    /// Truly optional (MAY, OPTIONAL)
    May,
}
227
228impl ReqLevel {
229    /// Parse a level from its string representation.
230    pub fn parse(s: &str) -> Option<Self> {
231        match s {
232            "must" | "shall" | "required" => Some(ReqLevel::Must),
233            "should" | "recommended" => Some(ReqLevel::Should),
234            "may" | "optional" => Some(ReqLevel::May),
235            _ => None,
236        }
237    }
238
239    /// Get the string representation of this level.
240    pub fn as_str(&self) -> &'static str {
241        match self {
242            ReqLevel::Must => "must",
243            ReqLevel::Should => "should",
244            ReqLevel::May => "may",
245        }
246    }
247}
248
249impl std::fmt::Display for ReqLevel {
250    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
251        f.write_str(self.as_str())
252    }
253}
254
/// Metadata attributes for a requirement.
///
/// Every attribute is optional. The derived `Default` leaves all `Option`
/// fields as `None` and `tags` empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Facet)]
pub struct ReqMetadata {
    /// Lifecycle status (draft, stable, deprecated, removed)
    pub status: Option<ReqStatus>,
    /// RFC 2119 requirement level (must, should, may)
    pub level: Option<ReqLevel>,
    /// Version when this requirement was introduced
    pub since: Option<String>,
    /// Version when this requirement will be/was deprecated or removed
    pub until: Option<String>,
    /// Custom tags for categorization
    pub tags: Vec<String>,
}
269
270impl ReqMetadata {
271    /// Returns true if this requirement should be counted in coverage by default.
272    ///
273    /// Draft and removed requirements are excluded from coverage by default.
274    pub fn counts_for_coverage(&self) -> bool {
275        !matches!(
276            self.status,
277            Some(ReqStatus::Draft) | Some(ReqStatus::Removed)
278        )
279    }
280
281    /// Returns true if this requirement is required (must be covered for passing builds).
282    ///
283    /// Only `must` level requirements are required; `should` and `may` are optional.
284    pub fn is_required(&self) -> bool {
285        match self.level {
286            Some(ReqLevel::Must) | None => true,
287            Some(ReqLevel::Should) | Some(ReqLevel::May) => false,
288        }
289    }
290}
291
/// A requirement definition extracted from the markdown.
///
/// Carries two spans: `marker_span` for the marker alone and `span` for the
/// marker together with its content.
#[derive(Debug, Clone, PartialEq, Eq, Facet)]
pub struct ReqDefinition {
    /// The requirement identifier (e.g., "channel.id.allocation")
    pub id: RuleId,
    /// The anchor ID for HTML linking (e.g., "r--channel.id.allocation")
    pub anchor_id: String,
    /// Source span of just the requirement marker (e.g., `r[` to `]`)
    /// Use this for inlay hints and diagnostics that should only highlight the marker.
    pub marker_span: SourceSpan,
    /// Source span of the entire requirement (marker + all content paragraphs).
    /// Use this for hover highlight ranges that should cover the full requirement.
    pub span: SourceSpan,
    /// Line number where this requirement is defined (1-indexed)
    pub line: usize,
    /// Requirement metadata (status, level, since, until, tags)
    pub metadata: ReqMetadata,
    /// Raw markdown source of the requirement content (without the `r[...]` marker).
    /// For blockquote rules, this includes the `> ` prefixes.
    /// Can be rendered with marq to get HTML.
    pub raw: String,
    /// The rendered HTML of the content following the requirement marker
    pub html: String,
}
316
/// Warning about requirement quality.
///
/// Identifies the requirement by file, ID, line and byte span; `kind` says
/// what was found.
#[derive(Debug, Clone, Facet)]
pub struct ReqWarning {
    /// File where the warning occurred
    pub file: PathBuf,
    /// Requirement ID this warning relates to
    pub req_id: RuleId,
    /// Line number (1-indexed)
    pub line: usize,
    /// Byte span of the requirement
    pub span: SourceSpan,
    /// What kind of warning
    pub kind: ReqWarningKind,
}
331
/// Types of requirement warnings.
#[derive(Debug, Clone, Facet)]
#[repr(u8)]
pub enum ReqWarningKind {
    /// Requirement text contains no RFC 2119 keywords
    NoRfc2119Keyword,
    /// Requirement text contains a negative requirement (MUST NOT, SHALL NOT, etc.) — these are hard to test.
    /// The payload is the keyword that was found.
    NegativeReq(Rfc2119Keyword),
}
341
/// Result of extracting requirements from markdown.
///
/// Bundles the rewritten markdown with the requirements and quality
/// warnings collected along the way.
#[derive(Debug, Clone, Facet)]
pub struct ExtractedReqs {
    /// Transformed markdown with requirement markers replaced by HTML
    pub output: String,
    /// All requirements found in the document
    pub reqs: Vec<ReqDefinition>,
    /// Warnings about requirement quality
    pub warnings: Vec<ReqWarning>,
}
352
353/// Parse a requirement marker content (inside r[...]).
354///
355/// Supports formats:
356/// - `req.id` - simple requirement ID
357/// - `req.id status=stable level=must` - requirement ID with attributes
358pub fn parse_req_marker(inner: &str) -> Result<(RuleId, ReqMetadata)> {
359    let inner = inner.trim();
360
361    // Find where the requirement ID ends (at first space or end of string)
362    let (req_id, attrs_str) = match inner.find(' ') {
363        Some(idx) => (&inner[..idx], inner[idx + 1..].trim()),
364        None => (inner, ""),
365    };
366
367    let req_id = parse_rule_id(req_id).ok_or_else(|| {
368        Error::DuplicateReq("empty or invalid requirement identifier".to_string())
369    })?;
370
371    // Parse attributes if present
372    let mut metadata = ReqMetadata::default();
373
374    if !attrs_str.is_empty() {
375        for attr in attrs_str.split_whitespace() {
376            if let Some((key, value)) = attr.split_once('=') {
377                match key {
378                    "status" => {
379                        metadata.status = Some(ReqStatus::parse(value).ok_or_else(|| {
380                            Error::CodeBlockHandler {
381                                language: "req".to_string(),
382                                message: format!(
383                                    "invalid status '{}' for requirement '{}', expected: draft, stable, deprecated, removed",
384                                    value, req_id
385                                ),
386                            }
387                        })?);
388                    }
389                    "level" => {
390                        metadata.level = Some(ReqLevel::parse(value).ok_or_else(|| {
391                            Error::CodeBlockHandler {
392                                language: "req".to_string(),
393                                message: format!(
394                                    "invalid level '{}' for requirement '{}', expected: must, should, may",
395                                    value, req_id
396                                ),
397                            }
398                        })?);
399                    }
400                    "since" => {
401                        metadata.since = Some(value.to_string());
402                    }
403                    "until" => {
404                        metadata.until = Some(value.to_string());
405                    }
406                    "tags" => {
407                        metadata.tags = value.split(',').map(|s| s.trim().to_string()).collect();
408                    }
409                    _ => {
410                        return Err(Error::CodeBlockHandler {
411                            language: "req".to_string(),
412                            message: format!(
413                                "unknown attribute '{}' for requirement '{}', expected: status, level, since, until, tags",
414                                key, req_id
415                            ),
416                        });
417                    }
418                }
419            } else {
420                return Err(Error::CodeBlockHandler {
421                    language: "req".to_string(),
422                    message: format!(
423                        "invalid attribute format '{}' for requirement '{}', expected: key=value",
424                        attr, req_id
425                    ),
426                });
427            }
428        }
429    }
430
431    Ok((req_id, metadata))
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    // RFC 2119 keyword detection: each case feeds one sentence through
    // `detect_rfc2119_keywords` and checks the keywords in order.

    #[test]
    fn test_detect_rfc2119_must() {
        assert_eq!(
            detect_rfc2119_keywords("Channel IDs MUST be allocated sequentially."),
            [Rfc2119Keyword::Must]
        );
    }

    #[test]
    fn test_detect_rfc2119_must_not() {
        assert_eq!(
            detect_rfc2119_keywords("Clients MUST NOT send invalid data."),
            [Rfc2119Keyword::MustNot]
        );
    }

    #[test]
    fn test_detect_rfc2119_should() {
        assert_eq!(
            detect_rfc2119_keywords("Implementations SHOULD use TLS."),
            [Rfc2119Keyword::Should]
        );
    }

    #[test]
    fn test_detect_rfc2119_should_not() {
        assert_eq!(
            detect_rfc2119_keywords("Clients SHOULD NOT retry immediately."),
            [Rfc2119Keyword::ShouldNot]
        );
    }

    #[test]
    fn test_detect_rfc2119_may() {
        assert_eq!(
            detect_rfc2119_keywords("Implementations MAY cache responses."),
            [Rfc2119Keyword::May]
        );
    }

    #[test]
    fn test_detect_rfc2119_multiple() {
        assert_eq!(
            detect_rfc2119_keywords("Clients MUST validate input and SHOULD log errors."),
            [Rfc2119Keyword::Must, Rfc2119Keyword::Should]
        );
    }

    #[test]
    fn test_detect_rfc2119_case_sensitive() {
        // Lowercase "must" is ordinary prose, not an RFC 2119 keyword.
        assert_eq!(
            detect_rfc2119_keywords("The server must respond."),
            Vec::<Rfc2119Keyword>::new()
        );
    }

    // Metadata predicates: table-driven over every status / level.

    #[test]
    fn test_metadata_counts_for_coverage() {
        // `None` (the default metadata) is treated like a stable requirement.
        let cases = [
            (None, true),
            (Some(ReqStatus::Stable), true),
            (Some(ReqStatus::Deprecated), true),
            (Some(ReqStatus::Draft), false),
            (Some(ReqStatus::Removed), false),
        ];

        for (status, expected) in cases {
            let meta = ReqMetadata {
                status,
                ..ReqMetadata::default()
            };
            assert_eq!(meta.counts_for_coverage(), expected, "status: {:?}", status);
        }
    }

    #[test]
    fn test_metadata_is_required() {
        // `None` (the default metadata) is treated like the `must` level.
        let cases = [
            (None, true),
            (Some(ReqLevel::Must), true),
            (Some(ReqLevel::Should), false),
            (Some(ReqLevel::May), false),
        ];

        for (level, expected) in cases {
            let meta = ReqMetadata {
                level,
                ..ReqMetadata::default()
            };
            assert_eq!(meta.is_required(), expected, "level: {:?}", level);
        }
    }
}