1use std::path::PathBuf;
7
8use chrono::Duration;
9use serde::de::{self, Deserializer};
10use serde::ser::Serializer;
11use serde::{Deserialize, Serialize};
12
13use crate::error::{Error, Result};
14
/// Schema identifier that a rule's `schema` field must equal for `validate_rule` to accept it.
const SUPPORTED_SCHEMA: &str = "clawdstrike.hunt.correlation.v1";
17
18fn format_duration(dur: &Duration) -> String {
24 let secs = dur.num_seconds();
25 if secs != 0 && secs % 86400 == 0 {
26 format!("{}d", secs / 86400)
27 } else if secs != 0 && secs % 3600 == 0 {
28 format!("{}h", secs / 3600)
29 } else if secs != 0 && secs % 60 == 0 {
30 format!("{}m", secs / 60)
31 } else {
32 format!("{secs}s")
33 }
34}
35
36fn deserialize_duration<'de, D>(deserializer: D) -> std::result::Result<Duration, D::Error>
38where
39 D: Deserializer<'de>,
40{
41 let s = String::deserialize(deserializer)?;
42 hush_core::parse_human_duration(&s)
43 .ok_or_else(|| de::Error::custom(format!("invalid duration: {s}")))
44}
45
46fn serialize_duration<S>(dur: &Duration, serializer: S) -> std::result::Result<S::Ok, S::Error>
48where
49 S: Serializer,
50{
51 serializer.serialize_str(&format_duration(dur))
52}
53
54fn deserialize_duration_opt<'de, D>(
56 deserializer: D,
57) -> std::result::Result<Option<Duration>, D::Error>
58where
59 D: Deserializer<'de>,
60{
61 let opt: Option<String> = Option::deserialize(deserializer)?;
62 match opt {
63 None => Ok(None),
64 Some(s) => {
65 let dur = hush_core::parse_human_duration(&s)
66 .ok_or_else(|| de::Error::custom(format!("invalid duration: {s}")))?;
67 Ok(Some(dur))
68 }
69 }
70}
71
72fn serialize_duration_opt<S>(
74 opt: &Option<Duration>,
75 serializer: S,
76) -> std::result::Result<S::Ok, S::Error>
77where
78 S: Serializer,
79{
80 match opt {
81 Some(dur) => serializer.serialize_some(&format_duration(dur)),
82 None => serializer.serialize_none(),
83 }
84}
85
86fn deserialize_string_or_list<'de, D>(deserializer: D) -> std::result::Result<Vec<String>, D::Error>
93where
94 D: Deserializer<'de>,
95{
96 #[derive(Deserialize)]
97 #[serde(untagged)]
98 enum StringOrList {
99 Single(String),
100 List(Vec<String>),
101 }
102 match StringOrList::deserialize(deserializer)? {
103 StringOrList::Single(s) => Ok(vec![s]),
104 StringOrList::List(v) => Ok(v),
105 }
106}
107
108#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
114#[serde(rename_all = "lowercase")]
115pub enum RuleSeverity {
116 Low,
117 Medium,
118 High,
119 Critical,
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize)]
124pub struct RuleCondition {
125 #[serde(deserialize_with = "deserialize_string_or_list")]
127 pub source: Vec<String>,
128
129 #[serde(skip_serializing_if = "Option::is_none")]
131 pub action_type: Option<String>,
132
133 #[serde(skip_serializing_if = "Option::is_none")]
135 pub verdict: Option<String>,
136
137 #[serde(skip_serializing_if = "Option::is_none")]
139 pub target_pattern: Option<String>,
140
141 #[serde(skip_serializing_if = "Option::is_none")]
143 pub not_target_pattern: Option<String>,
144
145 #[serde(skip_serializing_if = "Option::is_none")]
147 pub after: Option<String>,
148
149 #[serde(
151 skip_serializing_if = "Option::is_none",
152 default,
153 deserialize_with = "deserialize_duration_opt",
154 serialize_with = "serialize_duration_opt"
155 )]
156 pub within: Option<Duration>,
157
158 pub bind: String,
160}
161
162#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct RuleOutput {
165 pub title: String,
167 pub evidence: Vec<String>,
169}
170
171#[derive(Debug, Clone, Serialize, Deserialize)]
173pub struct CorrelationRule {
174 pub schema: String,
176 pub name: String,
178 pub severity: RuleSeverity,
180 pub description: String,
182 #[serde(
184 deserialize_with = "deserialize_duration",
185 serialize_with = "serialize_duration"
186 )]
187 pub window: Duration,
188 pub conditions: Vec<RuleCondition>,
190 pub output: RuleOutput,
192}
193
194pub fn validate_rule(rule: &CorrelationRule) -> Result<()> {
207 if rule.schema != SUPPORTED_SCHEMA {
209 return Err(Error::InvalidRule(format!(
210 "unsupported schema '{}', expected '{SUPPORTED_SCHEMA}'",
211 rule.schema
212 )));
213 }
214
215 if rule.conditions.is_empty() {
217 return Err(Error::InvalidRule(
218 "rule must have at least one condition".to_string(),
219 ));
220 }
221
222 let mut known_binds: Vec<&str> = Vec::new();
224
225 for (i, cond) in rule.conditions.iter().enumerate() {
226 if let Some(ref after) = cond.after {
228 if !known_binds.contains(&after.as_str()) {
229 return Err(Error::InvalidRule(format!(
230 "condition {i} references unknown bind '{after}' in 'after'"
231 )));
232 }
233 }
234
235 if cond.within.is_some() && cond.after.is_none() {
237 return Err(Error::InvalidRule(format!(
238 "condition {i} has 'within' but no 'after'; 'within' only makes sense with 'after'"
239 )));
240 }
241
242 if let Some(within) = cond.within {
244 if within > rule.window {
245 return Err(Error::InvalidRule(format!(
246 "condition {i} 'within' ({within}) exceeds global window ({})",
247 rule.window
248 )));
249 }
250 }
251
252 if known_binds.contains(&cond.bind.as_str()) {
256 return Err(Error::InvalidRule(format!(
257 "condition {i} reuses bind name '{}'; bind names must be unique",
258 cond.bind
259 )));
260 }
261
262 known_binds.push(&cond.bind);
263 }
264
265 for ev in &rule.output.evidence {
267 if !known_binds.contains(&ev.as_str()) {
268 return Err(Error::InvalidRule(format!(
269 "output evidence references unknown bind '{ev}'"
270 )));
271 }
272 }
273
274 Ok(())
275}
276
277pub fn parse_rule(yaml_str: &str) -> Result<CorrelationRule> {
279 let rule: CorrelationRule =
280 serde_yaml::from_str(yaml_str).map_err(|e| Error::Yaml(e.to_string()))?;
281 validate_rule(&rule)?;
282 Ok(rule)
283}
284
285pub fn load_rules_from_files(paths: &[PathBuf]) -> Result<Vec<CorrelationRule>> {
289 let mut rules = Vec::with_capacity(paths.len());
290 for path in paths {
291 let content = std::fs::read_to_string(path)?;
292 let rule = parse_rule(&content)?;
293 rules.push(rule);
294 }
295 Ok(rules)
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-condition rule used by several tests: a file read followed by an
    /// egress event whose destination is not a local/private address.
    const EXAMPLE_RULE: &str = r#"
schema: clawdstrike.hunt.correlation.v1
name: "MCP Tool Exfiltration Attempt"
severity: high
description: >
  Detects an MCP tool reading sensitive files followed by
  network egress to an external domain within 30 seconds.
window: 30s
conditions:
  - source: receipt
    action_type: file
    verdict: allow
    target_pattern: "/etc/passwd|/etc/shadow|\\.ssh/|\\.(env|pem|key)$"
    bind: file_access
  - source: [receipt, hubble]
    action_type: egress
    not_target_pattern: "->\\s*(localhost|127\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|10\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|172\\.(1[6-9]|2[0-9]|3[01])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|192\\.168\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9]))(?::[0-9]{1,5})?(?:$|[^A-Za-z0-9.:-])"
    after: file_access
    within: 30s
    bind: egress_event
output:
  title: "Potential data exfiltration via MCP tool"
  evidence:
    - file_access
    - egress_event
"#;

    #[test]
    fn parse_valid_rule() {
        let rule = parse_rule(EXAMPLE_RULE).unwrap();
        assert_eq!(rule.schema, "clawdstrike.hunt.correlation.v1");
        assert_eq!(rule.name, "MCP Tool Exfiltration Attempt");
        assert_eq!(rule.severity, RuleSeverity::High);
        assert_eq!(rule.window, Duration::seconds(30));
        assert_eq!(rule.conditions.len(), 2);

        // First condition: scalar `source`, no ordering constraints.
        assert_eq!(rule.conditions[0].source, vec!["receipt".to_string()]);
        assert_eq!(rule.conditions[0].action_type.as_deref(), Some("file"));
        assert_eq!(rule.conditions[0].verdict.as_deref(), Some("allow"));
        assert!(rule.conditions[0].target_pattern.is_some());
        assert!(rule.conditions[0].after.is_none());
        assert!(rule.conditions[0].within.is_none());
        assert_eq!(rule.conditions[0].bind, "file_access");

        // Second condition: list `source`, ordered after the first.
        assert_eq!(
            rule.conditions[1].source,
            vec!["receipt".to_string(), "hubble".to_string()]
        );
        assert_eq!(rule.conditions[1].after.as_deref(), Some("file_access"));
        assert_eq!(rule.conditions[1].within, Some(Duration::seconds(30)));
        assert_eq!(rule.conditions[1].bind, "egress_event");

        assert_eq!(
            rule.output.title,
            "Potential data exfiltration via MCP tool"
        );
        assert_eq!(
            rule.output.evidence,
            vec!["file_access".to_string(), "egress_event".to_string()]
        );
    }

    #[test]
    fn parse_single_source_string() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Single source test"
severity: low
description: "test"
window: 5m
conditions:
  - source: tetragon
    bind: evt
output:
  title: "test"
  evidence:
    - evt
"#;
        let rule = parse_rule(yaml).unwrap();
        assert_eq!(rule.conditions[0].source, vec!["tetragon".to_string()]);
    }

    #[test]
    fn exfil_not_target_pattern_matches_private_destination_after_arrow() {
        let rule = parse_rule(EXAMPLE_RULE).unwrap();
        let pattern = rule.conditions[1]
            .not_target_pattern
            .as_ref()
            .expect("pattern");
        let re = regex::Regex::new(pattern).expect("valid regex");

        assert!(
            !re.is_match("172.16.0.1"),
            "bare private IP should not match without destination delimiter"
        );
        assert!(
            re.is_match("egress TCP 10.0.0.1:8080 -> 172.16.0.1:443"),
            "private destination in real Hubble-style summary must match"
        );
        assert!(
            re.is_match("egress TCP 10.0.0.1:8080 -> 172.16.0.1"),
            "private destination without an explicit port must still match"
        );
        assert!(
            re.is_match("10.0.0.1 -> 172.16.0.1:443"),
            "private destination should match even when summary starts with source IP"
        );
        assert!(
            re.is_match("egress TCP 10.0.0.1:8080 -> localhost:443"),
            "localhost destination should remain excluded"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 172.16.0.1443"),
            "bare trailing digits must not be treated as an implicit port"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> localhost.evil.com:443"),
            "localhost subdomains must not be excluded as localhost"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 10.0.0.1.evil.com:443"),
            "IP-like prefixes in hostnames must not be treated as private destinations"
        );
        assert!(
            !re.is_match("172.160.0.1"),
            "public 172.160.x.x must not be treated as private"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 172.160.0.1:443"),
            "public 172.160.x.x destination must not be treated as private"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 93.184.216.34:443"),
            "public destination should not be excluded just because source is private"
        );
        assert!(
            !re.is_match("10.0.0.1 -> 93.184.216.34:443"),
            "private source at start should not be excluded when destination is public"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 100.1.2.3:443"),
            "public 100.x.x.x must not be treated as private 10.x.x.x"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 1270.0.0.1:443"),
            "public 1270.x.x.x must not be treated as private 127.x.x.x"
        );
        assert!(
            !re.is_match("egress TCP 10.0.0.1:8080 -> 172.25.999.1:443"),
            "invalid octets must not match private-IP exclusion"
        );
        assert!(
            re.is_match("egress TCP 10.0.0.1:8080 -> 172.25.99.1:443"),
            "valid RFC1918 addresses should still be excluded"
        );
    }

    #[test]
    fn reject_unknown_schema() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v99
name: "Bad schema"
severity: low
description: "test"
window: 10s
conditions:
  - source: receipt
    bind: evt
output:
  title: "test"
  evidence:
    - evt
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("unsupported schema"), "got: {msg}");
    }

    #[test]
    fn reject_empty_conditions() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "No conditions"
severity: medium
description: "test"
window: 10s
conditions: []
output:
  title: "test"
  evidence: []
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("at least one condition"), "got: {msg}");
    }

    #[test]
    fn reject_invalid_after_reference() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Bad after ref"
severity: high
description: "test"
window: 30s
conditions:
  - source: receipt
    after: nonexistent
    bind: evt
output:
  title: "test"
  evidence:
    - evt
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("unknown bind 'nonexistent'"), "got: {msg}");
    }

    #[test]
    fn reject_invalid_evidence_reference() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Bad evidence ref"
severity: low
description: "test"
window: 10s
conditions:
  - source: receipt
    bind: evt
output:
  title: "test"
  evidence:
    - missing_bind
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("unknown bind 'missing_bind'"), "got: {msg}");
    }

    #[test]
    fn reject_within_exceeding_window() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Within exceeds window"
severity: low
description: "test"
window: 10s
conditions:
  - source: receipt
    bind: first
  - source: hubble
    after: first
    within: 60s
    bind: second
output:
  title: "test"
  evidence:
    - first
    - second
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("exceeds global window"), "got: {msg}");
    }

    #[test]
    fn reject_within_without_after() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Within without after"
severity: low
description: "test"
window: 30s
conditions:
  - source: receipt
    within: 10s
    bind: evt
output:
  title: "test"
  evidence:
    - evt
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("'within' but no 'after'"),
            "expected within-without-after error, got: {msg}"
        );
    }

    #[test]
    fn parse_duration_str_various() {
        assert_eq!(
            hush_core::parse_human_duration("30s"),
            Some(Duration::seconds(30))
        );
        assert_eq!(
            hush_core::parse_human_duration("5m"),
            Some(Duration::minutes(5))
        );
        assert_eq!(
            hush_core::parse_human_duration("1h"),
            Some(Duration::hours(1))
        );
        assert_eq!(
            hush_core::parse_human_duration("2d"),
            Some(Duration::days(2))
        );
        assert_eq!(
            hush_core::parse_human_duration("0s"),
            Some(Duration::seconds(0))
        );
        assert_eq!(hush_core::parse_human_duration(""), None);
        assert_eq!(hush_core::parse_human_duration("abc"), None);
        assert_eq!(hush_core::parse_human_duration("10x"), None);
        assert_eq!(hush_core::parse_human_duration("s"), None);
    }

    #[test]
    fn parse_duration_str_multi_char_suffixes() {
        assert_eq!(
            hush_core::parse_human_duration("30sec"),
            Some(Duration::seconds(30))
        );
        assert_eq!(
            hush_core::parse_human_duration("5min"),
            Some(Duration::minutes(5))
        );
        assert_eq!(
            hush_core::parse_human_duration("5mins"),
            Some(Duration::minutes(5))
        );
        assert_eq!(
            hush_core::parse_human_duration("1hr"),
            Some(Duration::hours(1))
        );
        assert_eq!(
            hush_core::parse_human_duration("2hrs"),
            Some(Duration::hours(2))
        );
        assert_eq!(
            hush_core::parse_human_duration("1hour"),
            Some(Duration::hours(1))
        );
        assert_eq!(
            hush_core::parse_human_duration("3days"),
            Some(Duration::days(3))
        );
        assert_eq!(
            hush_core::parse_human_duration("1day"),
            Some(Duration::days(1))
        );
        assert_eq!(
            hush_core::parse_human_duration("10seconds"),
            Some(Duration::seconds(10))
        );
        assert_eq!(
            hush_core::parse_human_duration("2minutes"),
            Some(Duration::minutes(2))
        );
    }

    #[test]
    fn parse_duration_str_multibyte_utf8_returns_none() {
        // Non-ASCII suffixes must be rejected cleanly rather than panic.
        assert_eq!(hush_core::parse_human_duration("30秒"), None);
        assert_eq!(hush_core::parse_human_duration("5分"), None);
        assert_eq!(hush_core::parse_human_duration("1時間"), None);
        assert_eq!(hush_core::parse_human_duration("10🕐"), None);
    }

    #[test]
    fn reject_duplicate_bind_names() {
        let yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Duplicate bind"
severity: high
description: "test"
window: 30s
conditions:
  - source: receipt
    action_type: file
    bind: evt
  - source: hubble
    action_type: egress
    bind: evt
output:
  title: "test"
  evidence:
    - evt
"#;
        let err = parse_rule(yaml).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("reuses bind name 'evt'"),
            "expected duplicate bind error, got: {msg}"
        );
    }

    #[test]
    fn load_rules_from_temp_files() {
        let dir = tempfile::tempdir().unwrap();

        let rule1_path = dir.path().join("rule1.yaml");
        std::fs::write(&rule1_path, EXAMPLE_RULE).unwrap();

        let rule2_yaml = r#"
schema: clawdstrike.hunt.correlation.v1
name: "Lateral movement"
severity: critical
description: "Detects lateral movement patterns"
window: 5m
conditions:
  - source: tetragon
    action_type: process
    bind: proc
output:
  title: "Lateral movement detected"
  evidence:
    - proc
"#;
        let rule2_path = dir.path().join("rule2.yaml");
        std::fs::write(&rule2_path, rule2_yaml).unwrap();

        let rules = load_rules_from_files(&[rule1_path, rule2_path]).unwrap();
        assert_eq!(rules.len(), 2);
        assert_eq!(rules[0].name, "MCP Tool Exfiltration Attempt");
        assert_eq!(rules[1].name, "Lateral movement");
        assert_eq!(rules[1].severity, RuleSeverity::Critical);
    }

    #[test]
    fn load_rules_missing_file() {
        let result = load_rules_from_files(&[PathBuf::from("/nonexistent/rule.yaml")]);
        assert!(result.is_err());
    }

    #[test]
    fn severity_serde_roundtrip() {
        let yaml = serde_yaml::to_string(&RuleSeverity::Critical).unwrap();
        assert!(yaml.contains("critical"));
        let back: RuleSeverity = serde_yaml::from_str(&yaml).unwrap();
        assert_eq!(back, RuleSeverity::Critical);
    }

    #[test]
    fn rule_serialization_roundtrip() {
        let rule = parse_rule(EXAMPLE_RULE).unwrap();
        let serialized = serde_yaml::to_string(&rule).unwrap();
        // Re-parse through `parse_rule` so the serialized form is validated too.
        let reparsed = parse_rule(&serialized).unwrap();

        assert_eq!(reparsed.schema, rule.schema);
        assert_eq!(reparsed.name, rule.name);
        assert_eq!(reparsed.severity, rule.severity);
        assert_eq!(reparsed.description, rule.description);
        // Exercises the custom duration serializer/deserializer pair.
        assert_eq!(reparsed.window, rule.window);

        assert_eq!(reparsed.conditions.len(), rule.conditions.len());
        for (after, before) in reparsed.conditions.iter().zip(&rule.conditions) {
            // `source` is always written as a list, including the scalar case.
            assert_eq!(after.source, before.source);
            assert_eq!(after.action_type, before.action_type);
            assert_eq!(after.verdict, before.verdict);
            assert_eq!(after.target_pattern, before.target_pattern);
            assert_eq!(after.not_target_pattern, before.not_target_pattern);
            assert_eq!(after.after, before.after);
            // Covers both the omitted (`None`) and the present optional duration.
            assert_eq!(after.within, before.within);
            assert_eq!(after.bind, before.bind);
        }

        assert_eq!(reparsed.output.title, rule.output.title);
        assert_eq!(reparsed.output.evidence, rule.output.evidence);
    }

    #[test]
    fn duration_fields_roundtrip_in_every_unit() {
        // One value per unit the formatter can choose (s, m, h, d).
        let durations = [
            Duration::seconds(30),
            Duration::minutes(5),
            Duration::hours(1),
            Duration::days(2),
        ];

        for &dur in &durations {
            let mut rule = parse_rule(EXAMPLE_RULE).unwrap();
            rule.window = dur;
            // Equal to the window, so validation of the reparsed rule still passes.
            rule.conditions[1].within = Some(dur);

            let serialized = serde_yaml::to_string(&rule).unwrap();
            let reparsed = parse_rule(&serialized).unwrap();

            assert_eq!(reparsed.window, dur, "serialized as:\n{serialized}");
            assert_eq!(
                reparsed.conditions[1].within,
                Some(dur),
                "serialized as:\n{serialized}"
            );
        }
    }
}