1use crate::analysis::data_flow::DataFlowAnalysis;
14use crate::priority::{call_graph::CallGraph, call_graph::FunctionId};
15use serde::{Deserialize, Serialize};
16use std::collections::{HashMap, HashSet};
17
18pub mod population;
19
20mod function_id_serde {
21 use super::*;
22 use serde::{Deserialize, Deserializer, Serialize, Serializer};
23 use std::collections::HashMap as StdHashMap;
24
25 pub fn serialize<S, V>(map: &HashMap<FunctionId, V>, serializer: S) -> Result<S::Ok, S::Error>
26 where
27 S: Serializer,
28 V: Serialize,
29 {
30 let string_map: StdHashMap<String, &V> = map
31 .iter()
32 .map(|(k, v)| (format!("{}:{}:{}", k.file.display(), k.name, k.line), v))
33 .collect();
34 string_map.serialize(serializer)
35 }
36
37 pub fn deserialize<'de, D, V>(deserializer: D) -> Result<HashMap<FunctionId, V>, D::Error>
38 where
39 D: Deserializer<'de>,
40 V: Deserialize<'de>,
41 {
42 let string_map: StdHashMap<String, V> = StdHashMap::deserialize(deserializer)?;
43 let mut result = HashMap::new();
44 for (key, value) in string_map {
45 let parts: Vec<&str> = key.rsplitn(3, ':').collect();
46 if parts.len() == 3 {
47 let func_id = FunctionId::new(
48 parts[2].into(),
49 parts[1].to_string(),
50 parts[0].parse().unwrap_or(0),
51 );
52 result.insert(func_id, value);
53 }
54 }
55 Ok(result)
56 }
57}
58
59mod function_id_tuple_serde {
60 use super::*;
61 use serde::{Deserialize, Deserializer, Serialize, Serializer};
62 use std::collections::HashMap as StdHashMap;
63
64 pub fn serialize<S, V>(
65 map: &HashMap<(FunctionId, FunctionId), V>,
66 serializer: S,
67 ) -> Result<S::Ok, S::Error>
68 where
69 S: Serializer,
70 V: Serialize,
71 {
72 let string_map: StdHashMap<String, &V> = map
73 .iter()
74 .map(|((k1, k2), v)| {
75 let key = format!(
76 "{}:{}:{}|{}:{}:{}",
77 k1.file.display(),
78 k1.name,
79 k1.line,
80 k2.file.display(),
81 k2.name,
82 k2.line
83 );
84 (key, v)
85 })
86 .collect();
87 string_map.serialize(serializer)
88 }
89
90 pub fn deserialize<'de, D, V>(
91 deserializer: D,
92 ) -> Result<HashMap<(FunctionId, FunctionId), V>, D::Error>
93 where
94 D: Deserializer<'de>,
95 V: Deserialize<'de>,
96 {
97 let string_map: StdHashMap<String, V> = StdHashMap::deserialize(deserializer)?;
98 let mut result = HashMap::new();
99 for (key, value) in string_map {
100 let parts: Vec<&str> = key.split('|').collect();
101 if parts.len() == 2 {
102 let parts1: Vec<&str> = parts[0].rsplitn(3, ':').collect();
103 let parts2: Vec<&str> = parts[1].rsplitn(3, ':').collect();
104 if parts1.len() == 3 && parts2.len() == 3 {
105 let func_id1 = FunctionId::new(
106 parts1[2].into(),
107 parts1[1].to_string(),
108 parts1[0].parse().unwrap_or(0),
109 );
110 let func_id2 = FunctionId::new(
111 parts2[2].into(),
112 parts2[1].to_string(),
113 parts2[0].parse().unwrap_or(0),
114 );
115 result.insert((func_id1, func_id2), value);
116 }
117 }
118 }
119 Ok(result)
120 }
121}
122
/// A call graph combined with per-function data-flow facts.
///
/// Maps keyed by [`FunctionId`] (or a pair of them) are serialized through the
/// `function_id_serde` / `function_id_tuple_serde` adapters, which turn the
/// keys into strings. The two CFG maps are marked `#[serde(skip)]` and are
/// therefore empty after deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataFlowGraph {
    /// Underlying call graph; exposed read-only through `call_graph()`.
    call_graph: CallGraph,
    /// Set of variable names recorded per function
    /// (see `add_variable_dependencies`).
    #[serde(with = "function_id_serde")]
    variable_deps: HashMap<FunctionId, HashSet<String>>,
    /// Transformation recorded for a `(from, to)` function pair
    /// (see `add_data_transformation`).
    #[serde(with = "function_id_tuple_serde")]
    data_transformations: HashMap<(FunctionId, FunctionId), DataTransformation>,
    /// I/O operations recorded per function, in insertion order.
    #[serde(with = "function_id_serde")]
    io_operations: HashMap<FunctionId, Vec<IoOperation>>,
    /// Purity verdict per function; consulted first by `has_side_effects`.
    #[serde(with = "function_id_serde")]
    purity_analysis: HashMap<FunctionId, PurityInfo>,
    /// Raw data-flow analysis per function. Not serialized.
    #[serde(skip)]
    cfg_analysis: HashMap<FunctionId, DataFlowAnalysis>,
    /// Data-flow analysis bundled with its variable-name table, per function.
    /// Not serialized.
    #[serde(skip)]
    cfg_analysis_with_context: HashMap<FunctionId, CfgAnalysisWithContext>,
    /// Mutation summary per function.
    #[serde(with = "function_id_serde")]
    mutation_analysis: HashMap<FunctionId, MutationInfo>,
}
153
/// Description of how data changes between two functions.
///
/// Stored in [`DataFlowGraph`] under a `(from, to)` function pair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataTransformation {
    /// Names of the input variables.
    pub input_vars: Vec<String>,
    /// Names of the output variables.
    pub output_vars: Vec<String>,
    /// Free-form label for the kind of transformation (the tests use `"map"`).
    pub transformation_type: String,
}
163
/// A single I/O operation recorded for a function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IoOperation {
    /// Free-form label for the kind of operation (the tests use `"file_read"`
    /// and `"console_log"`).
    pub operation_type: String,
    /// Names of the variables involved in the operation.
    pub variables: Vec<String>,
    /// Line number of the operation — presumably within the function's source
    /// file; confirm against the code that populates it.
    pub line: usize,
}
173
/// Purity verdict for a function.
///
/// When present, `DataFlowGraph::has_side_effects` answers from `is_pure`
/// alone.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PurityInfo {
    /// `true` when the function was judged pure.
    pub is_pure: bool,
    /// Confidence in the verdict — presumably in `0.0..=1.0` (the tests use
    /// `0.95`); TODO confirm the range with the producer.
    pub confidence: f32,
    /// Reasons the function was judged impure; empty in the pure test case.
    pub impurity_reasons: Vec<String>,
}
183
/// Mutation summary for a function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MutationInfo {
    /// `true` when at least one mutation was detected.
    pub has_mutations: bool,
    /// Descriptions of the detected mutations — format is defined by the
    /// producer and not visible here.
    pub detected_mutations: Vec<String>,
}
193
194impl MutationInfo {
195 pub fn none() -> Self {
197 Self {
198 has_mutations: false,
199 detected_mutations: Vec::new(),
200 }
201 }
202
203 pub fn is_pure(&self) -> bool {
205 !self.has_mutations
206 }
207}
208
/// A data-flow analysis bundled with the table needed to turn its variable
/// ids back into names.
#[derive(Debug, Clone)]
pub struct CfgAnalysisWithContext {
    /// The analysis result itself.
    pub analysis: DataFlowAnalysis,
    /// Variable names, indexed by `VarId::name_id` (see `var_name`).
    pub var_names: Vec<String>,
}
239
240impl CfgAnalysisWithContext {
241 pub fn new(var_names: Vec<String>, analysis: DataFlowAnalysis) -> Self {
243 Self {
244 analysis,
245 var_names,
246 }
247 }
248
249 pub fn var_name(&self, var_id: crate::analysis::data_flow::VarId) -> String {
251 self.var_names
252 .get(var_id.name_id as usize)
253 .cloned()
254 .unwrap_or_else(|| format!("unknown_{}", var_id.name_id))
255 }
256
257 pub fn var_names_for(
259 &self,
260 var_ids: impl Iterator<Item = crate::analysis::data_flow::VarId>,
261 ) -> Vec<String> {
262 var_ids.map(|id| self.var_name(id)).collect()
263 }
264}
265
266impl DataFlowGraph {
267 pub fn new() -> Self {
268 Self {
269 call_graph: CallGraph::new(),
270 variable_deps: HashMap::new(),
271 data_transformations: HashMap::new(),
272 io_operations: HashMap::new(),
273 purity_analysis: HashMap::new(),
274 cfg_analysis: HashMap::new(),
275 cfg_analysis_with_context: HashMap::new(),
276 mutation_analysis: HashMap::new(),
277 }
278 }
279
280 pub fn from_call_graph(call_graph: CallGraph) -> Self {
282 Self {
283 call_graph,
284 variable_deps: HashMap::new(),
285 data_transformations: HashMap::new(),
286 io_operations: HashMap::new(),
287 purity_analysis: HashMap::new(),
288 cfg_analysis: HashMap::new(),
289 cfg_analysis_with_context: HashMap::new(),
290 mutation_analysis: HashMap::new(),
291 }
292 }
293
294 pub fn call_graph(&self) -> &CallGraph {
296 &self.call_graph
297 }
298
299 pub fn get_variable_dependencies(&self, func_id: &FunctionId) -> Option<&HashSet<String>> {
301 self.variable_deps.get(func_id)
302 }
303
304 pub fn add_variable_dependencies(&mut self, func_id: FunctionId, variables: HashSet<String>) {
306 self.variable_deps.insert(func_id, variables);
307 }
308
309 pub fn get_data_transformation(
311 &self,
312 from: &FunctionId,
313 to: &FunctionId,
314 ) -> Option<&DataTransformation> {
315 self.data_transformations.get(&(from.clone(), to.clone()))
316 }
317
318 pub fn add_data_transformation(
320 &mut self,
321 from: FunctionId,
322 to: FunctionId,
323 transformation: DataTransformation,
324 ) {
325 self.data_transformations.insert((from, to), transformation);
326 }
327
328 pub fn get_io_operations(&self, func_id: &FunctionId) -> Option<&Vec<IoOperation>> {
330 self.io_operations.get(func_id)
331 }
332
333 pub fn add_io_operation(&mut self, func_id: FunctionId, operation: IoOperation) {
335 self.io_operations
336 .entry(func_id)
337 .or_default()
338 .push(operation);
339 }
340
341 pub fn get_purity_info(&self, func_id: &FunctionId) -> Option<&PurityInfo> {
343 self.purity_analysis.get(func_id)
344 }
345
346 pub fn set_purity_info(&mut self, func_id: FunctionId, purity: PurityInfo) {
348 self.purity_analysis.insert(func_id, purity);
349 }
350
351 pub fn get_cfg_analysis(&self, func_id: &FunctionId) -> Option<&DataFlowAnalysis> {
353 self.cfg_analysis.get(func_id)
354 }
355
356 pub fn set_cfg_analysis(&mut self, func_id: FunctionId, analysis: DataFlowAnalysis) {
358 self.cfg_analysis.insert(func_id, analysis);
359 }
360
361 pub fn get_mutation_info(&self, func_id: &FunctionId) -> Option<&MutationInfo> {
363 self.mutation_analysis.get(func_id)
364 }
365
366 pub fn set_mutation_info(&mut self, func_id: FunctionId, info: MutationInfo) {
368 self.mutation_analysis.insert(func_id, info);
369 }
370
371 pub fn get_cfg_analysis_with_context(
373 &self,
374 func_id: &FunctionId,
375 ) -> Option<&CfgAnalysisWithContext> {
376 self.cfg_analysis_with_context.get(func_id)
377 }
378
379 pub fn set_cfg_analysis_with_context(
381 &mut self,
382 func_id: FunctionId,
383 context: CfgAnalysisWithContext,
384 ) {
385 self.cfg_analysis_with_context.insert(func_id, context);
386 }
387
388 pub fn has_side_effects(&self, func_id: &FunctionId) -> bool {
392 if let Some(purity) = self.get_purity_info(func_id) {
394 return !purity.is_pure;
395 }
396
397 if let Some(io_ops) = self.get_io_operations(func_id) {
399 return !io_ops.is_empty();
400 }
401
402 true
404 }
405
406 pub fn get_downstream_dependencies(&self, func_id: &FunctionId) -> Vec<FunctionId> {
408 self.call_graph.get_callers(func_id)
410 }
411
412 pub fn get_upstream_dependencies(&self, _func_id: &FunctionId) -> Vec<FunctionId> {
414 Vec::new()
417 }
418
419 pub fn analyze_modification_impact(&self, func_id: &FunctionId) -> ModificationImpact {
421 let downstream = self.get_downstream_dependencies(func_id);
422 let upstream = self.get_upstream_dependencies(func_id);
423 let has_io = self
424 .get_io_operations(func_id)
425 .is_some_and(|ops| !ops.is_empty());
426 let is_pure = self.get_purity_info(func_id).is_some_and(|p| p.is_pure);
427
428 ModificationImpact {
429 affected_functions: downstream.len(),
430 dependency_count: upstream.len(),
431 has_side_effects: has_io || !is_pure,
432 risk_level: self.calculate_risk_level(&downstream, has_io, is_pure),
433 }
434 }
435
436 fn calculate_risk_level(
437 &self,
438 downstream: &[FunctionId],
439 has_io: bool,
440 is_pure: bool,
441 ) -> RiskLevel {
442 match (downstream.len(), has_io, is_pure) {
443 (0, false, true) => RiskLevel::Low,
444 (1..=5, false, true) => RiskLevel::Medium,
445 (1..=5, true, _) => RiskLevel::High,
446 (6.., _, _) => RiskLevel::Critical,
447 _ => RiskLevel::Medium,
448 }
449 }
450}
451
/// Result of `DataFlowGraph::analyze_modification_impact`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModificationImpact {
    /// Number of entries returned by `get_downstream_dependencies` (the
    /// callers reported by the call graph).
    pub affected_functions: usize,
    /// Number of entries returned by `get_upstream_dependencies`, which is
    /// currently always empty.
    pub dependency_count: usize,
    /// `true` when the function has recorded I/O or is not known to be pure.
    pub has_side_effects: bool,
    /// Overall risk classification derived from the values above.
    pub risk_level: RiskLevel,
}
463
/// Risk classification produced by `DataFlowGraph::calculate_risk_level`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum RiskLevel {
    /// No callers, no I/O, and known pure.
    Low,
    /// Fallback for every combination not covered by the other variants
    /// (for example up to five callers without I/O).
    Medium,
    /// One to five callers and the function performs I/O.
    High,
    /// Six or more callers, regardless of I/O or purity.
    Critical,
}
471
472impl Default for DataFlowGraph {
473 fn default() -> Self {
474 Self::new()
475 }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a `FunctionId` in `test.rs` at line 1 with the given name.
    fn create_test_function_id(name: &str) -> FunctionId {
        FunctionId::new(PathBuf::from("test.rs"), name.to_string(), 1)
    }

    #[test]
    fn test_data_flow_graph_creation() {
        let graph = DataFlowGraph::new();
        assert_eq!(graph.call_graph().node_count(), 0);
        assert!(graph.variable_deps.is_empty());
        assert!(graph.data_transformations.is_empty());
    }

    #[test]
    fn test_variable_dependencies() {
        let mut graph = DataFlowGraph::new();
        let func_id = create_test_function_id("test_func");

        let mut variables = HashSet::new();
        variables.insert("x".to_string());
        variables.insert("y".to_string());

        graph.add_variable_dependencies(func_id.clone(), variables);

        let deps = graph.get_variable_dependencies(&func_id).unwrap();
        assert_eq!(deps.len(), 2);
        assert!(deps.contains("x"));
        assert!(deps.contains("y"));
    }

    #[test]
    fn test_io_operations() {
        let mut graph = DataFlowGraph::new();
        let func_id = create_test_function_id("io_func");

        let io_op = IoOperation {
            operation_type: "file_read".to_string(),
            variables: vec!["filename".to_string()],
            line: 42,
        };

        graph.add_io_operation(func_id.clone(), io_op);

        let ops = graph.get_io_operations(&func_id).unwrap();
        assert_eq!(ops.len(), 1);
        assert_eq!(ops[0].operation_type, "file_read");
        assert_eq!(ops[0].line, 42);
    }

    #[test]
    fn test_purity_analysis() {
        let mut graph = DataFlowGraph::new();
        let func_id = create_test_function_id("pure_func");

        let purity = PurityInfo {
            is_pure: true,
            confidence: 0.95,
            impurity_reasons: vec![],
        };

        graph.set_purity_info(func_id.clone(), purity);

        let purity_info = graph.get_purity_info(&func_id).unwrap();
        assert!(purity_info.is_pure);
        assert_eq!(purity_info.confidence, 0.95);
        assert!(purity_info.impurity_reasons.is_empty());

        // A recorded "pure" verdict means no side effects are reported.
        assert!(!graph.has_side_effects(&func_id));
    }

    #[test]
    fn test_impure_purity_reports_side_effects() {
        let mut graph = DataFlowGraph::new();
        let func_id = create_test_function_id("impure_by_verdict");

        graph.set_purity_info(
            func_id.clone(),
            PurityInfo {
                is_pure: false,
                confidence: 0.8,
                impurity_reasons: vec!["writes global".to_string()],
            },
        );

        assert!(graph.has_side_effects(&func_id));
    }

    #[test]
    fn test_side_effects_detection() {
        let mut graph = DataFlowGraph::new();
        let func_id = create_test_function_id("impure_func");

        // No purity verdict is recorded, so the I/O list decides.
        let io_op = IoOperation {
            operation_type: "console_log".to_string(),
            variables: vec!["message".to_string()],
            line: 10,
        };
        graph.add_io_operation(func_id.clone(), io_op);

        assert!(graph.has_side_effects(&func_id));
    }

    #[test]
    fn test_side_effects_conservative_default() {
        // With neither a purity verdict nor I/O information the graph must
        // assume side effects.
        let graph = DataFlowGraph::new();
        let func_id = create_test_function_id("unknown_func");

        assert!(graph.has_side_effects(&func_id));
    }

    #[test]
    fn test_data_transformation() {
        let mut graph = DataFlowGraph::new();
        let from_func = create_test_function_id("caller");
        let to_func = create_test_function_id("callee");

        let transformation = DataTransformation {
            input_vars: vec!["input".to_string()],
            output_vars: vec!["result".to_string()],
            transformation_type: "map".to_string(),
        };

        graph.add_data_transformation(from_func.clone(), to_func.clone(), transformation);

        let trans = graph.get_data_transformation(&from_func, &to_func).unwrap();
        assert_eq!(trans.transformation_type, "map");
        assert_eq!(trans.input_vars, vec!["input"]);
        assert_eq!(trans.output_vars, vec!["result"]);
    }

    #[test]
    fn test_modification_impact_analysis() {
        let graph = DataFlowGraph::new();
        let func_id = create_test_function_id("test_func");

        let impact = graph.analyze_modification_impact(&func_id);

        assert_eq!(impact.affected_functions, 0);
        assert_eq!(impact.dependency_count, 0);
        // Unknown purity is treated as impure.
        assert!(impact.has_side_effects);
        // No callers, no I/O, not known pure: falls through to Medium.
        assert_eq!(impact.risk_level, RiskLevel::Medium);
    }

    #[test]
    fn test_risk_level_calculation() {
        let graph = DataFlowGraph::new();

        assert_eq!(graph.calculate_risk_level(&[], false, true), RiskLevel::Low);

        // No callers but I/O present: not Low, falls through to Medium.
        assert_eq!(
            graph.calculate_risk_level(&[], true, false),
            RiskLevel::Medium
        );

        let downstream = vec![create_test_function_id("caller1")];
        assert_eq!(
            graph.calculate_risk_level(&downstream, true, false),
            RiskLevel::High
        );

        // A few callers, pure and without I/O.
        assert_eq!(
            graph.calculate_risk_level(&downstream, false, true),
            RiskLevel::Medium
        );

        let many_downstream: Vec<FunctionId> = (0..10)
            .map(|i| create_test_function_id(&format!("caller_{}", i)))
            .collect();
        assert_eq!(
            graph.calculate_risk_level(&many_downstream, false, true),
            RiskLevel::Critical
        );
    }

    #[test]
    fn test_from_call_graph() {
        let call_graph = CallGraph::new();
        let graph = DataFlowGraph::from_call_graph(call_graph);

        assert_eq!(graph.call_graph().node_count(), 0);
        assert!(graph.variable_deps.is_empty());
    }

    #[test]
    fn test_mutation_info_none() {
        let info = MutationInfo::none();

        assert!(!info.has_mutations);
        assert!(info.detected_mutations.is_empty());
        assert!(info.is_pure());
    }

    #[test]
    fn test_varid_translation() {
        use crate::analysis::data_flow::{DataFlowAnalysis, ReachingDefinitions, VarId};

        let var_names = vec!["x".to_string(), "y".to_string(), "buffer".to_string()];

        let analysis = DataFlowAnalysis {
            reaching_defs: ReachingDefinitions::default(),
        };

        let ctx = CfgAnalysisWithContext::new(var_names, analysis);

        let var_id = VarId {
            name_id: 0,
            version: 0,
        };
        assert_eq!(ctx.var_name(var_id), "x");

        let var_id = VarId {
            name_id: 2,
            version: 1,
        };
        assert_eq!(ctx.var_name(var_id), "buffer");
    }

    #[test]
    fn test_translation_with_missing_id() {
        use crate::analysis::data_flow::{DataFlowAnalysis, ReachingDefinitions, VarId};

        let var_names = vec!["x".to_string()];

        let analysis = DataFlowAnalysis {
            reaching_defs: ReachingDefinitions::default(),
        };

        let ctx = CfgAnalysisWithContext::new(var_names, analysis);

        let invalid_id = VarId {
            name_id: 999,
            version: 0,
        };
        assert_eq!(ctx.var_name(invalid_id), "unknown_999");
    }

    #[test]
    fn test_var_names_for_preserves_order_and_placeholders() {
        use crate::analysis::data_flow::{DataFlowAnalysis, ReachingDefinitions, VarId};

        let var_names = vec!["x".to_string(), "y".to_string()];

        let analysis = DataFlowAnalysis {
            reaching_defs: ReachingDefinitions::default(),
        };

        let ctx = CfgAnalysisWithContext::new(var_names, analysis);

        let ids = vec![
            VarId {
                name_id: 1,
                version: 0,
            },
            VarId {
                name_id: 7,
                version: 0,
            },
        ];
        assert_eq!(
            ctx.var_names_for(ids.into_iter()),
            vec!["y".to_string(), "unknown_7".to_string()]
        );
    }
}