1use std::collections::HashMap;
6
/// Classification of how predictable a conditional branch is, derived
/// from observed taken/not-taken frequencies.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum BranchPredictability {
    AlwaysTaken,
    AlwaysNotTaken,
    MostlyTaken(f64),
    MostlyNotTaken(f64),
    Unpredictable,
}
impl BranchPredictability {
    /// Classify a branch from raw profile counts.
    ///
    /// `taken_freq` is how many times the branch was taken, `total`
    /// how many times it executed; a zero `total` yields
    /// `Unpredictable`.
    #[allow(dead_code)]
    pub fn from_frequency(taken_freq: f64, total: f64) -> Self {
        if total == 0.0 {
            return Self::Unpredictable;
        }
        let ratio = taken_freq / total;
        // Thresholds: >=95% / <=5% count as "always"; the 75%/25%
        // bands are "mostly"; everything between is unpredictable.
        match ratio {
            r if r >= 0.95 => Self::AlwaysTaken,
            r if r <= 0.05 => Self::AlwaysNotTaken,
            r if r >= 0.75 => Self::MostlyTaken(r),
            r if r <= 0.25 => Self::MostlyNotTaken(r),
            _ => Self::Unpredictable,
        }
    }
    /// True unless the branch is classified as `Unpredictable`.
    #[allow(dead_code)]
    pub fn is_biased(&self) -> bool {
        !matches!(self, Self::Unpredictable)
    }
    /// C++20-style attribute hint for this bias, if any.
    #[allow(dead_code)]
    pub fn emit_hint(&self) -> Option<&str> {
        match self {
            Self::AlwaysTaken | Self::MostlyTaken(_) => Some("[[likely]]"),
            Self::AlwaysNotTaken | Self::MostlyNotTaken(_) => Some("[[unlikely]]"),
            Self::Unpredictable => None,
        }
    }
}
52#[allow(dead_code)]
53#[derive(Debug, Clone)]
54pub struct ThinLtoPgoData {
55 pub module_hash: u64,
56 pub function_profiles: Vec<FunctionProfile>,
57 pub summary_flags: u32,
58}
59impl ThinLtoPgoData {
60 #[allow(dead_code)]
61 pub fn new(module_hash: u64) -> Self {
62 ThinLtoPgoData {
63 module_hash,
64 function_profiles: Vec::new(),
65 summary_flags: 0,
66 }
67 }
68 #[allow(dead_code)]
69 pub fn add_profile(&mut self, profile: FunctionProfile) {
70 self.function_profiles.push(profile);
71 }
72 #[allow(dead_code)]
73 pub fn is_empty(&self) -> bool {
74 self.function_profiles.is_empty()
75 }
76 #[allow(dead_code)]
77 pub fn hot_function_names(&self, threshold: u64) -> Vec<&str> {
78 self.function_profiles
79 .iter()
80 .filter(|p| p.is_hot_function(threshold))
81 .map(|p| p.name.as_str())
82 .collect()
83 }
84}
85#[allow(dead_code)]
86#[derive(Debug, Clone)]
87pub struct PgoAnnotatedFunction {
88 pub name: String,
89 pub entry_count: u64,
90 pub inline_hint: Option<InlineHint>,
91 pub hot_attributes: Vec<String>,
92}
93impl PgoAnnotatedFunction {
94 #[allow(dead_code)]
95 pub fn new(name: impl Into<String>, entry_count: u64) -> Self {
96 PgoAnnotatedFunction {
97 name: name.into(),
98 entry_count,
99 inline_hint: None,
100 hot_attributes: Vec::new(),
101 }
102 }
103 #[allow(dead_code)]
104 pub fn with_inline_hint(mut self, hint: InlineHint) -> Self {
105 self.inline_hint = Some(hint);
106 self
107 }
108 #[allow(dead_code)]
109 pub fn add_hot_attribute(&mut self, attr: impl Into<String>) {
110 self.hot_attributes.push(attr.into());
111 }
112 #[allow(dead_code)]
113 pub fn emit_llvm_attrs(&self) -> String {
114 let mut attrs = Vec::new();
115 attrs.push(format!(
116 "!prof !{{!\"func_entry_count\", i64 {}}}",
117 self.entry_count
118 ));
119 if let Some(ref hint) = self.inline_hint {
120 match hint {
121 InlineHint::AlwaysInline => attrs.push("alwaysinline".to_string()),
122 InlineHint::NeverInline => attrs.push("noinline".to_string()),
123 InlineHint::InlineWithBenefit(b) => {
124 attrs.push(format!("inlinehint /* benefit: {:.2} */", b))
125 }
126 }
127 }
128 attrs.join(" ")
129 }
130}
131#[allow(dead_code)]
132pub struct WholeProgramDevirt {
133 pub vtable_map: std::collections::HashMap<String, Vec<String>>,
134 pub call_profiles: Vec<VirtualCallRecord>,
135 pub min_speculation_threshold: f64,
136}
137impl WholeProgramDevirt {
138 #[allow(dead_code)]
139 pub fn new() -> Self {
140 WholeProgramDevirt {
141 vtable_map: std::collections::HashMap::new(),
142 call_profiles: Vec::new(),
143 min_speculation_threshold: 0.8,
144 }
145 }
146 #[allow(dead_code)]
147 pub fn register_vtable(&mut self, class: impl Into<String>, methods: Vec<String>) {
148 self.vtable_map.insert(class.into(), methods);
149 }
150 #[allow(dead_code)]
151 pub fn add_call_profile(&mut self, profile: VirtualCallRecord) {
152 self.call_profiles.push(profile);
153 }
154 #[allow(dead_code)]
155 pub fn speculation_opportunities(&self) -> Vec<(&VirtualCallRecord, &str, f64)> {
156 self.call_profiles
157 .iter()
158 .filter_map(|p| {
159 if let Some((target, ratio)) = p.dominant_target() {
160 if ratio >= self.min_speculation_threshold && !p.is_monomorphic() {
161 return Some((p, target, ratio));
162 }
163 }
164 None
165 })
166 .collect()
167 }
168 #[allow(dead_code)]
169 pub fn class_count(&self) -> usize {
170 self.vtable_map.len()
171 }
172}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Execution count for one control-flow-graph edge (block -> block).
pub struct EdgeProfile {
    /// Source basic-block id.
    pub from_block: u32,
    /// Destination basic-block id.
    pub to_block: u32,
    /// Number of times this edge was traversed.
    pub execution_count: u64,
}
180#[allow(dead_code)]
181#[derive(Debug, Clone)]
182pub struct ProfileMerger {
183 pub(super) profiles: Vec<RawProfileData>,
184 pub(super) weight_mode: MergeWeightMode,
185}
186impl ProfileMerger {
187 #[allow(dead_code)]
188 pub fn new(mode: MergeWeightMode) -> Self {
189 ProfileMerger {
190 profiles: Vec::new(),
191 weight_mode: mode,
192 }
193 }
194 #[allow(dead_code)]
195 pub fn add_profile(&mut self, profile: RawProfileData) {
196 self.profiles.push(profile);
197 }
198 #[allow(dead_code)]
199 pub fn merge_all(&self) -> Option<RawProfileData> {
200 if self.profiles.is_empty() {
201 return None;
202 }
203 let mut result = self.profiles[0].clone();
204 for p in &self.profiles[1..] {
205 result.merge(p);
206 }
207 match self.weight_mode {
208 MergeWeightMode::Equal => {
209 let n = self.profiles.len() as u64;
210 result.normalize(n);
211 }
212 MergeWeightMode::MaxCount => {
213 let max = result.max_count();
214 if max > 0 {
215 result.normalize(max);
216 }
217 }
218 MergeWeightMode::Proportional => {}
219 }
220 Some(result)
221 }
222}
/// Flat counter payload for one profiling run: a `version` header plus
/// a vector of 64-bit counters.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct RawProfileData {
    pub version: u32,
    /// Invariant: always equals `data.len()`.
    pub num_counters: u64,
    pub data: Vec<u64>,
}
impl RawProfileData {
    /// Empty profile for the given format version.
    #[allow(dead_code)]
    pub fn new(version: u32) -> Self {
        RawProfileData {
            version,
            num_counters: 0,
            data: Vec::new(),
        }
    }
    /// Append one counter value.
    #[allow(dead_code)]
    pub fn add_counter(&mut self, value: u64) {
        self.data.push(value);
        self.num_counters += 1;
    }
    /// Accumulate `other` into `self`, saturating on overflow.
    ///
    /// Fix: counters in `other` beyond `self.data.len()` were
    /// previously dropped by the `zip`; they are now appended so no
    /// profile data is silently lost, and `num_counters` is kept in
    /// sync with the (possibly grown) counter vector.
    #[allow(dead_code)]
    pub fn merge(&mut self, other: &RawProfileData) {
        for (a, b) in self.data.iter_mut().zip(other.data.iter()) {
            *a = a.saturating_add(*b);
        }
        if other.data.len() > self.data.len() {
            self.data.extend_from_slice(&other.data[self.data.len()..]);
        }
        self.num_counters = self.data.len() as u64;
    }
    /// Largest single counter value (0 when empty).
    #[allow(dead_code)]
    pub fn max_count(&self) -> u64 {
        self.data.iter().copied().max().unwrap_or(0)
    }
    /// Sum of all counters, saturating on overflow for consistency
    /// with `merge`'s saturating accumulation.
    #[allow(dead_code)]
    pub fn total_count(&self) -> u64 {
        self.data.iter().fold(0u64, |acc, &v| acc.saturating_add(v))
    }
    /// Divide every counter by `factor`; a zero factor is a no-op to
    /// avoid division by zero.
    #[allow(dead_code)]
    pub fn normalize(&mut self, factor: u64) {
        if factor == 0 {
            return;
        }
        for v in &mut self.data {
            *v /= factor;
        }
    }
}
/// Minimal auxiliary counter used by the PGO scaffolding.
#[allow(dead_code)]
pub struct PgoExtra {
    pub x: u32,
}
impl PgoExtra {
    /// Counter starting at zero.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self { x: 0 }
    }
    /// Current counter value.
    #[allow(dead_code)]
    pub fn value(&self) -> u32 {
        self.x
    }
    /// Bump the counter by one.
    #[allow(dead_code)]
    pub fn increment(&mut self) {
        self.x += 1;
    }
}
/// On-disk profile formats understood by the toolchain glue.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub enum PgoDataFormat {
    LlvmRaw,
    LlvmText,
    GccGcda,
    SpeeddataAutofdo,
    PropellerProtobuf,
}
impl PgoDataFormat {
    /// Conventional file extension for this format.
    #[allow(dead_code)]
    pub fn file_extension(&self) -> &str {
        match self {
            Self::LlvmRaw => "profraw",
            Self::LlvmText => "proftext",
            Self::GccGcda => "gcda",
            Self::SpeeddataAutofdo => "afdo",
            Self::PropellerProtobuf => "propeller",
        }
    }
    /// Name of the external tool used to merge profiles of this format.
    #[allow(dead_code)]
    pub fn merge_tool(&self) -> &str {
        match self {
            Self::LlvmRaw | Self::LlvmText => "llvm-profdata",
            Self::GccGcda => "gcov",
            Self::SpeeddataAutofdo => "create_gcov",
            Self::PropellerProtobuf => "propeller_opt",
        }
    }
    /// Shell command merging `inputs` into `output`, or a comment line
    /// for formats with no merge support.
    #[allow(dead_code)]
    pub fn emit_merge_command(&self, inputs: &[&str], output: &str) -> String {
        let joined = inputs.join(" ");
        match self {
            Self::LlvmRaw => {
                format!("llvm-profdata merge -output={} {}", output, joined)
            }
            Self::LlvmText => {
                format!("llvm-profdata merge -text -output={} {}", output, joined)
            }
            Self::GccGcda => format!("gcov --merge {} -o {}", joined, output),
            _ => format!("# merge not supported for {:?}", self),
        }
    }
}
/// Calling-context-sensitive profile node: an entry count for one call
/// stack, with child nodes for deeper contexts.
#[allow(dead_code)]
pub struct ContextSensitiveProfile {
    pub context_stack: Vec<String>,
    pub entry_count: u64,
    pub children: Vec<ContextSensitiveProfile>,
}
impl ContextSensitiveProfile {
    /// Leaf node for `context` observed `count` times.
    #[allow(dead_code)]
    pub fn new(context: Vec<String>, count: u64) -> Self {
        Self {
            context_stack: context,
            entry_count: count,
            children: Vec::new(),
        }
    }
    /// Attach a deeper-context child node.
    #[allow(dead_code)]
    pub fn add_child(&mut self, child: ContextSensitiveProfile) {
        self.children.push(child);
    }
    /// Depth of this node's calling context (frames recorded).
    #[allow(dead_code)]
    pub fn depth(&self) -> usize {
        self.context_stack.len()
    }
    /// This node's count plus the counts of every descendant.
    #[allow(dead_code)]
    pub fn total_count_in_subtree(&self) -> u64 {
        self.children
            .iter()
            .fold(self.entry_count, |acc, child| {
                acc + child.total_count_in_subtree()
            })
    }
    /// Pre-order flattening of the tree into (context, count) pairs.
    #[allow(dead_code)]
    pub fn flatten(&self) -> Vec<(&[String], u64)> {
        let mut rows = Vec::new();
        rows.push((self.context_stack.as_slice(), self.entry_count));
        for child in &self.children {
            rows.extend(child.flatten());
        }
        rows
    }
}
#[derive(Debug, Clone)]
/// Tunables controlling the PGO optimization pass.
pub struct PgoConfig {
    /// Call-count threshold used when classifying functions as hot.
    pub hot_threshold: u64,
    /// Allow inlining of hot callees.
    pub inline_hot: bool,
    /// Allow specialization of hot functions.
    pub specialize_hot: bool,
    /// Largest size estimate still eligible for inlining.
    pub max_inline_size: usize,
}
392pub struct PgoPass {
398 pub config: PgoConfig,
399 pub profile: ProfileData,
400}
401impl PgoPass {
402 pub fn new(config: PgoConfig) -> Self {
405 Self {
406 config,
407 profile: ProfileData::new(),
408 }
409 }
410 pub fn load_profile(&mut self, data: ProfileData) {
412 self.profile = data;
413 }
414 pub fn should_inline(&self, func_name: &str, size_estimate: usize) -> bool {
422 self.config.inline_hot
423 && self.profile.is_hot(func_name)
424 && size_estimate <= self.config.max_inline_size
425 }
426 pub fn should_specialize(&self, func_name: &str) -> bool {
433 self.config.specialize_hot && self.profile.is_hot(func_name)
434 }
435 pub fn optimize_call_sites(&self, functions: &[(String, usize)]) -> Vec<OptAction> {
441 functions
442 .iter()
443 .enumerate()
444 .map(|(idx, (name, size))| {
445 if self.should_inline(name, *size) {
446 OptAction::Inline {
447 caller: format!("__caller_{}", idx),
448 callee: name.clone(),
449 }
450 } else if self.should_specialize(name) {
451 OptAction::Specialize {
452 func: name.clone(),
453 call_site: idx,
454 }
455 } else {
456 OptAction::Noop
457 }
458 })
459 .collect()
460 }
461}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Inlining directive attached to a function by the PGO pass.
pub enum InlineHint {
    /// Force inlining at every call site.
    AlwaysInline,
    /// Forbid inlining.
    NeverInline,
    /// Prefer inlining, with an estimated benefit score.
    InlineWithBenefit(f64),
}
/// Per-function runtime counters filled in by instrumentation: one
/// entry counter, per-branch taken/not-taken pairs, and value-profile
/// slots.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct InstrumentationCounters {
    pub function_entry: u64,
    pub branch_taken: Vec<u64>,
    pub branch_not_taken: Vec<u64>,
    pub value_profiles: Vec<u64>,
}
impl InstrumentationCounters {
    /// Zeroed counters for `branch_count` branches and `value_count`
    /// value-profile sites.
    #[allow(dead_code)]
    pub fn new(branch_count: usize, value_count: usize) -> Self {
        Self {
            function_entry: 0,
            branch_taken: vec![0; branch_count],
            branch_not_taken: vec![0; branch_count],
            value_profiles: vec![0; value_count],
        }
    }
    /// Count one function entry.
    #[allow(dead_code)]
    pub fn record_entry(&mut self) {
        self.function_entry += 1;
    }
    /// Count one branch outcome; out-of-range ids are ignored.
    #[allow(dead_code)]
    pub fn record_branch(&mut self, branch_id: usize, taken: bool) {
        if branch_id >= self.branch_taken.len() {
            return;
        }
        if taken {
            self.branch_taken[branch_id] += 1;
        } else {
            self.branch_not_taken[branch_id] += 1;
        }
    }
    /// Count one observation at a value-profile site; the observed
    /// value itself is currently unused. Out-of-range ids are ignored.
    #[allow(dead_code)]
    pub fn record_value(&mut self, value_id: usize, _value: u64) {
        if let Some(slot) = self.value_profiles.get_mut(value_id) {
            *slot += 1;
        }
    }
    /// Fraction of executions where branch `branch_id` was taken;
    /// `None` for unknown or never-executed branches.
    #[allow(dead_code)]
    pub fn branch_bias(&self, branch_id: usize) -> Option<f64> {
        let taken = *self.branch_taken.get(branch_id)?;
        let not_taken = self.branch_not_taken[branch_id];
        let total = taken + not_taken;
        if total == 0 {
            None
        } else {
            Some(taken as f64 / total as f64)
        }
    }
    /// Dump all counters as one flat vector:
    /// [entry, taken..., not_taken..., values...].
    #[allow(dead_code)]
    pub fn serialize(&self) -> Vec<u64> {
        let mut out = Vec::with_capacity(
            1 + self.branch_taken.len() + self.branch_not_taken.len() + self.value_profiles.len(),
        );
        out.push(self.function_entry);
        out.extend(self.branch_taken.iter().copied());
        out.extend(self.branch_not_taken.iter().copied());
        out.extend(self.value_profiles.iter().copied());
        out
    }
}
529#[allow(dead_code)]
530pub struct DevirtualizationPass {
531 pub records: Vec<VirtualCallRecord>,
532 pub monomorphic_threshold: f64,
533}
534impl DevirtualizationPass {
535 #[allow(dead_code)]
536 pub fn new() -> Self {
537 DevirtualizationPass {
538 records: Vec::new(),
539 monomorphic_threshold: 0.95,
540 }
541 }
542 #[allow(dead_code)]
543 pub fn add_record(&mut self, rec: VirtualCallRecord) {
544 self.records.push(rec);
545 }
546 #[allow(dead_code)]
547 pub fn devirtualize_candidates(&self) -> Vec<&VirtualCallRecord> {
548 self.records
549 .iter()
550 .filter(|r| {
551 if let Some((_, ratio)) = r.dominant_target() {
552 ratio >= self.monomorphic_threshold
553 } else {
554 false
555 }
556 })
557 .collect()
558 }
559 #[allow(dead_code)]
560 pub fn speculation_candidates(&self) -> Vec<&VirtualCallRecord> {
561 self.records
562 .iter()
563 .filter(|r| r.is_bimorphic() && !r.is_monomorphic())
564 .collect()
565 }
566}
/// Feature toggles for a BOLT post-link optimization run.
#[allow(dead_code)]
pub struct BoltInstrumentationConfig {
    pub reorder_blocks: bool,
    pub reorder_functions: bool,
    pub split_functions: bool,
    pub dyno_stats: bool,
    pub plt_call_opt: bool,
    pub peepholes: bool,
}
impl BoltInstrumentationConfig {
    /// Commonly used default: everything on except dyno-stats.
    #[allow(dead_code)]
    pub fn default_bolt() -> Self {
        Self {
            reorder_blocks: true,
            reorder_functions: true,
            split_functions: true,
            dyno_stats: false,
            plt_call_opt: true,
            peepholes: true,
        }
    }
    /// Render the enabled toggles as command-line flags, in a fixed
    /// order.
    #[allow(dead_code)]
    pub fn emit_flags(&self) -> Vec<String> {
        // (enabled?, flag) pairs in emission order.
        let toggles = [
            (self.reorder_blocks, "--reorder-blocks=ext-tsp"),
            (self.reorder_functions, "--reorder-functions=hfsort"),
            (self.split_functions, "--split-functions"),
            (self.dyno_stats, "--dyno-stats"),
            (self.plt_call_opt, "--plt=hot"),
            (self.peepholes, "--peepholes=all"),
        ];
        toggles
            .iter()
            .filter(|(on, _)| *on)
            .map(|(_, flag)| flag.to_string())
            .collect()
    }
}
/// Aggregated call-count profile: per-function counts, caller→callee
/// edge counts, and the cached list of hot functions.
#[derive(Debug, Clone, Default)]
pub struct ProfileData {
    pub call_counts: HashMap<String, u64>,
    pub hot_functions: Vec<String>,
    pub edge_counts: HashMap<(String, String), u64>,
}
impl ProfileData {
    /// Empty profile.
    pub fn new() -> Self {
        Self::default()
    }
    /// Count one call to `func`.
    pub fn record_call(&mut self, func: &str) {
        *self.call_counts.entry(func.to_owned()).or_insert(0) += 1;
    }
    /// Count one caller→callee edge traversal.
    pub fn record_edge(&mut self, caller: &str, callee: &str) {
        let key = (caller.to_owned(), callee.to_owned());
        *self.edge_counts.entry(key).or_insert(0) += 1;
    }
    /// Rebuild `hot_functions`: every function called strictly more
    /// than `threshold` times, ordered by count descending then name
    /// ascending for determinism.
    pub fn mark_hot(&mut self, threshold: u64) {
        let mut ranked: Vec<(String, u64)> = Vec::new();
        for (name, &count) in &self.call_counts {
            if count > threshold {
                ranked.push((name.clone(), count));
            }
        }
        ranked.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
        self.hot_functions = ranked.into_iter().map(|(name, _)| name).collect();
    }
    /// Whether `func` is in the current hot list (see `mark_hot`).
    pub fn is_hot(&self, func: &str) -> bool {
        self.hot_functions.iter().any(|f| f == func)
    }
    /// The `k` most-called functions with their counts, ordered by
    /// count descending then name ascending.
    pub fn top_k_functions(&self, k: usize) -> Vec<(String, u64)> {
        let mut ranked: Vec<(String, u64)> = self
            .call_counts
            .iter()
            .map(|(name, &count)| (name.clone(), count))
            .collect();
        ranked.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
        ranked.truncate(k);
        ranked
    }
}
668#[allow(dead_code)]
669#[derive(Debug, Clone)]
670pub struct PgoWorkflow {
671 pub phase: PgoPhase,
672 pub input_profile: Option<String>,
673 pub output_profile: Option<String>,
674 pub optimization_level: u8,
675}
676impl PgoWorkflow {
677 #[allow(dead_code)]
678 pub fn new_instrumentation() -> Self {
679 PgoWorkflow {
680 phase: PgoPhase::Instrumentation,
681 input_profile: None,
682 output_profile: Some("default.profraw".to_string()),
683 optimization_level: 0,
684 }
685 }
686 #[allow(dead_code)]
687 pub fn new_optimization(profile: impl Into<String>) -> Self {
688 PgoWorkflow {
689 phase: PgoPhase::Optimization,
690 input_profile: Some(profile.into()),
691 output_profile: None,
692 optimization_level: 3,
693 }
694 }
695 #[allow(dead_code)]
696 pub fn emit_flags(&self) -> Vec<String> {
697 let mut flags = Vec::new();
698 match self.phase {
699 PgoPhase::Instrumentation => {
700 flags.push("-fprofile-generate".to_string());
701 if let Some(ref out) = self.output_profile {
702 flags.push(format!("-fprofile-dir={}", out));
703 }
704 }
705 PgoPhase::Optimization => {
706 if let Some(ref inp) = self.input_profile {
707 flags.push(format!("-fprofile-use={}", inp));
708 }
709 flags.push(format!("-O{}", self.optimization_level));
710 }
711 _ => {}
712 }
713 flags
714 }
715}
#[derive(Debug, Clone, PartialEq, Eq)]
/// Optimization decision produced for a single call site.
pub enum OptAction {
    /// Inline `callee` into `caller`.
    Inline { caller: String, callee: String },
    /// Create a specialized copy of `func` for the given call site.
    Specialize { func: String, call_site: usize },
    /// Leave the call site unchanged.
    Noop,
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// How summed profile counters are rescaled after merging.
pub enum MergeWeightMode {
    /// Average counters across the number of merged runs.
    Equal,
    /// Keep the raw summed counts.
    Proportional,
    /// Scale counters relative to the largest counter.
    MaxCount,
}
/// Observed receiver-type distribution at one virtual call site.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct VirtualCallRecord {
    pub callsite_id: u32,
    pub targets: Vec<(String, u64)>,
    pub total_calls: u64,
}
impl VirtualCallRecord {
    /// Empty record for one call site.
    #[allow(dead_code)]
    pub fn new(callsite_id: u32) -> Self {
        Self {
            callsite_id,
            targets: Vec::new(),
            total_calls: 0,
        }
    }
    /// Add `count` calls dispatched to `type_name`, aggregating with
    /// any previous calls to the same type.
    #[allow(dead_code)]
    pub fn record_call(&mut self, type_name: impl Into<String>, count: u64) {
        let name = type_name.into();
        match self.targets.iter_mut().find(|(n, _)| n == &name) {
            Some(entry) => entry.1 += count,
            None => self.targets.push((name, count)),
        }
        self.total_calls += count;
    }
    /// Most frequent target and its share of all calls; `None` when no
    /// calls were recorded.
    #[allow(dead_code)]
    pub fn dominant_target(&self) -> Option<(&str, f64)> {
        if self.total_calls == 0 {
            return None;
        }
        // max_by_key keeps the last maximum on ties, matching the
        // original selection.
        let best = self.targets.iter().max_by_key(|(_, c)| c)?;
        Some((best.0.as_str(), best.1 as f64 / self.total_calls as f64))
    }
    /// True when one target accounts for at least 99% of calls.
    #[allow(dead_code)]
    pub fn is_monomorphic(&self) -> bool {
        self.dominant_target()
            .map_or(false, |(_, ratio)| ratio >= 0.99)
    }
    /// True when exactly two distinct targets were observed.
    #[allow(dead_code)]
    pub fn is_bimorphic(&self) -> bool {
        self.targets.len() == 2
    }
}
782#[allow(dead_code)]
783pub struct GlobalInstrumentationRegistry {
784 pub(super) functions: std::collections::HashMap<String, InstrumentationCounters>,
785}
786impl GlobalInstrumentationRegistry {
787 #[allow(dead_code)]
788 pub fn new() -> Self {
789 GlobalInstrumentationRegistry {
790 functions: std::collections::HashMap::new(),
791 }
792 }
793 #[allow(dead_code)]
794 pub fn register(&mut self, name: impl Into<String>, branches: usize, values: usize) {
795 self.functions
796 .insert(name.into(), InstrumentationCounters::new(branches, values));
797 }
798 #[allow(dead_code)]
799 pub fn get_mut(&mut self, name: &str) -> Option<&mut InstrumentationCounters> {
800 self.functions.get_mut(name)
801 }
802 #[allow(dead_code)]
803 pub fn get(&self, name: &str) -> Option<&InstrumentationCounters> {
804 self.functions.get(name)
805 }
806 #[allow(dead_code)]
807 pub fn total_entries(&self) -> u64 {
808 self.functions.values().map(|c| c.function_entry).sum()
809 }
810 #[allow(dead_code)]
811 pub fn function_count(&self) -> usize {
812 self.functions.len()
813 }
814 #[allow(dead_code)]
815 pub fn export_profile(&self) -> Vec<SampleRecord> {
816 self.functions
817 .iter()
818 .map(|(name, counters)| {
819 let mut rec = SampleRecord::new(name.clone());
820 rec.head_samples = counters.function_entry;
821 rec.body_samples = counters.branch_taken.iter().sum::<u64>()
822 + counters.branch_not_taken.iter().sum::<u64>();
823 rec
824 })
825 .collect()
826 }
827}
/// Record of one optimization decision made under PGO, for reporting.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum PgoDecision {
    Inlined { callee: String, benefit: f64 },
    NotInlined { callee: String, reason: String },
    Unrolled { loop_id: u32, factor: u32 },
    Vectorized { loop_id: u32, width: u32 },
    HotColdSplit { function: String },
    BlockReordered { function: String, blocks: u32 },
    StackPromotion { site_id: u32 },
    Devirtualized { callsite: u32, target: String },
}
impl PgoDecision {
    /// Human-readable one-line summary of the decision.
    #[allow(dead_code)]
    pub fn description(&self) -> String {
        use PgoDecision::*;
        match self {
            Inlined { callee, benefit } => format!("Inlined {} (benefit: {:.2})", callee, benefit),
            NotInlined { callee, reason } => format!("Not inlined {}: {}", callee, reason),
            Unrolled { loop_id, factor } => format!("Unrolled loop {} by {}x", loop_id, factor),
            Vectorized { loop_id, width } => {
                format!("Vectorized loop {} with width {}", loop_id, width)
            }
            HotColdSplit { function } => format!("Hot-cold split: {}", function),
            BlockReordered { function, blocks } => {
                format!("Reordered {} blocks in {}", blocks, function)
            }
            StackPromotion { site_id } => format!("Stack promotion at site {}", site_id),
            Devirtualized { callsite, target } => {
                format!("Devirtualized callsite {} -> {}", callsite, target)
            }
        }
    }
    /// Every decision except a rejected inline counts as beneficial.
    #[allow(dead_code)]
    pub fn is_beneficial(&self) -> bool {
        !matches!(self, PgoDecision::NotInlined { .. })
    }
}
875#[allow(dead_code)]
876#[derive(Debug, Clone)]
877pub struct ProfileSummary {
878 pub total_samples: u64,
879 pub max_function_count: u64,
880 pub num_functions: usize,
881 pub detailed_summary: Vec<(u32, u64, u64)>,
882}
883impl ProfileSummary {
884 #[allow(dead_code)]
885 pub fn new() -> Self {
886 ProfileSummary {
887 total_samples: 0,
888 max_function_count: 0,
889 num_functions: 0,
890 detailed_summary: Vec::new(),
891 }
892 }
893 #[allow(dead_code)]
894 pub fn compute_from_profiles(profiles: &[FunctionProfile]) -> Self {
895 let total_samples: u64 = profiles.iter().map(|p| p.total_calls).sum();
896 let max_function_count = profiles.iter().map(|p| p.total_calls).max().unwrap_or(0);
897 ProfileSummary {
898 total_samples,
899 max_function_count,
900 num_functions: profiles.len(),
901 detailed_summary: Vec::new(),
902 }
903 }
904 #[allow(dead_code)]
905 pub fn is_empty(&self) -> bool {
906 self.total_samples == 0
907 }
908}
/// Minimal marker describing which PGO phase the infrastructure is in.
#[allow(dead_code)]
pub struct PgoInfrastructure {
    pub enabled: bool,
    pub phase: String,
}
impl PgoInfrastructure {
    /// Enabled infrastructure tagged with `phase`.
    #[allow(dead_code)]
    pub fn new(phase: impl Into<String>) -> Self {
        Self {
            enabled: true,
            phase: phase.into(),
        }
    }
    /// Current phase label.
    #[allow(dead_code)]
    pub fn phase(&self) -> &str {
        self.phase.as_str()
    }
}
/// Sample-based (AutoFDO-style) profile for one function: head/body
/// sample counts plus per-offset callee samples.
#[allow(dead_code)]
pub struct SampleRecord {
    pub function_name: String,
    pub head_samples: u64,
    pub body_samples: u64,
    /// (instruction offset, callee name, sample count) triples.
    pub callsites: Vec<(u64, String, u64)>,
}
impl SampleRecord {
    /// Empty record for `function_name`.
    #[allow(dead_code)]
    pub fn new(function_name: impl Into<String>) -> Self {
        Self {
            function_name: function_name.into(),
            head_samples: 0,
            body_samples: 0,
            callsites: Vec::new(),
        }
    }
    /// Record `count` samples for a call to `callee` at `offset`.
    #[allow(dead_code)]
    pub fn add_callsite(&mut self, offset: u64, callee: impl Into<String>, count: u64) {
        self.callsites.push((offset, callee.into(), count));
    }
    /// Head plus body samples.
    #[allow(dead_code)]
    pub fn total_samples(&self) -> u64 {
        self.head_samples + self.body_samples
    }
    /// Render the record as profile text: a `name:head` header line
    /// followed by one indented line per call site.
    #[allow(dead_code)]
    pub fn emit_prof_text(&self) -> String {
        let mut text = format!("{}:{}\n", self.function_name, self.head_samples);
        for (offset, callee, count) in self.callsites.iter() {
            text.push_str(&format!(" {}: {} {} {}\n", offset, callee, count, count));
        }
        text
    }
}
961pub struct InstrumentationPass;
964impl InstrumentationPass {
965 pub fn new() -> Self {
967 Self
968 }
969 pub fn instrument_function(&self, name: &str) -> String {
974 format!(
975 "/* [PGO] instrumentation stub for `{}` */\n\
976 __pgo_counter_increment(\"{}\");",
977 name, name
978 )
979 }
980 pub fn generate_profile_report(&self, data: &ProfileData) -> String {
982 let mut lines: Vec<String> = Vec::new();
983 lines.push("=== PGO Profile Report ===".to_owned());
984 lines.push(format!("Hot functions ({}):", data.hot_functions.len()));
985 for f in &data.hot_functions {
986 let count = data.call_counts.get(f).copied().unwrap_or(0);
987 lines.push(format!(" {} — {} calls", f, count));
988 }
989 lines.push(format!(
990 "Total tracked functions: {}",
991 data.call_counts.len()
992 ));
993 lines.push(format!(
994 "Total call-graph edges: {}",
995 data.edge_counts.len()
996 ));
997 lines.join("\n")
998 }
999}
1000#[allow(dead_code)]
1001pub struct PgoPassManager {
1002 pub passes: Vec<String>,
1003 pub feedback: PgoFeedback,
1004 pub heuristic: InlineHeuristic,
1005 pub hot_cold_split: HotColdSplit,
1006}
1007impl PgoPassManager {
1008 #[allow(dead_code)]
1009 pub fn new(feedback: PgoFeedback) -> Self {
1010 PgoPassManager {
1011 passes: Vec::new(),
1012 feedback,
1013 heuristic: InlineHeuristic::default_heuristic(),
1014 hot_cold_split: HotColdSplit::new(80.0),
1015 }
1016 }
1017 #[allow(dead_code)]
1018 pub fn add_pass(&mut self, pass: impl Into<String>) {
1019 self.passes.push(pass.into());
1020 }
1021 #[allow(dead_code)]
1022 pub fn compute_hot_cold(&mut self) {
1023 let profiles = self.feedback.profiles.clone();
1024 self.hot_cold_split.classify(&profiles);
1025 }
1026 #[allow(dead_code)]
1027 pub fn is_hot_function(&self, name: &str) -> bool {
1028 self.hot_cold_split.hot_functions.iter().any(|f| f == name)
1029 }
1030 #[allow(dead_code)]
1031 pub fn pass_count(&self) -> usize {
1032 self.passes.len()
1033 }
1034}
/// Trip-count profile for one loop: every observed iteration count
/// plus running max and mean statistics.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct LoopIterationProfile {
    pub loop_id: u32,
    pub function_name: String,
    pub iteration_counts: Vec<u64>,
    pub trip_count_avg: f64,
    pub trip_count_max: u64,
}
impl LoopIterationProfile {
    /// Empty profile for loop `loop_id` in `function_name`.
    #[allow(dead_code)]
    pub fn new(loop_id: u32, function_name: impl Into<String>) -> Self {
        Self {
            loop_id,
            function_name: function_name.into(),
            iteration_counts: Vec::new(),
            trip_count_avg: 0.0,
            trip_count_max: 0,
        }
    }
    /// Record one loop execution with `iterations` iterations and
    /// refresh the max/mean statistics.
    #[allow(dead_code)]
    pub fn record_execution(&mut self, iterations: u64) {
        self.iteration_counts.push(iterations);
        self.trip_count_max = self.trip_count_max.max(iterations);
        let total: u64 = self.iteration_counts.iter().sum();
        self.trip_count_avg = total as f64 / self.iteration_counts.len() as f64;
    }
    /// True when every observed execution had the same trip count
    /// (vacuously true for fewer than two observations).
    #[allow(dead_code)]
    pub fn is_constant_trip_count(&self) -> bool {
        match self.iteration_counts.split_first() {
            Some((first, rest)) => rest.iter().all(|c| c == first),
            None => true,
        }
    }
    /// Power-of-two unroll factor suggested by the mean trip count.
    #[allow(dead_code)]
    pub fn estimated_unroll_factor(&self) -> u32 {
        let avg = self.trip_count_avg;
        if avg <= 1.0 {
            1
        } else if avg <= 4.0 {
            2
        } else if avg <= 8.0 {
            4
        } else if avg <= 16.0 {
            8
        } else {
            16
        }
    }
}
1089#[allow(dead_code)]
1090pub struct PgoFeedback {
1091 pub run_count: u32,
1092 pub profiles: Vec<FunctionProfile>,
1093 pub call_graph: CallGraph,
1094}
1095impl PgoFeedback {
1096 #[allow(dead_code)]
1097 pub fn new() -> Self {
1098 PgoFeedback {
1099 run_count: 0,
1100 profiles: Vec::new(),
1101 call_graph: CallGraph::new(),
1102 }
1103 }
1104 #[allow(dead_code)]
1105 pub fn add_profile(&mut self, profile: FunctionProfile) {
1106 self.profiles.push(profile);
1107 }
1108 #[allow(dead_code)]
1109 pub fn increment_run(&mut self) {
1110 self.run_count += 1;
1111 }
1112 #[allow(dead_code)]
1113 pub fn normalize_counts(&mut self) {
1114 if self.run_count == 0 {
1115 return;
1116 }
1117 for p in &mut self.profiles {
1118 p.total_calls /= self.run_count as u64;
1119 for b in &mut p.blocks {
1120 b.execution_count /= self.run_count as u64;
1121 }
1122 }
1123 }
1124 #[allow(dead_code)]
1125 pub fn top_hot_functions(&self, n: usize, threshold: u64) -> Vec<&FunctionProfile> {
1126 let mut hot: Vec<&FunctionProfile> = self
1127 .profiles
1128 .iter()
1129 .filter(|p| p.is_hot_function(threshold))
1130 .collect();
1131 hot.sort_by(|a, b| b.total_calls.cmp(&a.total_calls));
1132 hot.truncate(n);
1133 hot
1134 }
1135}
/// Builds profiles from sampled call-stack traces (leaf frame first).
#[allow(dead_code)]
pub struct SampleBasedProfileGenerator {
    pub stack_traces: Vec<Vec<String>>,
    pub sample_interval: u64,
}
impl SampleBasedProfileGenerator {
    /// Generator with no traces and the given sampling interval.
    #[allow(dead_code)]
    pub fn new(sample_interval: u64) -> Self {
        Self {
            stack_traces: Vec::new(),
            sample_interval,
        }
    }
    /// Record one sampled stack trace.
    #[allow(dead_code)]
    pub fn add_trace(&mut self, trace: Vec<String>) {
        self.stack_traces.push(trace);
    }
    /// Self-time profile: each sample is attributed only to the first
    /// frame of its trace.
    #[allow(dead_code)]
    pub fn build_flat_profile(&self) -> std::collections::HashMap<String, u64> {
        let mut counts = std::collections::HashMap::new();
        for trace in self.stack_traces.iter() {
            if let Some(leaf) = trace.first() {
                *counts.entry(leaf.clone()).or_insert(0) += 1;
            }
        }
        counts
    }
    /// Inclusive profile: each sample counts once for every distinct
    /// frame anywhere on its stack.
    #[allow(dead_code)]
    pub fn build_inclusive_profile(&self) -> std::collections::HashMap<String, u64> {
        let mut counts = std::collections::HashMap::new();
        for trace in self.stack_traces.iter() {
            let mut seen = std::collections::HashSet::new();
            for frame in trace.iter() {
                // A frame that repeats within one trace still counts once.
                if seen.insert(frame.clone()) {
                    *counts.entry(frame.clone()).or_insert(0) += 1;
                }
            }
        }
        counts
    }
    /// The `n` functions with the most self-time samples, descending.
    #[allow(dead_code)]
    pub fn top_functions(&self, n: usize) -> Vec<(String, u64)> {
        let mut ranked: Vec<(String, u64)> = self.build_flat_profile().into_iter().collect();
        ranked.sort_by(|a, b| b.1.cmp(&a.1));
        ranked.truncate(n);
        ranked
    }
}
/// Settings for an AutoFDO collection run: where perf data lives, the
/// profiled binary, and the converted profile output.
#[allow(dead_code)]
pub struct AutoFdoConfig {
    pub perf_data_file: String,
    pub binary_path: String,
    pub profile_output: String,
    pub sampling_frequency: u32,
}
impl AutoFdoConfig {
    /// Defaults: `perf.data` input, `profile.afdo` output, 4000 Hz
    /// sampling.
    #[allow(dead_code)]
    pub fn new(binary: impl Into<String>) -> Self {
        Self {
            perf_data_file: "perf.data".to_string(),
            binary_path: binary.into(),
            profile_output: "profile.afdo".to_string(),
            sampling_frequency: 4000,
        }
    }
    /// `perf record` command that samples the binary with branch
    /// records enabled.
    #[allow(dead_code)]
    pub fn emit_perf_command(&self) -> String {
        format!(
            "perf record -e cycles:u -j any,u -a -F {} -o {} -- {}",
            self.sampling_frequency, self.perf_data_file, self.binary_path
        )
    }
    /// `create_gcov` command converting the perf data into AutoFDO
    /// profile form.
    #[allow(dead_code)]
    pub fn emit_create_gcov_command(&self) -> String {
        format!(
            "create_gcov --binary={} --profile={} --gcov={}",
            self.binary_path, self.perf_data_file, self.profile_output
        )
    }
}
1217#[allow(dead_code)]
1218#[derive(Debug, Clone)]
1219pub struct FunctionProfile {
1220 pub name: String,
1221 pub total_calls: u64,
1222 pub blocks: Vec<BlockProfile>,
1223 pub edges: Vec<EdgeProfile>,
1224 pub average_call_depth: f64,
1225}
1226impl FunctionProfile {
1227 #[allow(dead_code)]
1228 pub fn new(name: impl Into<String>) -> Self {
1229 FunctionProfile {
1230 name: name.into(),
1231 total_calls: 0,
1232 blocks: Vec::new(),
1233 edges: Vec::new(),
1234 average_call_depth: 0.0,
1235 }
1236 }
1237 #[allow(dead_code)]
1238 pub fn add_block(&mut self, block_id: u32, count: u64, hot_threshold: u64) {
1239 self.blocks.push(BlockProfile {
1240 block_id,
1241 execution_count: count,
1242 is_hot: count >= hot_threshold,
1243 });
1244 }
1245 #[allow(dead_code)]
1246 pub fn hot_blocks(&self) -> Vec<&BlockProfile> {
1247 self.blocks.iter().filter(|b| b.is_hot).collect()
1248 }
1249 #[allow(dead_code)]
1250 pub fn total_block_executions(&self) -> u64 {
1251 self.blocks.iter().map(|b| b.execution_count).sum()
1252 }
1253 #[allow(dead_code)]
1254 pub fn is_hot_function(&self, threshold: u64) -> bool {
1255 self.total_calls >= threshold
1256 }
1257}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Stage of a profile-guided-optimization pipeline.
///
/// NOTE(review): variant semantics below are inferred from the names; no
/// phase transitions are visible in this module — confirm against the driver.
pub enum PgoPhase {
    /// Build with counters/instrumentation inserted.
    Instrumentation,
    /// Run the instrumented binary on representative workloads.
    Training,
    /// Rebuild using the gathered profile to guide optimization.
    Optimization,
    /// Validate the optimized build against expectations.
    Verification,
}
#[allow(dead_code)]
/// Aggregated coverage counters with percentage helpers for lines,
/// branches, and functions.
pub struct CoverageReport {
    pub total_lines: u64,
    pub covered_lines: u64,
    pub total_branches: u64,
    pub covered_branches: u64,
    pub function_coverage: Vec<(String, bool)>,
}
impl CoverageReport {
    #[allow(dead_code)]
    /// Creates an empty report with all counters at zero.
    pub fn new() -> Self {
        Self {
            total_lines: 0,
            covered_lines: 0,
            total_branches: 0,
            covered_branches: 0,
            function_coverage: Vec::new(),
        }
    }
    #[allow(dead_code)]
    /// Percentage of lines covered; 0.0 when no lines were recorded.
    pub fn line_coverage_pct(&self) -> f64 {
        match self.total_lines {
            0 => 0.0,
            total => (self.covered_lines as f64 / total as f64) * 100.0,
        }
    }
    #[allow(dead_code)]
    /// Percentage of branches covered; 0.0 when no branches were recorded.
    pub fn branch_coverage_pct(&self) -> f64 {
        match self.total_branches {
            0 => 0.0,
            total => (self.covered_branches as f64 / total as f64) * 100.0,
        }
    }
    #[allow(dead_code)]
    /// Percentage of functions marked covered; 0.0 for an empty list.
    pub fn function_coverage_pct(&self) -> f64 {
        if self.function_coverage.is_empty() {
            return 0.0;
        }
        let covered = self
            .function_coverage
            .iter()
            .filter(|(_, hit)| *hit)
            .count();
        (covered as f64 / self.function_coverage.len() as f64) * 100.0
    }
    #[allow(dead_code)]
    /// Records whether a single function was covered.
    pub fn add_function(&mut self, name: impl Into<String>, covered: bool) {
        self.function_coverage.push((name.into(), covered));
    }
    #[allow(dead_code)]
    /// One-line human-readable summary of all three coverage percentages.
    pub fn summary(&self) -> String {
        format!(
            "Lines: {:.1}%, Branches: {:.1}%, Functions: {:.1}%",
            self.line_coverage_pct(),
            self.branch_coverage_pct(),
            self.function_coverage_pct()
        )
    }
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Weighted CFG edge between two code addresses, collected as Propeller
/// layout input (see `PropellerProfile::cfg_edges`).
pub struct PropellerEdge {
    /// Source address of the edge.
    pub from_addr: u64,
    /// Destination address of the edge.
    pub to_addr: u64,
    /// Observed traversal count; summed by `PropellerProfile::total_edge_weight`.
    pub weight: u64,
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Execution statistics for one basic block within a function.
pub struct BlockProfile {
    /// Identifier of the block within its function.
    pub block_id: u32,
    /// Number of times the block was observed executing.
    pub execution_count: u64,
    /// Precomputed hotness flag; set by `FunctionProfile::add_block` when
    /// `execution_count` meets the caller-supplied threshold.
    pub is_hot: bool,
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Thresholds and scaling used when deciding whether a call site should be
/// inlined.
pub struct InlineHeuristic {
    pub call_count_threshold: u64,
    pub size_limit: usize,
    pub depth_limit: u32,
    pub benefit_multiplier: f64,
}
impl InlineHeuristic {
    #[allow(dead_code)]
    /// Preset that inlines readily: low call-count bar, large size and
    /// depth limits, doubled benefit.
    pub fn aggressive() -> Self {
        Self {
            call_count_threshold: 10,
            size_limit: 500,
            depth_limit: 10,
            benefit_multiplier: 2.0,
        }
    }
    #[allow(dead_code)]
    /// Preset that inlines rarely: high call-count bar, small size and
    /// depth limits, halved benefit.
    pub fn conservative() -> Self {
        Self {
            call_count_threshold: 100,
            size_limit: 50,
            depth_limit: 3,
            benefit_multiplier: 0.5,
        }
    }
    #[allow(dead_code)]
    /// Middle-of-the-road preset between `aggressive` and `conservative`.
    pub fn default_heuristic() -> Self {
        Self {
            call_count_threshold: 50,
            size_limit: 100,
            depth_limit: 5,
            benefit_multiplier: 1.0,
        }
    }
    #[allow(dead_code)]
    /// A call site qualifies for inlining when it is called often enough,
    /// the callee is small enough, and the inline depth is within bounds.
    pub fn should_inline(&self, call_count: u64, callee_size: usize, current_depth: u32) -> bool {
        if call_count < self.call_count_threshold {
            return false;
        }
        callee_size <= self.size_limit && current_depth <= self.depth_limit
    }
    #[allow(dead_code)]
    /// Benefit score: call density (calls per unit of callee size, with +1
    /// guarding against division by zero) scaled by the multiplier.
    pub fn compute_benefit(&self, call_count: u64, callee_size: usize) -> f64 {
        self.benefit_multiplier * (call_count as f64 / (callee_size as f64 + 1.0))
    }
}
1383#[allow(dead_code)]
1384#[derive(Debug, Clone)]
1385pub struct HotColdSplit {
1386 pub hot_functions: Vec<String>,
1387 pub cold_functions: Vec<String>,
1388 pub hot_threshold_percentile: f64,
1389}
1390impl HotColdSplit {
1391 #[allow(dead_code)]
1392 pub fn new(hot_threshold: f64) -> Self {
1393 HotColdSplit {
1394 hot_functions: Vec::new(),
1395 cold_functions: Vec::new(),
1396 hot_threshold_percentile: hot_threshold,
1397 }
1398 }
1399 #[allow(dead_code)]
1400 pub fn classify(&mut self, profiles: &[FunctionProfile]) {
1401 if profiles.is_empty() {
1402 return;
1403 }
1404 let mut counts: Vec<(String, u64)> = profiles
1405 .iter()
1406 .map(|p| (p.name.clone(), p.total_calls))
1407 .collect();
1408 counts.sort_by(|a, b| b.1.cmp(&a.1));
1409 let total: u64 = counts.iter().map(|(_, c)| c).sum();
1410 let threshold = (total as f64 * self.hot_threshold_percentile / 100.0) as u64;
1411 let mut cumulative = 0u64;
1412 for (name, count) in &counts {
1413 cumulative += count;
1414 if cumulative <= threshold {
1415 self.hot_functions.push(name.clone());
1416 } else {
1417 self.cold_functions.push(name.clone());
1418 }
1419 }
1420 }
1421 #[allow(dead_code)]
1422 pub fn hot_count(&self) -> usize {
1423 self.hot_functions.len()
1424 }
1425 #[allow(dead_code)]
1426 pub fn cold_count(&self) -> usize {
1427 self.cold_functions.len()
1428 }
1429}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Flat edge-list call graph: `(caller, callee, call_count)` triples in
/// insertion order.
pub struct CallGraph {
    pub edges: Vec<(String, String, u64)>,
}
impl CallGraph {
    #[allow(dead_code)]
    /// Creates a graph with no edges.
    pub fn new() -> Self {
        Self { edges: vec![] }
    }
    #[allow(dead_code)]
    /// Appends one caller→callee edge with its observed call count.
    pub fn add_edge(&mut self, caller: impl Into<String>, callee: impl Into<String>, count: u64) {
        self.edges.push((caller.into(), callee.into(), count));
    }
    #[allow(dead_code)]
    /// All `(caller, count)` pairs whose edges end at `target`, in
    /// insertion order.
    pub fn callers_of(&self, target: &str) -> Vec<(&str, u64)> {
        self.edges
            .iter()
            .filter_map(|(caller, callee, count)| {
                (callee == target).then(|| (caller.as_str(), *count))
            })
            .collect()
    }
    #[allow(dead_code)]
    /// All `(callee, count)` pairs whose edges start at `source`, in
    /// insertion order.
    pub fn callees_of(&self, source: &str) -> Vec<(&str, u64)> {
        self.edges
            .iter()
            .filter_map(|(caller, callee, count)| {
                (caller == source).then(|| (callee.as_str(), *count))
            })
            .collect()
    }
    #[allow(dead_code)]
    /// Sum of call counts over every edge in the graph.
    pub fn total_call_count(&self) -> u64 {
        self.edges.iter().fold(0, |acc, (_, _, c)| acc + c)
    }
    #[allow(dead_code)]
    /// Edges whose call count is at least `threshold`, as borrowed triples.
    pub fn hot_call_sites(&self, threshold: u64) -> Vec<(&str, &str, u64)> {
        let mut sites = Vec::new();
        for (caller, callee, count) in &self.edges {
            if *count >= threshold {
                sites.push((caller.as_str(), callee.as_str(), *count));
            }
        }
        sites
    }
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Observed characteristics of one memory-access site.
pub struct MemoryAccessPattern {
    pub access_id: u32,
    pub is_sequential: bool,
    pub stride: i64,
    pub cache_hit_rate: f64,
    pub access_count: u64,
}
impl MemoryAccessPattern {
    #[allow(dead_code)]
    /// Creates a pattern record with all observations zeroed.
    pub fn new(access_id: u32) -> Self {
        Self {
            access_id,
            is_sequential: false,
            stride: 0,
            cache_hit_rate: 0.0,
            access_count: 0,
        }
    }
    #[allow(dead_code)]
    /// Cache-friendly means: sequential, a small forward stride (1..=64),
    /// and a hit rate of at least 90%.
    pub fn is_cache_friendly(&self) -> bool {
        let forward_small_stride = (1..=64).contains(&self.stride);
        self.is_sequential && forward_small_stride && self.cache_hit_rate >= 0.9
    }
    #[allow(dead_code)]
    /// Fixed prefetch distance of 8 for sequential accesses, otherwise 0
    /// (no prefetching).
    pub fn prefetch_distance(&self) -> i64 {
        match self.is_sequential {
            true => 8,
            false => 0,
        }
    }
}
1506#[allow(dead_code)]
1507pub struct AllocationProfile {
1508 pub function_name: String,
1509 pub allocation_sites: Vec<AllocationSiteProfile>,
1510}
1511#[allow(dead_code)]
1512impl AllocationProfile {
1513 pub fn new(function_name: impl Into<String>) -> Self {
1514 AllocationProfile {
1515 function_name: function_name.into(),
1516 allocation_sites: Vec::new(),
1517 }
1518 }
1519 #[allow(dead_code)]
1520 pub fn add_site(&mut self, site: AllocationSiteProfile) {
1521 self.allocation_sites.push(site);
1522 }
1523 #[allow(dead_code)]
1524 pub fn stack_promotion_candidates(&self) -> Vec<&AllocationSiteProfile> {
1525 self.allocation_sites
1526 .iter()
1527 .filter(|s| s.stack_promotion_candidate())
1528 .collect()
1529 }
1530 #[allow(dead_code)]
1531 pub fn total_allocations(&self) -> u64 {
1532 self.allocation_sites.iter().map(|s| s.alloc_count).sum()
1533 }
1534}
1535#[allow(dead_code)]
1536pub struct PgoOptimizationLog {
1537 pub(super) decisions: Vec<(String, PgoDecision)>,
1538 pub(super) total_beneficial: u32,
1539 pub(super) total_non_beneficial: u32,
1540}
1541impl PgoOptimizationLog {
1542 #[allow(dead_code)]
1543 pub fn new() -> Self {
1544 PgoOptimizationLog {
1545 decisions: Vec::new(),
1546 total_beneficial: 0,
1547 total_non_beneficial: 0,
1548 }
1549 }
1550 #[allow(dead_code)]
1551 pub fn record(&mut self, function: impl Into<String>, decision: PgoDecision) {
1552 if decision.is_beneficial() {
1553 self.total_beneficial += 1;
1554 } else {
1555 self.total_non_beneficial += 1;
1556 }
1557 self.decisions.push((function.into(), decision));
1558 }
1559 #[allow(dead_code)]
1560 pub fn generate_report(&self) -> String {
1561 let mut out = String::new();
1562 out.push_str(&format!("PGO Optimization Report:\n"));
1563 out.push_str(&format!(" Total decisions: {}\n", self.decisions.len()));
1564 out.push_str(&format!(" Beneficial: {}\n", self.total_beneficial));
1565 out.push_str(&format!(
1566 " Non-beneficial: {}\n\n",
1567 self.total_non_beneficial
1568 ));
1569 for (func, decision) in &self.decisions {
1570 out.push_str(&format!(" [{}] {}\n", func, decision.description()));
1571 }
1572 out
1573 }
1574 #[allow(dead_code)]
1575 pub fn filter_by_function(&self, name: &str) -> Vec<&PgoDecision> {
1576 self.decisions
1577 .iter()
1578 .filter(|(f, _)| f == name)
1579 .map(|(_, d)| d)
1580 .collect()
1581 }
1582 #[allow(dead_code)]
1583 pub fn inline_decisions(&self) -> Vec<&PgoDecision> {
1584 self.decisions
1585 .iter()
1586 .filter(|(_, d)| {
1587 matches!(
1588 d,
1589 PgoDecision::Inlined { .. } | PgoDecision::NotInlined { .. }
1590 )
1591 })
1592 .map(|(_, d)| d)
1593 .collect()
1594 }
1595}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Statistics for a single allocation call site.
pub struct AllocationSiteProfile {
    pub site_id: u32,
    pub alloc_count: u64,
    pub avg_size: f64,
    pub max_size: u64,
    pub live_at_exit: u64,
}
impl AllocationSiteProfile {
    #[allow(dead_code)]
    /// Creates a site record with all counters zeroed.
    pub fn new(site_id: u32) -> Self {
        Self {
            site_id,
            alloc_count: 0,
            avg_size: 0.0,
            max_size: 0,
            live_at_exit: 0,
        }
    }
    #[allow(dead_code)]
    /// Short-lived: the site allocated at least once and nothing from it
    /// survived to function exit.
    pub fn is_short_lived(&self) -> bool {
        let allocated = self.alloc_count > 0;
        let nothing_escapes = self.live_at_exit == 0;
        allocated && nothing_escapes
    }
    #[allow(dead_code)]
    /// Candidate for heap→stack promotion: short-lived and never larger
    /// than 4 KiB.
    pub fn stack_promotion_candidate(&self) -> bool {
        self.is_short_lived() && self.max_size <= 4096
    }
}
1625#[allow(dead_code)]
1626pub struct PgoStatisticsReport {
1627 pub total_functions: usize,
1628 pub hot_functions: usize,
1629 pub cold_functions: usize,
1630 pub inlined_callsites: usize,
1631 pub devirtualized_sites: usize,
1632 pub stack_promoted_sites: usize,
1633 pub blocks_reordered: u64,
1634 pub loops_unrolled: usize,
1635 pub loops_vectorized: usize,
1636}
1637impl PgoStatisticsReport {
1638 #[allow(dead_code)]
1639 pub fn new() -> Self {
1640 PgoStatisticsReport {
1641 total_functions: 0,
1642 hot_functions: 0,
1643 cold_functions: 0,
1644 inlined_callsites: 0,
1645 devirtualized_sites: 0,
1646 stack_promoted_sites: 0,
1647 blocks_reordered: 0,
1648 loops_unrolled: 0,
1649 loops_vectorized: 0,
1650 }
1651 }
1652 #[allow(dead_code)]
1653 pub fn from_log(log: &PgoOptimizationLog) -> Self {
1654 let mut rep = Self::new();
1655 for (_, decision) in &log.decisions {
1656 match decision {
1657 PgoDecision::Inlined { .. } => rep.inlined_callsites += 1,
1658 PgoDecision::Devirtualized { .. } => rep.devirtualized_sites += 1,
1659 PgoDecision::StackPromotion { .. } => rep.stack_promoted_sites += 1,
1660 PgoDecision::Unrolled { .. } => rep.loops_unrolled += 1,
1661 PgoDecision::Vectorized { .. } => rep.loops_vectorized += 1,
1662 PgoDecision::BlockReordered { blocks, .. } => {
1663 rep.blocks_reordered += *blocks as u64;
1664 }
1665 _ => {}
1666 }
1667 }
1668 rep
1669 }
1670 #[allow(dead_code)]
1671 pub fn format_summary(&self) -> String {
1672 format!(
1673 "Functions: {} ({} hot, {} cold)\n\
1674 Inlined: {} callsites\n\
1675 Devirtualized: {} sites\n\
1676 Stack promoted: {} sites\n\
1677 Loops unrolled: {}, vectorized: {}\n\
1678 Blocks reordered: {}",
1679 self.total_functions,
1680 self.hot_functions,
1681 self.cold_functions,
1682 self.inlined_callsites,
1683 self.devirtualized_sites,
1684 self.stack_promoted_sites,
1685 self.loops_unrolled,
1686 self.loops_vectorized,
1687 self.blocks_reordered
1688 )
1689 }
1690}
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Function-level metadata emitted into a Propeller profile record
/// (see `PropellerProfile::emit_protobuf_format`).
pub struct PropellerFunctionInfo {
    /// Symbol name of the function.
    pub name: String,
    /// Function address ("addr" in the emitted record).
    pub address: u64,
    /// Function size — NOTE(review): unit presumed to be bytes from the
    /// name; confirm against the profile producer.
    pub size: u64,
    /// Entry count for the function ("count" in the emitted record).
    pub entry_count: u64,
}
1699#[allow(dead_code)]
1700#[derive(Debug, Clone)]
1701pub struct PropellerProfile {
1702 pub binary_id: String,
1703 pub hot_functions: Vec<PropellerFunctionInfo>,
1704 pub cfg_edges: Vec<PropellerEdge>,
1705}
1706impl PropellerProfile {
1707 #[allow(dead_code)]
1708 pub fn new(binary_id: impl Into<String>) -> Self {
1709 PropellerProfile {
1710 binary_id: binary_id.into(),
1711 hot_functions: Vec::new(),
1712 cfg_edges: Vec::new(),
1713 }
1714 }
1715 #[allow(dead_code)]
1716 pub fn add_function(&mut self, func: PropellerFunctionInfo) {
1717 self.hot_functions.push(func);
1718 }
1719 #[allow(dead_code)]
1720 pub fn add_edge(&mut self, edge: PropellerEdge) {
1721 self.cfg_edges.push(edge);
1722 }
1723 #[allow(dead_code)]
1724 pub fn total_edge_weight(&self) -> u64 {
1725 self.cfg_edges.iter().map(|e| e.weight).sum()
1726 }
1727 #[allow(dead_code)]
1728 pub fn emit_protobuf_format(&self) -> String {
1729 let mut out = format!("binary_id: \"{}\"\n", self.binary_id);
1730 for f in &self.hot_functions {
1731 out.push_str(&format!(
1732 "function {{ name: \"{}\" addr: {} size: {} count: {} }}\n",
1733 f.name, f.address, f.size, f.entry_count
1734 ));
1735 }
1736 out
1737 }
1738}