phago_runtime/
training_format.rs1use crate::curriculum::Curriculum;
7use crate::export::WeightedTriple;
8use serde::Serialize;
9
10#[derive(Debug, Clone, Serialize)]
12pub struct TrainingExample {
13 pub instruction: String,
14 pub input: String,
15 pub output: String,
16 pub weight: f64,
17 pub section: String,
18}
19
20pub fn to_jsonl(curriculum: &Curriculum) -> String {
22 let mut lines = Vec::new();
23
24 for triple in &curriculum.foundation {
25 lines.push(triple_to_example(triple, "foundation"));
26 }
27 for triple in &curriculum.bridges {
28 lines.push(triple_to_example(triple, "bridge"));
29 }
30 for triple in &curriculum.periphery {
31 lines.push(triple_to_example(triple, "periphery"));
32 }
33
34 lines.iter()
35 .filter_map(|ex| serde_json::to_string(ex).ok())
36 .collect::<Vec<_>>()
37 .join("\n")
38}
39
40pub fn to_jsonl_random(curriculum: &Curriculum, seed: u64) -> String {
42 let mut all_triples: Vec<(&WeightedTriple, &str)> = Vec::new();
43 for t in &curriculum.foundation { all_triples.push((t, "foundation")); }
44 for t in &curriculum.bridges { all_triples.push((t, "bridge")); }
45 for t in &curriculum.periphery { all_triples.push((t, "periphery")); }
46
47 let mut indices: Vec<usize> = (0..all_triples.len()).collect();
49 let mut rng = seed;
50 for i in (1..indices.len()).rev() {
51 rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
52 let j = (rng >> 33) as usize % (i + 1);
53 indices.swap(i, j);
54 }
55
56 let lines: Vec<String> = indices.iter()
57 .filter_map(|&i| {
58 let (triple, section) = all_triples.get(i)?;
59 let ex = triple_to_example(triple, section);
60 serde_json::to_string(&ex).ok()
61 })
62 .collect();
63
64 lines.join("\n")
65}
66
67fn triple_to_example(triple: &WeightedTriple, section: &str) -> TrainingExample {
68 TrainingExample {
69 instruction: format!(
70 "What is the relationship between '{}' and '{}'?",
71 triple.subject, triple.object
72 ),
73 input: String::new(),
74 output: format!(
75 "'{}' is {} '{}'. This is a {} concept with connection strength {:.2}.",
76 triple.subject,
77 triple.predicate,
78 triple.object,
79 section,
80 triple.weight,
81 ),
82 weight: triple.weight,
83 section: section.to_string(),
84 }
85}
86
87pub fn section_counts(curriculum: &Curriculum) -> (usize, usize, usize) {
89 (
90 curriculum.foundation.len(),
91 curriculum.bridges.len(),
92 curriculum.periphery.len(),
93 )
94}