ocsf_codegen/
lib.rs

1#[macro_use]
2extern crate lazy_static;
3
4use std::collections::HashMap;
5use std::error::Error;
6use std::fs::{create_dir_all, File};
7use std::io::{BufReader, BufWriter, Read, Write};
8use std::path::PathBuf;
9
10use chrono::Utc;
11use codegen::Scope;
12use itertools::Itertools;
13use log::*;
14use regex::Regex;
15use serde_json::{self, Value};
16use walkdir::{DirEntry, WalkDir};
17
18pub mod categories;
19pub mod dictionary;
20pub mod enums;
21pub mod errors;
22pub mod events;
23pub mod module;
24pub mod objects;
25pub mod other;
26pub mod profiles;
27use categories::*;
28use dictionary::*;
29use enums::*;
30pub use errors::*;
31use events::*;
32use objects::*;
33use other::*;
34use profiles::*;
35
36lazy_static! {
37    static ref URL_FINDER: Regex = Regex::new(r#"(?P<url>\w+://[^<\s]+)"#).unwrap();
38}
39
40#[allow(dead_code)]
41type ClassesHashMap = HashMap<&'static str, HashMap<String, ClassType>>;
42
43#[derive(Debug)]
44pub enum ClassType {
45    Event { value: EventDef },
46    Enum { value: EnumDef },
47}
48
/// Classification of a schema file, as produced by `filename_to_classpath`.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers can copy and compare
/// results directly instead of destructuring every time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClassPath {
    /// File located under `enums/`; `class_path` is the derived class path.
    Enums { class_path: String },
    /// File located under `events/`; `class_path` is the derived class path.
    Event { class_path: String },
    /// File that belongs to neither of the recognised directories.
    Unknown,
}
55pub fn find_files(schema_path: &str) -> Vec<String> {
56    debug!("looking for schema files in {schema_path}");
57    let files: Vec<DirEntry> = WalkDir::new(schema_path)
58        .into_iter()
59        .filter_map(|p| p.ok())
60        .collect();
61    info!("Found {} files to process.", files.len());
62    files
63        .iter()
64        .filter_map(|p| match p.path().is_file() {
65            true => Some(p.path().to_string_lossy().into()),
66            false => None,
67        })
68        .collect()
69}
70
71pub fn filename_to_classpath(schema_base_path: &str, filename: &str) -> ClassPath {
72    let fname = filename.to_owned().replace(schema_base_path, "");
73    if fname.starts_with("enums/") {
74        let class_path = fname.replace("enums/", "").replace(".json", "");
75        return ClassPath::Enums { class_path };
76    } else if fname.starts_with("events/") {
77        let class_path = fname.replace("event/", "").replace(".json", "");
78        return ClassPath::Event { class_path };
79    }
80    ClassPath::Unknown
81}
82
83fn collapsed_title_case(input: impl std::fmt::Display) -> String {
84    let res = input.to_string();
85    string_morph::to_title_case(
86        &res.replace("enums/", "")
87            .replace(".json", "")
88            .replace('_', " "),
89    )
90    .replace(' ', "")
91}
92
93/// write a file to a place
94pub fn write_source_file(filename: &str, contents: &str) -> Result<(), Box<dyn Error>> {
95    debug!("Writing source file to {filename}");
96
97    if let Ok(existing_file) = &mut File::open(filename) {
98        debug!("Found an existing file in place!");
99        let mut file_contents = String::new();
100        existing_file.read_to_string(&mut file_contents)?;
101
102        if without_timestamp_line(file_contents) == without_timestamp_line(contents.to_string()) {
103            debug!("Don't need to write a new file to {filename}, contents already match other than timestamp!");
104            return Ok(());
105        }
106    }
107
108    let file = File::create(filename)?;
109
110    let mut writer = BufWriter::new(file);
111    let written_bytes = writer.write(contents.as_bytes())?;
112    debug!("Successfully wrote {written_bytes} bytes to {filename}");
113    Ok(())
114}
115
116pub fn get_new_scope_with_comment(first_line: Option<String>) -> Scope {
117    let mut new_scope = Scope::new();
118    if let Some(comment) = first_line {
119        new_scope.raw(&comment);
120    }
121    new_scope.add_generation_timestamp_comment();
122    new_scope
123}
124
125/// uses serde_json to try and parse a given file
126pub fn read_file_to_value(filename: &str) -> Result<Value, Box<dyn Error>> {
127    debug!("read_file_to_value {filename}");
128    let file = File::open(filename)?;
129    let reader = BufReader::new(file);
130
131    // Read the JSON contents of the file as an instance of `User`.
132    let res: Value = serde_json::from_reader(reader)?;
133    Ok(res)
134}
135
136#[allow(dead_code)]
137fn write_modules(
138    ocsf_dir: &str,
139    modules: HashMap<&str, Vec<String>>,
140) -> Result<(), Box<dyn Error>> {
141    let enums_dir = format!("{ocsf_dir}src/enums/");
142    if !PathBuf::from(&enums_dir).exists() {
143        warn!("{enums_dir} is missing, creating it...");
144        create_dir_all(&enums_dir)?;
145    }
146    let events_dir = format!("{ocsf_dir}src/events/");
147    if !PathBuf::from(&events_dir).exists() {
148        warn!("{events_dir} is missing, creating it...");
149        create_dir_all(&events_dir)?;
150    }
151
152    let enums_mod_file = File::create(PathBuf::from(format!("{enums_dir}mod.rs")))?;
153    let mut enums_mod = BufWriter::new(enums_mod_file);
154
155    let mut enums = modules.get("enums").unwrap().to_vec();
156    enums.sort();
157    let _ = enums_mod.write("\n".as_bytes())?;
158    enums.iter().for_each(|e| {
159        if e.is_empty() {
160            panic!("Empty module name?");
161        }
162        enums_mod.write_fmt(format_args!("pub mod {e};\n")).unwrap();
163    });
164
165    enums.iter().for_each(|e| {
166        enums_mod
167            .write_fmt(format_args!("pub use {e}::*;\n"))
168            .unwrap();
169    });
170
171    let events_mod_file = File::create(PathBuf::from(format!("{events_dir}mod.rs")))?;
172    let mut events_mod = BufWriter::new(events_mod_file);
173
174    let mut events = modules.get("events").unwrap().to_vec();
175    events.sort();
176    let _ = events_mod.write("\n".as_bytes())?;
177
178    events.iter().for_each(|e| {
179        if e.is_empty() {
180            panic!("Empty module name?");
181        }
182        events_mod
183            .write_fmt(format_args!("pub mod {e};\n"))
184            .unwrap();
185    });
186
187    events.iter().for_each(|e| {
188        events_mod
189            .write_fmt(format_args!("pub use {e}::*;\n"))
190            .unwrap();
191    });
192
193    Ok(())
194}
195
/// Directory layout used by the generator, derived from a single base path.
pub struct DirPaths {
    /// Base path followed by `ocsf/`.
    pub destination_path: String,
    /// Base path followed by `ocsf-schema/`.
    pub schema_path: String,
}

impl DirPaths {
    /// Derives both paths by appending to `base_path` verbatim; no separator is
    /// inserted, so `base_path` needs to carry its own trailing `/`.
    fn new(base_path: &str) -> Self {
        let destination_path = [base_path, "ocsf/"].concat();
        let schema_path = [base_path, "ocsf-schema/"].concat();
        Self {
            destination_path,
            schema_path,
        }
    }

    /// Returns the destination path with `src/` appended.
    fn source_path(&self) -> PathBuf {
        let mut source = self.destination_path.clone();
        source.push_str("src/");
        PathBuf::from(source)
    }
}
213
214pub fn get_timestamp_matcher() -> regex::Regex {
215    regex::Regex::new(r#"^(// This file was automatically generated by ocsf-codegen at .*)"#)
216        .unwrap()
217}
218
219/// output the code with the timestamp line stripped
220pub fn without_timestamp_line(input: String) -> String {
221    get_timestamp_matcher().replace_all(&input, "").into_owned()
222}
223
224pub trait CustomScopeThings {
225    fn writeln(&mut self, line: impl std::fmt::Display);
226    fn add_generation_timestamp_comment(&mut self);
227}
228
229impl CustomScopeThings for Scope {
230    /// Writes a raw string to the scope but doesn't forget to add the newline this time.
231    fn writeln(&mut self, line: impl std::fmt::Display) {
232        self.raw(&format!("{line}"));
233    }
234
235    fn add_generation_timestamp_comment(&mut self) {
236        let timestamp = Utc::now();
237
238        let commit = last_git_commit::LastGitCommit::new().build().unwrap();
239
240        self.writeln(&format!(
241            "// This file was automatically generated by ocsf-codegen at {} branch: \"{}\" link: <https://github.com/yaleman/ocsf-rs/commit/{}>",
242            timestamp.to_rfc3339_opts(chrono::SecondsFormat::Secs, false),
243            commit.branch(),
244            commit.id().long(),
245        ));
246    }
247}
248
/// Converts the handful of HTML tags found in schema descriptions into
/// Markdown suitable for a Rust doc comment.
///
/// `<b>`/`</b>` become `**`, `<code>`/`</code>` become a backtick, `<p>` is
/// dropped, and `</p>` becomes a newline followed by the comment prefix
/// (`leading_docstring`, or `///` when `None`).
pub fn fix_docstring(input: String, leading_docstring: Option<&'static str>) -> String {
    let paragraph_break = match leading_docstring {
        Some(prefix) => format!("\n{}", prefix),
        None => String::from("\n///"),
    };

    // Applied strictly in this order, one after another.
    let substitutions: [(&str, &str); 6] = [
        ("<b>", "**"),
        ("</b>", "**"),
        ("<p>", ""),
        ("</p>", paragraph_break.as_str()),
        ("<code>", "`"),
        ("</code>", "`"),
    ];

    let mut text = input;
    for &(tag, replacement) in substitutions.iter() {
        text = text.replace(tag, replacement);
    }
    text
}
261
262fn generate_expected_paths(paths: &DirPaths, modules: &[String]) -> Vec<String> {
263    let mut ok_paths: Vec<String> = vec![];
264
265    modules.iter().for_each(|m| {
266        ok_paths.push(format!("{}src/{}.rs", paths.destination_path, m));
267        ok_paths.push(format!("{}src/{}/", paths.destination_path, m));
268    });
269
270    ok_paths.push(format!("{}src/", paths.destination_path));
271    ok_paths.push(format!("{}src/lib.rs", paths.destination_path));
272    ok_paths
273}
274
275/// checks that all the expected files are there, and if not then it's
276fn check_crate_files(paths: &DirPaths, ok_paths: Vec<String>) -> Result<(), &'static str> {
277    debug!("OK Paths:{:#?}", ok_paths.iter().sorted());
278
279    let mut found_bad_files = false;
280
281    // let's double-check that any files we expect actually exist...
282    for filename in walkdir::WalkDir::new(format!("{}src/", paths.destination_path)) {
283        let filename = filename.unwrap();
284
285        let mut file_type = "file";
286        if filename.path().is_dir() {
287            file_type = "directory";
288        }
289
290        let filename_str = filename.path().to_str().unwrap().to_string();
291
292        if !ok_paths.contains(&filename_str) {
293            error!("module has unexpected {file_type}: {filename_str}");
294            found_bad_files = true;
295        } else {
296            trace!("Found expected crate source file file: {filename_str}");
297        }
298    }
299
300    match found_bad_files {
301        false => Ok(()),
302        true => Err("found something bad, you should check that!"),
303    }
304}
305
306/// main function of the library that generates the `ocsf` crate.
307pub fn generate_source_code(base_path: &str) -> Result<(), Box<dyn Error>> {
308    let paths = DirPaths::new(base_path);
309
310    if !PathBuf::from(&paths.destination_path).exists() {
311        error!("Dir {} is missing!", paths.destination_path);
312        panic!();
313    }
314    let src_dir = PathBuf::from(&format!("{}/src/", &paths.destination_path));
315    if !src_dir.exists() {
316        std::fs::create_dir(src_dir)?;
317    }
318
319    let mut root_module = module::Module::new("lib".to_string(), true);
320
321    root_module.scope = Scope::new();
322    root_module
323        .scope
324        .raw("//! OCSF crate, does Open Cyber Security Framework things.");
325    root_module.scope.raw("//! ");
326    root_module
327        .scope
328        .raw("//! <h1><span color=\"red\"> THIS IS VERY VERY VERY EARLY ALPHA</span></h1>");
329    root_module.scope.raw("//! ");
330    root_module
331        .scope
332        .raw("//! The base schema is available at <https://ocsf.io>.");
333    root_module.scope.add_generation_timestamp_comment();
334
335    let modules = vec![
336        "categories",
337        "dictionary",
338        // "enums",
339        "events",
340        "objects",
341        // "other",
342        "profiles",
343    ];
344    let modules: Vec<String> = modules.iter().map(|f| f.to_string()).collect();
345
346    for module_name in modules.iter() {
347        root_module.add_child(module_name.to_string());
348    }
349
350    add_version_element(&paths, &mut root_module.scope)?;
351
352    generate_enums(&paths, &mut root_module)?;
353
354    generate_dictionary_entries(&paths, &mut root_module)?;
355    generate_profiles(&paths, &mut root_module)?;
356    generate_categories(&paths, &mut root_module)?;
357    generate_objects(&paths, &mut root_module)?;
358    generate_events(&paths, &mut root_module)?;
359
360    let mut expected_paths: Vec<String> = generate_expected_paths(&paths, &modules);
361    root_module.write_module(&mut expected_paths, &paths.source_path())?;
362    check_crate_files(&paths, expected_paths)?;
363
364    Ok(())
365}