1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
use crate::hash;
use crate::repo::Repo;
use rust_code_analysis::{get_function_spaces, read_file, CodeMetrics, FuncSpace, SpaceKind, LANG};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
use walkdir::WalkDir;

/// Analysis record for a single source file of a repository.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct FileData {
    /// Final component of the file's path.
    pub name: String,
    /// Location of the file; `extract_code_data` stores it relative to the
    /// repository root (e.g. `src/example.rs`).
    pub path: String,
    /// Extension of the file, as returned by `Path::extension`.
    pub extension: String,
    /// Name of the language the file was analysed as.
    pub language: String,
    /// Data about the code, as produced by rust_code_analysis.
    ///
    /// `FuncSpace` does not implement `Deserialize`, so the field is never
    /// read from serialized input. It does not implement `Default` either,
    /// which is why an explicit default provider has to be named here:
    /// deserialized values get whatever `default_func_spaces` returns.
    #[serde(skip_deserializing, default = "default_func_spaces")]
    pub spaces: FuncSpace,
}

/// Builds the placeholder `FuncSpace` that serde uses whenever it needs a
/// default value for the `spaces` field of `FileData`.
///
/// The placeholder has an empty name, spans lines 0 to 0, is tagged as a
/// function, contains no nested spaces and carries default metrics.
pub fn default_func_spaces() -> FuncSpace {
    let name = Some(String::new());
    let kind = SpaceKind::Function;
    let metrics = CodeMetrics::default();

    FuncSpace {
        name,
        start_line: 0,
        end_line: 0,
        kind,
        spaces: vec![],
        metrics,
    }
}

impl Default for FileData {
    /// Returns an empty `FileData`: every string field is empty and `spaces`
    /// is a `FuncSpace` of kind `SpaceKind::Unknown` with no name, no nested
    /// spaces, zeroed line numbers and default metrics.
    ///
    /// Note that this placeholder is not the same as the one returned by
    /// `default_func_spaces`, which is tagged `SpaceKind::Function` and has
    /// an empty (but present) name.
    fn default() -> Self {
        // `FuncSpace` has no `Default` impl, so it is spelled out by hand.
        let spaces = FuncSpace {
            name: None,
            start_line: 0,
            end_line: 0,
            kind: SpaceKind::Unknown,
            spaces: Vec::new(),
            metrics: Default::default(),
        };

        Self {
            name: String::new(),
            path: String::new(),
            extension: String::new(),
            language: String::new(),
            spaces,
        }
    }
}

/// Code data collected for one repository.
#[derive(Deserialize, Serialize, Clone, Debug, Default)]
pub struct Code {
    /// Name of the repository; `extract_code_data` sets it to the final
    /// component of the repository path.
    pub repo_name: String,
    /// Data of every analysed file; `extract_code_data` keys each entry by
    /// `hash::new` applied to the file's path relative to the repository root.
    pub files_data: HashMap<String, FileData>,
}

/// Returns the parent directory of the path reported by `repo.repo.path()`.
///
/// NOTE(review): presumably `repo.repo.path()` is the `.git` directory, which
/// makes the parent the repository's working directory — confirm against
/// `Repo`.
///
/// # Errors
///
/// Returns a message containing the offending path when that path has no
/// parent.
pub fn get_repo_path(repo: &Repo) -> Result<&Path, String> {
    let inner_path = repo.repo.path();

    inner_path.parent().ok_or_else(|| {
        format!(
            "Failed to get repo path, repo: {:?}",
            inner_path.to_str()
        )
    })
}

/// Maps a file extension (without the leading dot) to the language used to
/// analyse the file.
///
/// The comparison is exact and case-sensitive. Returns `None` for any
/// extension that is not listed below.
pub fn get_file_language(file_extension: &String) -> Option<LANG> {
    let language = match file_extension.as_str() {
        "rs" => LANG::Rust,
        "py" => LANG::Python,
        "java" => LANG::Java,
        "js" => LANG::Javascript,
        "jsm" => LANG::Mozjs,
        "tsx" => LANG::Tsx,
        "ts" | "jsw" | "jsmw" => LANG::Typescript,
        // C, C++ and Objective-C(++) sources and headers all map to `Cpp`.
        "c" | "h" | "cc" | "hh" | "cpp" | "hpp" | "cxx" | "hxx" | "inc" | "m" | "mm" => LANG::Cpp,
        // Anything else is not supported.
        _ => return None,
    };

    Some(language)
}

/// Walks the directory tree rooted at `repo_path` and collects code data for
/// every file whose extension is supported by `get_file_language`.
///
/// The analysis is best-effort. A file is silently skipped when:
/// * its directory entry cannot be read, or it is not a regular file,
/// * it has no file name or no extension,
/// * its extension is not mapped to a language,
/// * it cannot be read, or
/// * `get_function_spaces` yields nothing for it.
///
/// Each remaining file is stored in `Code::files_data` under the key
/// `hash::new(path)`, where `path` is the file's path relative to
/// `repo_path`.
///
/// # Errors
///
/// Returns a message when `repo_path` has no final component from which a
/// repository name could be derived.
pub fn extract_code_data(repo_path: &Path) -> Result<Code, String> {
    let repo_name = match repo_path.file_name() {
        Some(repo_name) => repo_name.to_string_lossy().into_owned(),
        None => {
            return Err(format!(
                "Couldn't get repo name, path: {}",
                repo_path.display()
            ))
        }
    };

    let mut code_data = Code {
        repo_name,
        files_data: HashMap::new(),
    };

    // Extract code data from each file in the given repository
    // if the file type is supported by rust_code_analysis
    for file in WalkDir::new(repo_path)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        let file_path = file.path();
        let file_name = match file_path.file_name() {
            Some(file_name) => file_name.to_string_lossy().into_owned(),
            None => continue,
        };

        let file_extension = match file_path.extension() {
            Some(file_extension) => file_extension.to_string_lossy().into_owned(),
            None => continue,
        };

        let file_language = match get_file_language(&file_extension) {
            Some(file_language) => file_language,
            None => continue,
        };

        let source_code = match read_file(file_path) {
            Ok(source_code) => source_code,
            Err(_) => continue,
        };

        let spaces = match get_function_spaces(&file_language, source_code, file_path, None) {
            Some(functions_spaces) => functions_spaces,
            None => continue,
        };

        // Get the file path relative to the root of the git repository:
        // for a /Users/elhmn/.wake/scanner/github-com-elhmn-qautomata/src/example.rs file
        // the `path` will be `src/example.rs`.
        //
        // The prefix is stripped on the `Path` itself rather than on a lossily
        // converted string: a lossy string no longer matches the real path when
        // the repository path is not valid UTF-8, and every file would then end
        // up with an empty `path` and therefore the same key in `files_data`.
        // For the same reason the result is converted lossily instead of being
        // replaced by an empty string when it is not valid UTF-8.
        let path = match file_path.strip_prefix(repo_path) {
            Ok(relative_path) => relative_path.to_string_lossy().into_owned(),
            // Not expected: the walk is rooted at `repo_path`.
            Err(_) => String::new(),
        };

        let file_data = FileData {
            name: file_name,
            path: path.clone(),
            extension: file_extension,
            language: String::from(file_language.get_name()),
            spaces,
        };

        code_data.files_data.insert(hash::new(path), file_data);
    }

    Ok(code_data)
}

/// Collects the code data of `repo`.
///
/// Resolves the repository directory with `get_repo_path` and hands it to
/// `extract_code_data`.
///
/// # Errors
///
/// Forwards the error message of whichever of those two steps fails.
pub fn new(repo: &Repo) -> Result<Code, String> {
    extract_code_data(get_repo_path(repo)?)
}