1use std::fs::File;
2use std::path::PathBuf;
3use std::str::FromStr;
4
5use anyhow::{anyhow, ensure, Result};
6use lazy_static::lazy_static;
7use levenshtein::levenshtein;
8use log::{error, info, warn};
9use regex::Regex;
10use structopt::{clap, StructOpt};
11use unicode_normalization::{is_nfc, UnicodeNormalization};
12
13use crate::utils::{check_csv, check_logic};
14use crate::{MaybeMusic, Music, Platform};
15
16#[derive(StructOpt)]
17#[structopt(
18version = clap::crate_version ! (),
19author = clap::crate_authors ! (),
20about = "Validate csv files"
21)]
22pub struct CheckOpt {
23 #[structopt(
24 about = "The CSV file to check",
25 default_value = "suisei-music.csv",
26 index = 1,
27 required = true
28 )]
29 csv_file: PathBuf,
30
31 #[structopt(short, long, about = "Only check formats")]
32 format_only: bool,
33
34 #[structopt(long)]
35 json_output: bool,
36}
37
38fn similarity_ratio(a: &str, b: &str) -> f32 {
41 let len = a.chars().count().max(b.chars().count());
42 1f32 - (levenshtein(a, b) as f32) / (len as f32)
43}
44
45fn similarity_check(
46 field_name: &str,
47 musics: &[MaybeMusic],
48 picker: impl for<'a> Fn(&'a MaybeMusic) -> &'a str,
49) {
50 for (i, one) in musics.iter().map(&picker).enumerate() {
51 for two in musics.iter().map(&picker).skip(i + 1) {
52 if one == two {
53 continue;
54 }
55 let sim = similarity_ratio(one, two);
56 if sim > 0.75 {
57 warn!(
58 "[{}] {} & {}: Similar titles ({})",
59 field_name, one, two, sim
60 );
61 }
62 }
63 }
64}
65
66lazy_static! {
67 static ref RE: Regex = Regex::new(r" ?[(\(].+[)\)]$").unwrap();
68}
69
70pub fn check(opts: CheckOpt) -> Result<()> {
71 let mut has_err = false;
72
73 let csv_file: PathBuf = opts.csv_file;
74 info!("CSV file: {:?}", csv_file);
75
76 ensure!(csv_file.exists(), format!("{:?} does not exists", csv_file));
77
78 let read_file = File::open(csv_file).unwrap();
79 let check_result =
80 check_csv(&read_file).map_err(|e| anyhow!(format!("CSV validation failed: {}", e)))?;
81
82 info!(
83 "CSV successfully validated. {} entries found.",
84 check_result.len()
85 );
86
87 if opts.format_only {
88 return Ok(());
89 }
90
91 info!("Checking entry support...");
93 for x in &check_result {
94 if x.video_type.is_empty() {
95 warn!("{}: Empty video_type", x);
97 continue;
98 }
99 if let Err(v) = Platform::from_str(&x.video_type) {
100 error!("{}: {}", x, v);
101 has_err = true;
102 }
103 }
104
105 info!("Checking potential typos...");
107 for x in &check_result {
108 if x.title.trim() != x.title {
109 error!("{}: Spaces around title", x);
110 has_err = true;
111 }
112 if x.artist.trim() != x.artist {
113 error!("{}: Spaces around artist", x);
114 has_err = true;
115 }
116 }
117
118 info!("Checking Unicode NFC conformity...");
120 for x in &check_result {
121 if !is_nfc(&x.title) {
122 error!(
123 "{}: Title is not in NFC, please change to '{}'",
124 x,
125 x.title.chars().nfc()
126 );
127 has_err = true;
128 }
129 if !is_nfc(&x.artist) {
130 error!(
131 "{}: Artist is not in NFC, please change to '{}'",
132 x,
133 x.artist.chars().nfc()
134 );
135 has_err = true;
136 }
137 }
138
139 info!("Check similar metadatas...");
140
141 let mut check_result_altered = check_result.clone();
143 for i in check_result_altered.iter_mut() {
144 i.title = RE.replace_all(&i.title, "").to_string();
145 }
146
147 similarity_check("Title", &check_result_altered, |x| &x.title);
148 similarity_check("Artist", &check_result, |x| &x.artist);
149
150 info!("Validating fields...");
151
152 let converted_result = check_result
153 .into_iter()
154 .filter_map(|x| {
155 let x_desc = x.to_string();
156 let v: Result<Music> = x.try_into();
157 match v {
158 Ok(m) => Some(m),
159 Err(e) => {
160 if &e.to_string() == "No status present" {
161 warn!("{}: Failed to convert to music: {}", x_desc, e);
163 } else {
164 error!("{}: Failed to convert to music: {}", x_desc, e);
165 has_err = true;
166 }
167 None
168 }
169 }
170 })
171 .collect::<Vec<Music>>();
172
173 info!("Checking entry logic...");
175 for x in &converted_result {
176 if let Err(v) = check_logic(x) {
177 error!("{}: {}", x, v);
178 has_err = true;
179 }
180 }
181
182 info!("Check finished.");
183
184 if opts.json_output {
185 let base = serde_json::to_string(&converted_result).unwrap();
186 println!("{}", base);
187 }
188
189 if has_err {
190 Err(anyhow!("Some hard checks didn't pass."))
191 } else {
192 Ok(())
193 }
194}
195
/// Pins `similarity_ratio` on identical, fully different and near-identical
/// inputs, including multi-byte characters.
#[test]
fn test_similarity_ratio() {
    // (left, right, expected ratio)
    let cases: [(&str, &str, f32); 4] = [
        ("test", "test", 1.0),
        ("abcd", "efgh", 0.0),
        ("双海亚美", "双海真美", 0.75),
        ("中文Aka", "英文Aka", 0.8),
    ];

    for (left, right, expected) in cases.iter() {
        assert_eq!(similarity_ratio(left, right), *expected);
    }
}