// similarity_core/structure_comparator.rs

use std::collections::{HashMap, HashSet};

3/// 一般化された構造定義
4#[derive(Debug, Clone)]
5pub struct Structure {
6    /// 識別子(名前、種類、名前空間)
7    pub identifier: StructureIdentifier,
8
9    /// メンバー(プロパティ、フィールド、メソッドなど)
10    pub members: Vec<StructureMember>,
11
12    /// メタデータ(位置情報、ジェネリクス、継承など)
13    pub metadata: StructureMetadata,
14}
15
16#[derive(Debug, Clone)]
17pub struct StructureIdentifier {
18    pub name: String,
19    pub kind: StructureKind,
20    pub namespace: Option<String>,
21}
22
/// The kind of construct a structure was extracted from.
///
/// The `Debug` rendering of this value is embedded in structure fingerprints
/// (as `kind:<Debug output>`), so variant names are part of that format.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum StructureKind {
    TypeScriptInterface,
    TypeScriptTypeAlias,
    TypeScriptTypeLiteral,
    TypeScriptClass,
    RustStruct,
    RustEnum,
    CssRule,
    CssClass,
    /// Any other kind, described by a free-form label.
    Generic(String),
}

36#[derive(Debug, Clone)]
37pub struct StructureMember {
38    pub name: String,
39    pub value_type: String,
40    pub modifiers: Vec<String>,
41    pub nested: Option<Box<Structure>>,
42}
43
44#[derive(Debug, Clone, Default)]
45pub struct StructureMetadata {
46    pub location: SourceLocation,
47    pub generics: Vec<String>,
48    pub extends: Vec<String>,
49    pub visibility: Option<String>,
50}
51
/// Location of a structure in its source file.
///
/// The default value has an empty path and both line numbers set to zero.
#[derive(Debug, Clone, Default)]
pub struct SourceLocation {
    /// Path of the file containing the structure.
    pub file_path: String,
    /// Line on which the structure starts.
    pub start_line: usize,
    /// Line on which the structure ends.
    pub end_line: usize,
}

59/// 構造比較の結果
60#[derive(Debug, Clone)]
61pub struct StructureComparisonResult {
62    pub overall_similarity: f64,
63    pub identifier_similarity: f64,
64    pub member_similarity: f64,
65    pub member_matches: Vec<MemberMatch>,
66    pub differences: StructureDifferences,
67}
68
/// A pair of members, one from each compared structure, that were matched together.
#[derive(Debug, Clone)]
pub struct MemberMatch {
    /// Name of the member from the first structure.
    pub member1: String,
    /// Name of the member from the second structure.
    pub member2: String,
    /// Similarity score of the pair.
    pub similarity: f64,
}

/// Differences found between two compared structures.
#[derive(Debug, Clone)]
pub struct StructureDifferences {
    /// Names of members of the first structure that found no match in the second.
    pub missing_members: Vec<String>,
    /// Names of members of the second structure that were not matched.
    pub extra_members: Vec<String>,
    /// Matched members whose types differ, as `(name, type1, type2)`.
    pub type_mismatches: Vec<(String, String, String)>,
}

83/// 比較オプション
84#[derive(Debug, Clone)]
85pub struct ComparisonOptions {
86    pub name_weight: f64,
87    pub structure_weight: f64,
88    pub member_comparison: MemberComparisonStrategy,
89    pub ignore_order: bool,
90    pub fuzzy_matching: bool,
91    pub threshold: f64,
92    pub strict_size_check: bool,  // サイズチェックを厳格にする
93    pub require_type_match: bool, // 型の一致を要求する
94}
95
96impl Default for ComparisonOptions {
97    fn default() -> Self {
98        Self {
99            name_weight: 0.3,
100            structure_weight: 0.7,
101            member_comparison: MemberComparisonStrategy::Normalized,
102            ignore_order: true,
103            fuzzy_matching: true,
104            threshold: 0.7,
105            strict_size_check: true,
106            require_type_match: false,
107        }
108    }
109}
110
/// Strategy used to compare the types of two members.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemberComparisonStrategy {
    /// Types must be textually identical.
    Exact,
    /// Types are compared after normalization into coarse categories.
    Normalized,
    /// Semantic comparison; the comparator in this file currently treats it like
    /// `Normalized`.
    Semantic,
}

118/// 汎用構造比較エンジン
119pub struct StructureComparator {
120    options: ComparisonOptions,
121    fingerprint_cache: HashMap<String, String>,
122}
123
124impl StructureComparator {
125    pub fn new(options: ComparisonOptions) -> Self {
126        Self { options, fingerprint_cache: HashMap::new() }
127    }
128
129    pub fn compare(&mut self, s1: &Structure, s2: &Structure) -> StructureComparisonResult {
130        // 識別子の類似性
131        let identifier_similarity = self.compare_identifiers(&s1.identifier, &s2.identifier);
132
133        // メンバーの類似性と詳細
134        let (member_similarity, member_matches, differences) =
135            self.compare_members(&s1.members, &s2.members);
136
137        // メンバー数の違いによるペナルティを計算
138        let size_penalty = self.calculate_size_penalty(s1.members.len(), s2.members.len());
139
140        // 全体的な類似性を計算(サイズペナルティを適用)
141        let base_similarity = self.options.name_weight * identifier_similarity
142            + self.options.structure_weight * member_similarity;
143
144        let overall_similarity = base_similarity * size_penalty;
145
146        StructureComparisonResult {
147            overall_similarity,
148            identifier_similarity,
149            member_similarity,
150            member_matches,
151            differences,
152        }
153    }
154
155    fn calculate_size_penalty(&self, size1: usize, size2: usize) -> f64 {
156        let min_size = size1.min(size2) as f64;
157        let max_size = size1.max(size2) as f64;
158
159        if max_size == 0.0 {
160            return 1.0;
161        }
162
163        let ratio = min_size / max_size;
164
165        if self.options.strict_size_check {
166            // 厳格モード: より強いペナルティ
167            if ratio < 0.3 {
168                // 30%未満: 非常に強いペナルティ
169                ratio * ratio * 0.5
170            } else if ratio < 0.5 {
171                // 30-50%: 強いペナルティ
172                ratio * ratio
173            } else if ratio < 0.7 {
174                // 50-70%: 中程度のペナルティ
175                0.4 + (ratio * 0.6)
176            } else {
177                // 70%以上: 軽いペナルティ
178                0.7 + (ratio * 0.3)
179            }
180        } else {
181            // 通常モード: 従来のペナルティ
182            if ratio < 0.5 {
183                ratio * ratio
184            } else {
185                0.25 + (ratio * 0.75)
186            }
187        }
188    }
189
190    fn compare_identifiers(&self, id1: &StructureIdentifier, id2: &StructureIdentifier) -> f64 {
191        // 種類が異なる場合はペナルティ
192        let kind_factor = if id1.kind == id2.kind { 1.0 } else { 0.8 };
193
194        // 名前の類似性
195        let name_similarity = calculate_string_similarity(&id1.name, &id2.name);
196
197        name_similarity * kind_factor
198    }
199
200    fn compare_members(
201        &self,
202        members1: &[StructureMember],
203        members2: &[StructureMember],
204    ) -> (f64, Vec<MemberMatch>, StructureDifferences) {
205        let mut matches = Vec::new();
206        let mut matched_indices1 = vec![false; members1.len()];
207        let mut matched_indices2 = vec![false; members2.len()];
208
209        // 各メンバーの最良マッチを見つける
210        for (i, m1) in members1.iter().enumerate() {
211            let mut best_match = None;
212            let mut best_score = 0.0;
213
214            for (j, m2) in members2.iter().enumerate() {
215                if matched_indices2[j] {
216                    continue;
217                }
218
219                let score = self.compare_single_member(m1, m2);
220                if score > best_score && score >= self.options.threshold {
221                    best_score = score;
222                    best_match = Some(j);
223                }
224            }
225
226            if let Some(j) = best_match {
227                matched_indices1[i] = true;
228                matched_indices2[j] = true;
229                matches.push(MemberMatch {
230                    member1: m1.name.clone(),
231                    member2: members2[j].name.clone(),
232                    similarity: best_score,
233                });
234            }
235        }
236
237        // 差分を収集
238        let missing_members: Vec<String> = members1
239            .iter()
240            .enumerate()
241            .filter(|(i, _)| !matched_indices1[*i])
242            .map(|(_, m)| m.name.clone())
243            .collect();
244
245        let extra_members: Vec<String> = members2
246            .iter()
247            .enumerate()
248            .filter(|(i, _)| !matched_indices2[*i])
249            .map(|(_, m)| m.name.clone())
250            .collect();
251
252        let type_mismatches: Vec<(String, String, String)> = matches
253            .iter()
254            .filter_map(|m| {
255                let m1 = members1.iter().find(|member| member.name == m.member1)?;
256                let m2 = members2.iter().find(|member| member.name == m.member2)?;
257                if m1.value_type != m2.value_type {
258                    Some((m.member1.clone(), m1.value_type.clone(), m2.value_type.clone()))
259                } else {
260                    None
261                }
262            })
263            .collect();
264
265        // 類似性スコアを計算
266        // マッチしたメンバー数と最小メンバー数の両方を考慮
267        let min_members = members1.len().min(members2.len()) as f64;
268        let max_members = members1.len().max(members2.len()) as f64;
269
270        let similarity = if max_members > 0.0 {
271            // マッチしたメンバーの割合を計算
272            let match_ratio = matches.len() as f64 / max_members;
273
274            // すべてのメンバーがマッチしているかチェック
275            if matches.len() as f64 >= min_members && min_members == max_members {
276                // 完全一致
277                match_ratio
278            } else if matches.len() as f64 >= min_members {
279                // 部分一致(追加フィールドあり)
280                match_ratio * 0.9
281            } else {
282                // 不完全な一致
283                match_ratio * 0.7
284            }
285        } else {
286            1.0
287        };
288
289        let differences = StructureDifferences { missing_members, extra_members, type_mismatches };
290
291        (similarity, matches, differences)
292    }
293
294    fn compare_single_member(&self, m1: &StructureMember, m2: &StructureMember) -> f64 {
295        let name_sim = calculate_string_similarity(&m1.name, &m2.name);
296
297        let type_sim = match self.options.member_comparison {
298            MemberComparisonStrategy::Exact => {
299                if m1.value_type == m2.value_type {
300                    1.0
301                } else {
302                    0.0
303                }
304            }
305            MemberComparisonStrategy::Normalized => {
306                calculate_type_similarity(&m1.value_type, &m2.value_type)
307            }
308            MemberComparisonStrategy::Semantic => {
309                // 意味的な類似性(将来実装)
310                calculate_type_similarity(&m1.value_type, &m2.value_type)
311            }
312        };
313
314        // 修飾子の一致度
315        let modifier_sim = calculate_modifier_similarity(&m1.modifiers, &m2.modifiers);
316
317        // 重み付き平均
318        0.4 * name_sim + 0.5 * type_sim + 0.1 * modifier_sim
319    }
320
321    pub fn generate_fingerprint(&mut self, structure: &Structure) -> String {
322        let key = format!(
323            "{}::{}",
324            structure.identifier.namespace.as_deref().unwrap_or(""),
325            structure.identifier.name
326        );
327
328        self.fingerprint_cache
329            .entry(key)
330            .or_insert_with(|| compute_structure_fingerprint(structure))
331            .clone()
332    }
333}
334
335/// 構造のフィンガープリントを計算
336pub fn compute_structure_fingerprint(structure: &Structure) -> String {
337    let mut parts = Vec::new();
338
339    // 種類
340    parts.push(format!("kind:{:?}", structure.identifier.kind));
341
342    // メンバー数(より細かい分類)
343    let member_count = structure.members.len();
344    let member_category = match member_count {
345        0 => "empty",
346        1 => "single",
347        2..=3 => "small",
348        4..=6 => "medium",
349        7..=10 => "large",
350        _ => "huge",
351    };
352    parts.push(format!("size:{}", member_category));
353    parts.push(format!("members:{}", member_count));
354
355    // 型の分布を計算
356    let mut type_counts: HashMap<String, usize> = HashMap::new();
357    for member in &structure.members {
358        let normalized_type = normalize_type(&member.value_type);
359        *type_counts.entry(normalized_type).or_insert(0) += 1;
360    }
361
362    // ソートして一貫性を保つ
363    let mut type_entries: Vec<_> = type_counts.iter().collect();
364    type_entries.sort_by_key(|(k, _)| k.as_str());
365
366    for (type_name, count) in type_entries {
367        parts.push(format!("{}:{}", type_name, count));
368    }
369
370    // ジェネリクスがあれば追加
371    if !structure.metadata.generics.is_empty() {
372        parts.push(format!("generics:{}", structure.metadata.generics.len()));
373    }
374
375    parts.join(",")
376}
377
378/// フィンガープリントが比較対象として妥当かチェック
379pub fn should_compare_fingerprints(fp1: &str, fp2: &str) -> bool {
380    let parts1 = parse_fingerprint(fp1);
381    let parts2 = parse_fingerprint(fp2);
382
383    // 種類が違う場合は比較しない(TypeScriptInterfaceとRustStructなど)
384    if let (Some(kind1), Some(kind2)) = (parts1.get("kind"), parts2.get("kind")) {
385        if kind1 != kind2 {
386            return false;
387        }
388    }
389
390    // サイズカテゴリが大きく異なる場合は比較しない
391    if let (Some(size1), Some(size2)) = (parts1.get("size"), parts2.get("size")) {
392        let size_diff = size_category_distance(size1, size2);
393        if size_diff > 2 {
394            return false;
395        }
396    }
397
398    // メンバー数が大きく異なる場合は比較しない
399    if let (Some(members1), Some(members2)) = (parts1.get("members"), parts2.get("members")) {
400        if let (Ok(count1), Ok(count2)) = (members1.parse::<usize>(), members2.parse::<usize>()) {
401            let min = count1.min(count2);
402            let max = count1.max(count2);
403            if max > 0 && (min as f64 / max as f64) < 0.3 {
404                return false;
405            }
406        }
407    }
408
409    true
410}
411
/// Parses a fingerprint into a map from part name to part value.
///
/// The fingerprint is split on `,` and each part on its first `:`; parts without a `:`
/// are dropped. Splitting only on the first `:` keeps values that contain `:` intact,
/// such as the kind `Generic("ns::Foo")`.
fn parse_fingerprint(fp: &str) -> HashMap<String, String> {
    fp.split(',')
        .filter_map(|part| part.split_once(':'))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}

/// Returns how many steps apart two size categories are.
///
/// Categories are ordered `empty`, `single`, `small`, `medium`, `large`, `huge`.
/// An unrecognized category is treated as position 0, the same as `empty`.
fn size_category_distance(cat1: &str, cat2: &str) -> usize {
    const CATEGORIES: [&str; 6] = ["empty", "single", "small", "medium", "large", "huge"];
    let position = |cat: &str| CATEGORIES.iter().position(|&c| c == cat).unwrap_or(0);
    position(cat1).abs_diff(position(cat2))
}

/// Normalizes a type, given as text, into a coarse category.
///
/// The first matching rule wins, in this order: `array` (contains `[]` or `Array`),
/// `string`, `number`, `boolean` (contains that word), `object` (contains both `{` and
/// `}`), otherwise `other`.
fn normalize_type(type_str: &str) -> String {
    // Array patterns are checked first, before the element type can match.
    let category = if type_str.contains("[]") || type_str.contains("Array") {
        "array"
    } else if type_str.contains("string") {
        "string"
    } else if type_str.contains("number") {
        "number"
    } else if type_str.contains("boolean") {
        "boolean"
    } else if type_str.contains('{') && type_str.contains('}') {
        "object"
    } else {
        "other"
    };

    category.to_string()
}

/// Computes the similarity of two strings as a value between 0.0 and 1.0.
///
/// Equal strings score 1.0. Otherwise the score is the length of the common prefix plus
/// the common suffix, capped at the shorter string's length, divided by the longer
/// string's length. This is a cheap stand-in for an edit distance.
///
/// All lengths are counted in `char`s. Measuring them in bytes while counting the common
/// prefix and suffix in `char`s would under-score non-ASCII names.
fn calculate_string_similarity(s1: &str, s2: &str) -> f64 {
    if s1 == s2 {
        return 1.0;
    }

    let len1 = s1.chars().count();
    let len2 = s2.chars().count();
    // The strings differ, so at least one is non-empty and `max_len` is non-zero.
    let max_len = len1.max(len2) as f64;

    let common_prefix = s1.chars().zip(s2.chars()).take_while(|(a, b)| a == b).count();
    let common_suffix = s1.chars().rev().zip(s2.chars().rev()).take_while(|(a, b)| a == b).count();
    // Prefix and suffix may overlap, so cap the total at the shorter length.
    let common = (common_prefix + common_suffix).min(len1.min(len2));

    common as f64 / max_len
}

466/// 型の類似性を計算
467fn calculate_type_similarity(t1: &str, t2: &str) -> f64 {
468    if t1 == t2 {
469        return 1.0;
470    }
471
472    let norm1 = normalize_type(t1);
473    let norm2 = normalize_type(t2);
474
475    if norm1 == norm2 {
476        0.8 // 正規化後に一致
477    } else {
478        0.0
479    }
480}
481
/// Computes the similarity of two modifier lists.
///
/// Returns 1.0 when both lists are empty. Otherwise the lists are treated as sets
/// (duplicates ignored) and the result is the size of their intersection divided by the
/// size of their union.
fn calculate_modifier_similarity(m1: &[String], m2: &[String]) -> f64 {
    if m1.is_empty() && m2.is_empty() {
        return 1.0;
    }

    let set1: HashSet<&str> = m1.iter().map(String::as_str).collect();
    let set2: HashSet<&str> = m2.iter().map(String::as_str).collect();

    let intersection = set1.intersection(&set2).count();
    // At least one list is non-empty here, so the union is never empty.
    let union = set1.union(&set2).count();

    intersection as f64 / union as f64
}