qobuz_api_rust/metadata/
embedder.rs

1use std::{collections::HashSet, io::Error};
2
3use lofty::{
4    config::WriteOptions,
5    file::{
6        AudioFile,
7        FileType::{Flac, Mpeg},
8    },
9    picture::{MimeType::Jpeg, Picture, PictureType::CoverFront},
10    prelude::{
11        Accessor,
12        ItemKey::{
13            self, AlbumArtist, CommercialInformationUrl, Composer, CopyrightMessage, Isrc, Label,
14            MusicianCredits, OriginalMediaType, RecordingDate, ReleaseDate,
15        },
16        TagExt, TaggedFileExt,
17    },
18    read_from_path,
19    tag::{
20        ItemValue::{self, Text},
21        Tag, TagItem,
22        TagType::{Id3v2, VorbisComments},
23    },
24};
25
26use crate::{
27    errors::QobuzApiError::{self, IoError, LoftyError},
28    metadata::MetadataConfig,
29    models::{Album, Artist, Track},
30    utils::{download_image, timestamp_to_date_and_year},
31};
32
33/// Embeds comprehensive metadata into an audio file.
34///
35/// This function takes track, album, and artist information and embeds it into
36/// the specified audio file. It handles different audio formats (FLAC, MP3, etc.)
37/// with format-specific tagging approaches to ensure compatibility and consistency
38/// with the original C# implementation.
39///
40/// The function embeds various metadata fields including:
41/// - Track title (with version if available)
42/// - Album title (with version if available)
43/// - Artist information (with format-specific handling)
44/// - Album artist (with classical music considerations)
45/// - Composer information (with duplicate handling)
46/// - Performers and involved people
47/// - Track and disc numbers
48/// - Release dates and years
49/// - ISRC and copyright information
50/// - Genre and label information
51/// - Commercial URL and media type
52/// - Cover art (with quality preference order)
53///
54/// # Arguments
55///
56/// * `filepath` - Path to the audio file to embed metadata into
57/// * `track` - Track information containing title, performers, composers, etc.
58/// * `album` - Album information containing title, artists, label, dates, etc.
59/// * `artist` - Primary artist information
60/// * `config` - Configuration defining which metadata tags should be written
61///
62/// # Returns
63///
64/// Returns `Ok(())` if the metadata was successfully embedded, or an error if
65/// there was a problem reading, writing, or processing the file.
66///
67/// # Example
68///
69/// ```rust
70/// use qobuz_api_rust::{models::{Track, Album, Artist}, metadata::embedder::embed_metadata_in_file};
71///
72/// // Assuming you have track, album, and artist data
73/// // let result = embed_metadata_in_file("path/to/audio.flac", &track, &album, &artist).await;
74/// // if result.is_err() {
75/// //     eprintln!("Failed to embed metadata");
76/// // }
77/// ```
78pub async fn embed_metadata_in_file(
79    filepath: &str,
80    track: &Track,
81    album: &Album,
82    artist: &Artist,
83    config: &MetadataConfig,
84) -> Result<(), QobuzApiError> {
85    // Read the audio file
86    let mut tagged_file = read_from_path(filepath).map_err(LoftyError)?;
87
88    // Get the file type before getting a mutable reference to the tag
89    let file_type = tagged_file.file_type();
90
91    // Create or get the tag based on the actual file format
92    let tag = match tagged_file.primary_tag_mut() {
93        Some(primary_tag) => primary_tag,
94        None => {
95            // If no primary tag exists, try to get the tag from audio file
96            if let Some(tag) = tagged_file.first_tag_mut() {
97                tag
98            } else {
99                // Determine the appropriate tag type based on the actual file format detected by lofty
100                let tag_type = match file_type {
101                    Flac => VorbisComments,
102                    Mpeg => Id3v2,
103                    _ => Id3v2, // Default to ID3v2 for unknown formats
104                };
105
106                let new_tag = Tag::new(tag_type);
107                tagged_file.insert_tag(new_tag);
108
109                // After inserting the tag, we need to access it again
110                // Get the primary tag which should now be the one we just inserted
111                tagged_file.primary_tag_mut().ok_or_else(|| {
112                    IoError(Error::other(
113                        "Could not create or access tag for metadata embedding",
114                    ))
115                })?
116            }
117        }
118    };
119
120    // Clear existing tags to avoid duplicates
121    tag.clear();
122
123    // Add track metadata
124    if config.track_title
125        && let Some(ref title) = track.title
126    {
127        let mut full_title = title.clone();
128        if let Some(ref version) = track.version
129            && !version.is_empty()
130        {
131            full_title = format!("{} ({})", full_title, version);
132        }
133        tag.set_title(full_title);
134    }
135
136    if config.album
137        && let Some(ref album_title) = album.title
138    {
139        // Combine album title with version if available, similar to C# implementation
140        let album_name = if let Some(ref version) = album.version {
141            if !version.is_empty() {
142                format!("{} ({})", album_title, version)
143            } else {
144                album_title.clone()
145            }
146        } else {
147            album_title.clone()
148        };
149        tag.set_album(album_name);
150    }
151
152    // Determine the Album Artist for FLAC files (singular, conductor priority for classical)
153    let album_artist_for_flac = {
154        let mut result = String::new();
155        if let Some(ref album_artists) = album.artists {
156            let conductor_artist = album_artists.iter().find(|artist| {
157                artist.roles.as_ref().is_some_and(|roles| {
158                    roles.contains(&"main-artist".to_string())
159                        && artist.name.as_ref().is_some_and(|name| {
160                            track.performers.as_ref().is_some_and(|performers_str| {
161                                performers_str.contains(&format!("{}, Conductor", name))
162                            })
163                        })
164                })
165            });
166
167            if let Some(artist) = conductor_artist {
168                result = artist.name.clone().unwrap_or_default();
169            } else if let Some(ref album_artist) = album.artist
170                && let Some(ref name) = album_artist.name
171            {
172                result = name.clone();
173            }
174        } else if let Some(ref album_artist) = album.artist
175            && let Some(ref name) = album_artist.name
176        {
177            result = name.clone();
178        }
179        result
180    };
181
182    // Determine the Album Artist(s) for MP3 files (merged string of main artists)
183    let album_artist_for_mp3 = {
184        let mut main_artists_from_album = Vec::new();
185        if let Some(ref album_artists) = album.artists {
186            for artist_in_list in album_artists {
187                if let Some(ref roles) = artist_in_list.roles
188                    && roles.contains(&"main-artist".to_string())
189                    && let Some(ref name) = artist_in_list.name
190                {
191                    main_artists_from_album.push(name.clone());
192                }
193            }
194        }
195        // Fallback to album.artist.name if album.artists is empty or no main artists found
196        if main_artists_from_album.is_empty()
197            && let Some(ref album_artist) = album.artist
198            && let Some(ref name) = album_artist.name
199        {
200            main_artists_from_album.push(name.clone());
201        }
202        main_artists_from_album.join("/")
203    };
204
205    // Select the final album artist name based on file type
206    let album_artist_name = match file_type {
207        Flac => album_artist_for_flac,
208        Mpeg => album_artist_for_mp3,
209        _ => album_artist_for_mp3, // Default to MP3 behavior for other formats
210    };
211
212    if config.album_artist && !album_artist_name.is_empty() {
213        let tag_item = TagItem::new(AlbumArtist, Text(album_artist_name));
214        tag.push(tag_item);
215    }
216
217    // Set artist - combine multiple artists if available from different sources
218    // For consistency with C# implementation, prioritize performers order when they contain artist info
219    let mut artist_names = Vec::new();
220    let mut artist_set = HashSet::new(); // Use a set to prevent duplicates
221
222    // Extract additional artists from performers string first to preserve their order
223    if let Some(ref track_performers) = track.performers {
224        let performer_artists = extract_artist_names_from_performers(track_performers, &artist_set);
225        for performer_artist in performer_artists {
226            if !artist_set.contains(&performer_artist) {
227                artist_names.push(performer_artist.clone());
228                artist_set.insert(performer_artist.clone());
229            }
230        }
231    }
232
233    // Set producers for FLAC files
234    if config.producer
235        && file_type == Flac
236        && let Some(ref performers_str) = track.performers
237    {
238        let producers = extract_producers_from_performers(performers_str);
239        for producer in producers {
240            // Lofty uses "PRODUCER" as the tag for Vorbis Comments (FLAC)
241            let tag_item = TagItem::new(
242                ItemKey::from_key(VorbisComments, "PRODUCER"),
243                Text(producer.clone()),
244            );
245            tag.push(tag_item);
246        }
247    }
248
249    // Then add the main artist from the artist parameter if not already included as a performer
250    if let Some(ref artist_name) = artist.name
251        && !artist_set.contains(artist_name)
252    {
253        artist_names.push(artist_name.clone());
254        artist_set.insert(artist_name.clone());
255    }
256
257    // Add artists from album.artists field (for classical music and multi-artist albums)
258    if let Some(ref album_artists) = album.artists {
259        for album_artist in album_artists {
260            if let Some(ref name) = album_artist.name
261                && !name.is_empty()
262                && !artist_set.contains(name)
263            {
264                artist_names.push(name.clone());
265                artist_set.insert(name.clone());
266            }
267        }
268    }
269
270    // Set the combined artist field
271    if config.artist && !artist_names.is_empty() {
272        let combined_artists = match file_type {
273            Flac => artist_names.join(", "),
274            _ => artist_names.join("/"),
275        };
276        tag.set_artist(combined_artists);
277    }
278
279    // Initialize composers list - combine multiple composers if available
280    let mut composers = Vec::new();
281    let mut composer_normalized_set = HashSet::new(); // Use a set to prevent duplicates based on normalized names
282
283    // Determine composers based on file type and C# behavior
284    if file_type == Flac {
285        // For FLAC, we need to carefully select a single composer to match C# behavior.
286        // C# seems to prioritize composers from performers string, then potentially track.composer.name
287        let mut potential_composers_from_performers = Vec::new();
288        if let Some(ref performers_str) = track.performers {
289            potential_composers_from_performers = extract_composers_from_performers(performers_str);
290        }
291
292        if let Some(composer_from_performers) = potential_composers_from_performers.last() {
293            // If composers are found in performers, take the last one (mimicking a possible C# selection)
294            if composer_from_performers != "Various Composers" {
295                composers.push(composer_from_performers.clone());
296                composer_normalized_set.insert(normalize_composer_name(composer_from_performers));
297            }
298        } else if let Some(ref track_composer) = track.composer
299            && let Some(ref composer_name) = track_composer.name
300            && composer_name != "Various Composers"
301        {
302            // Fallback to track.composer.name if no composers found in performers
303            composers.push(composer_name.clone());
304            composer_normalized_set.insert(normalize_composer_name(composer_name));
305        } else if let Some(ref album_composer) = album.composer
306            && let Some(ref composer_name) = album_composer.name
307            && composer_name != "Various Composers"
308            && !is_duplicate_composer(composer_name, &composer_normalized_set)
309        {
310            // Fallback to album composer if neither performers nor track composer yields a result
311            composers.push(composer_name.clone());
312            composer_normalized_set.insert(normalize_composer_name(composer_name));
313        }
314    } else {
315        // For other file types (e.g., Mpeg), use the existing aggregation logic
316        // This will aggregate all composers found in performers, track.composer, and album.composer
317        if let Some(ref performers_str) = track.performers {
318            let extracted_composers = extract_composers_from_performers(performers_str);
319            for composer in extracted_composers {
320                if composer != "Various Composers"
321                    && !is_duplicate_composer(&composer, &composer_normalized_set)
322                {
323                    composers.push(composer.clone());
324                    composer_normalized_set.insert(normalize_composer_name(&composer));
325                }
326            }
327        }
328
329        if let Some(ref track_composer) = track.composer
330            && let Some(ref composer_name) = track_composer.name
331            && composer_name != "Various Composers"
332            && !is_duplicate_composer(composer_name, &composer_normalized_set)
333        {
334            composers.push(composer_name.clone());
335            composer_normalized_set.insert(normalize_composer_name(composer_name));
336        }
337
338        if let Some(ref album_composer) = album.composer
339            && let Some(ref composer_name) = album_composer.name
340            && composer_name != "Various Composers"
341            && !is_duplicate_composer(composer_name, &composer_normalized_set)
342        {
343            composers.push(composer_name.clone());
344            composer_normalized_set.insert(normalize_composer_name(composer_name));
345        }
346    }
347
348    // Set involved people - this should be more comprehensive and avoid duplicates
349    let involved_people = if let Some(ref performers_str) = track.performers {
350        performers_str.clone()
351    } else {
352        String::new()
353    };
354
355    // The Track model doesn't have a contributor field, so we'll just use performers and composer
356    if config.involved_people && !involved_people.is_empty() {
357        // Add as MusicianCredits which maps to the Involved People field in ID3
358        let tag_item = TagItem::new(MusicianCredits, Text(involved_people));
359        tag.push(tag_item);
360    }
361
362    // Set composer - combine all composers with "/" separator as in the C# implementation
363    if config.composer && !composers.is_empty() {
364        let combined_composers = composers.join("/");
365        let tag_item = TagItem::new(Composer, Text(combined_composers));
366        tag.push(tag_item);
367    }
368
369    // Set label/publisher
370    if config.label
371        && let Some(ref album_label) = album.label
372        && let Some(ref label_name) = album_label.name
373    {
374        let tag_item = TagItem::new(Label, Text(label_name.clone()));
375        tag.push(tag_item);
376    }
377
378    if config.genre
379        && let Some(ref genre) = album.genre
380        && let Some(ref genre_name) = genre.name
381    {
382        tag.set_genre(genre_name.clone());
383    }
384
385    // Add track number and total tracks
386    if config.track_number
387        && let Some(track_number) = track.track_number
388    {
389        tag.set_track(track_number as u32);
390    }
391    if config.track_total
392        && let Some(ref album_tracks_count) = album.tracks_count
393    {
394        tag.set_track_total(*album_tracks_count as u32);
395    }
396
397    // Add disc number and total discs (Part of Set)
398    if config.disc_number
399        && let Some(media_number) = track.media_number
400    {
401        tag.set_disk(media_number as u32);
402    }
403    if config.disc_total
404        && let Some(ref album_media_count) = album.media_count
405    {
406        tag.set_disk_total(*album_media_count as u32);
407    }
408
409    if config.copyright
410        && let Some(ref copyright) = track.copyright
411    {
412        let tag_item = TagItem::new(CopyrightMessage, Text(copyright.clone()));
413        tag.push(tag_item);
414    }
415
416    if config.isrc
417        && let Some(ref isrc) = track.isrc
418    {
419        let tag_item = TagItem::new(Isrc, Text(isrc.clone()));
420        tag.push(tag_item);
421    }
422
423    // Determine the primary date string (YYYY-MM-DD) and year (YYYY)
424    let mut primary_date_full: Option<String> = None;
425    let mut primary_year: Option<u32> = None;
426
427    // Prioritize album release dates, then track release dates, then timestamp
428    if let Some(ref release_date) = album.release_date_download {
429        primary_date_full = Some(release_date.clone());
430        if let Some(year_str) = release_date.split('-').next() {
431            primary_year = year_str.parse::<u32>().ok();
432        }
433    } else if let Some(ref release_date) = album.release_date_original {
434        primary_date_full = Some(release_date.clone());
435        if let Some(year_str) = release_date.split('-').next() {
436            primary_year = year_str.parse::<u32>().ok();
437        }
438    } else if let Some(ref release_date) = track.release_date_original {
439        primary_date_full = Some(release_date.clone());
440        if let Some(year_str) = release_date.split('-').next() {
441            primary_year = year_str.parse::<u32>().ok();
442        }
443    } else if let Some(released_at) = album.released_at {
444        let (date_str, year_num) = timestamp_to_date_and_year(released_at);
445        primary_date_full = date_str;
446        primary_year = year_num;
447    }
448
449    // Set the year tag if available
450    if config.release_year
451        && let Some(year) = primary_year
452    {
453        tag.set_year(year);
454    }
455
456    if config.release_date {
457        // Set the RecordingDate (maps to TDRC in ID3v2, DATE in Vorbis Comments)
458        if file_type == Flac
459            && let Some(ref date_str) = primary_date_full
460        {
461            // Lofty automatically maps RecordingDate to the appropriate tag for Vorbis Comments (DATE)
462            let tag_item = TagItem::new(RecordingDate, Text(date_str.clone()));
463            tag.push(tag_item);
464        }
465
466        // Set ReleaseDate (maps to TDRL frame in ID3)
467        // This should be set for MP3 files to match C# behavior for "Release Time", but not for FLAC.
468        if file_type == Mpeg
469            && let Some(ref date_str) = primary_date_full
470        {
471            let tag_item = TagItem::new(ReleaseDate, Text(date_str.clone()));
472            tag.push(tag_item);
473        }
474    }
475
476    // Add commercial URL - using CommercialInformationUrl field
477    if config.url
478        && let Some(ref product_url) = album.product_url
479    {
480        // Check if the URL is already a full URL or just a path
481        let full_url = if product_url.starts_with("http") {
482            product_url.clone()
483        } else {
484            format!("https://www.qobuz.com{}", product_url)
485        };
486        let tag_item = TagItem::new(CommercialInformationUrl, ItemValue::Locator(full_url));
487        tag.push(tag_item);
488    }
489
490    // Add media type - using OriginalMediaType field
491    // Use release_type if available, otherwise fall back to product_type to match C# implementation
492    if config.media_type {
493        match file_type {
494            Flac => {
495                // For FLAC, always add OriginalMediaType if release_type or product_type is "album" or "compilation"
496                if let Some(ref release_type) = album.release_type {
497                    // If release_type is "compilation", use it directly. Otherwise, if it's "album", use "album".
498                    // For any other release_type, use it as is.
499                    let media_type_str = if release_type == "compilation" {
500                        "compilation".to_string()
501                    } else if release_type == "album" {
502                        "album".to_string()
503                    } else {
504                        release_type.clone()
505                    };
506                    let tag_item = TagItem::new(OriginalMediaType, Text(media_type_str));
507                    tag.push(tag_item);
508                } else if let Some(ref product_type) = album.product_type {
509                    // Fallback to product_type if release_type is not available, with similar logic
510                    let media_type_str = if product_type == "compilation" {
511                        "compilation".to_string()
512                    } else if product_type == "album" {
513                        "album".to_string()
514                    } else {
515                        product_type.clone()
516                    };
517                    let tag_item = TagItem::new(OriginalMediaType, Text(media_type_str));
518                    tag.push(tag_item);
519                }
520            }
521
522            _ => {
523                // For other file types, use the existing logic
524                if let Some(ref release_type) = album.release_type {
525                    let tag_item = TagItem::new(OriginalMediaType, Text(release_type.clone()));
526                    tag.push(tag_item);
527                } else if let Some(ref product_type) = album.product_type {
528                    // Fallback to product_type if release_type is not available
529                    let tag_item = TagItem::new(OriginalMediaType, Text(product_type.clone()));
530                    tag.push(tag_item);
531                }
532            }
533        }
534    }
535
536    // Add cover art if available - try different image sizes in order of preference
537    if config.cover_art
538        && let Some(ref album_image) = album.image
539    {
540        // Try to get the best quality image available, in order of preference
541        let image_url = album_image
542            .mega
543            .as_ref()
544            .or(album_image.extralarge.as_ref())
545            .or(album_image.large.as_ref())
546            .or(album_image.medium.as_ref())
547            .or(album_image.small.as_ref())
548            .or(album_image.thumbnail.as_ref());
549
550        if let Some(url) = image_url {
551            // Download the image and embed it
552            match download_image(url).await {
553                Ok(image_data) => {
554                    // Create picture with proper format to match C# implementation
555                    let picture = Picture::new_unchecked(
556                        CoverFront,           // Picture Type: Front Cover (as in C# implementation)
557                        Some(Jpeg),           // MIME Type: image/jpeg (as in C# implementation)
558                        Some("".to_string()), // Empty description (as in C# implementation)
559                        image_data,
560                    );
561
562                    // Add the picture to the tag
563                    tag.push_picture(picture);
564                }
565
566                Err(e) => {
567                    eprintln!(
568                        "Warning: Could not download album cover from URL: {} - {}",
569                        url, e
570                    );
571                }
572            }
573        } else {
574            eprintln!("Warning: No album cover image URL available");
575        }
576    }
577
578    // Write the tag to the file with options appropriate for the file type
579    let options = WriteOptions::default();
580    tagged_file
581        .save_to_path(filepath, options)
582        .map_err(LoftyError)?;
583    Ok(())
584}
585
/// Extracts the names of people credited as composer or lyricist in a performers string.
///
/// The performers string lists people separated by `" - "`; each entry is the
/// person's name followed by comma-separated roles, e.g.
/// `"Name, Role1, Role2 - Another Name, Role3, Role4"`. A person is selected
/// when any of their roles contains `"Composer"` or `"Lyricist"`, so combined
/// roles such as `ComposerLyricist` match as well.
///
/// # Arguments
///
/// * `performers_str` - A string containing performer names and their roles, separated by " - "
///
/// # Returns
///
/// The composer names in order of first appearance, without repeats. Entries
/// whose name is empty (for example `", Composer"`) are skipped.
fn extract_composers_from_performers(performers_str: &str) -> Vec<String> {
    let mut composers: Vec<String> = Vec::new();

    for group in performers_str.split(" - ") {
        // First field is the name, the remaining fields are that person's roles.
        let mut fields = group.split(',').map(str::trim);

        let Some(person_name) = fields.next() else {
            continue;
        };
        if person_name.is_empty() {
            // A role list without a name would otherwise yield a blank composer.
            continue;
        }

        let writes_music =
            fields.any(|role| role.contains("Composer") || role.contains("Lyricist"));

        if writes_music && !composers.iter().any(|known| known == person_name) {
            composers.push(person_name.to_string());
        }
    }

    composers
}
629
/// Extracts the names of performing artists from a performers string.
///
/// The performers string lists people separated by `" - "`; each entry is the
/// person's name followed by comma-separated roles. A person counts as an
/// artist when any role contains `MainArtist`, `Performer` (which also covers
/// `AssociatedPerformer`), `Orchestra` or `Conductor`. The order of the
/// performers string is preserved to match the C# implementation.
///
/// # Arguments
///
/// * `performers_str` - A string containing performer names and their roles, separated by " - "
/// * `existing_artists` - A set of artist names that must not be returned again
///
/// # Returns
///
/// The artist names in order of first appearance, without repeats, excluding
/// names already in `existing_artists` and entries whose name is empty.
fn extract_artist_names_from_performers(
    performers_str: &str,
    existing_artists: &HashSet<String>,
) -> Vec<String> {
    // Substrings that mark a role as a performing role. "Performer" already
    // matches "AssociatedPerformer", so that role needs no entry of its own.
    const PERFORMER_ROLE_MARKERS: [&str; 4] = ["MainArtist", "Performer", "Orchestra", "Conductor"];

    let mut artist_names: Vec<String> = Vec::new();

    for group in performers_str.split(" - ") {
        // First field is the name, the remaining fields are that person's roles.
        let mut fields = group.split(',').map(str::trim);

        let Some(person_name) = fields.next() else {
            continue;
        };
        if person_name.is_empty() || existing_artists.contains(person_name) {
            continue;
        }

        let performs = fields.any(|role| {
            PERFORMER_ROLE_MARKERS
                .iter()
                .any(|marker| role.contains(*marker))
        });

        if performs && !artist_names.iter().any(|known| known == person_name) {
            artist_names.push(person_name.to_string());
        }
    }

    artist_names
}
685
/// Extracts the names of people credited with a producer role in a performers string.
///
/// The performers string lists people separated by `" - "`; each entry is the
/// person's name followed by comma-separated roles, e.g.
/// `"Name, Role1, Role2 - Another Name, Role3, Role4"`. Any role containing
/// `"Producer"` qualifies, which includes variants such as `Co-Producer`.
///
/// # Arguments
///
/// * `performers_str` - A string containing performer names and their roles, separated by " - "
///
/// # Returns
///
/// The producer names in order of first appearance, without repeats. Entries
/// whose name is empty are skipped.
fn extract_producers_from_performers(performers_str: &str) -> Vec<String> {
    let mut producers: Vec<String> = Vec::new();

    for group in performers_str.split(" - ") {
        // First field is the name, the remaining fields are that person's roles.
        let mut fields = group.split(',').map(str::trim);

        let Some(person_name) = fields.next() else {
            continue;
        };
        if person_name.is_empty() {
            continue;
        }

        let produces = fields.any(|role| role.contains("Producer"));

        // A person listed twice must not produce two identical PRODUCER items.
        if produces && !producers.iter().any(|known| known == person_name) {
            producers.push(person_name.to_string());
        }
    }

    producers
}
721
/// Normalizes a composer name for comparison purposes to identify duplicates.
///
/// The normalization applies, in order:
/// - conversion to lowercase
/// - removal of `.` and `,`
/// - replacement of `-` with a space
/// - collapsing of every run of whitespace into a single space (and trimming)
/// - two word-level special cases seen in the test data: the adjacent words
///   `guy manuel` become `guymanuel` (so "Guy-Manuel" and "Guy Manuel" agree),
///   and the adjacent words `m davis` become `miles davis` (so "M. Davis" and
///   "Miles Davis" agree)
///
/// The special cases are matched on whole words only. Matching them as raw
/// substrings would corrupt unrelated names, e.g. "Tom Davis" would turn into
/// "tomiles davis".
///
/// # Arguments
///
/// * `name` - The composer name to normalize
///
/// # Returns
///
/// A normalized version of the composer name suitable for comparison. The
/// result is empty when `name` contains nothing but punctuation and whitespace.
fn normalize_composer_name(name: &str) -> String {
    // Character-level cleanup: drop '.' and ',', turn hyphens into spaces.
    let cleaned: String = name
        .to_lowercase()
        .chars()
        .filter(|c| !matches!(c, '.' | ','))
        .map(|c| if c == '-' { ' ' } else { c })
        .collect();

    // Word-level pass: `split_whitespace` collapses arbitrary whitespace runs,
    // and looking at the previously emitted word lets the special cases match
    // only complete adjacent words.
    let mut words: Vec<&str> = Vec::new();
    for word in cleaned.split_whitespace() {
        match (words.last().copied(), word) {
            (Some("guy"), "manuel") => {
                words.pop();
                words.push("guymanuel");
            }
            (Some("m"), "davis") => {
                words.pop();
                words.push("miles");
                words.push("davis");
            }
            _ => words.push(word),
        }
    }

    words.join(" ")
}
773
774/// Checks if a composer name is a duplicate of an existing one in the normalized set.
775///
776/// This function determines if a composer name is already present in the set of
777/// normalized composer names, using both direct matching and substring matching
778/// to catch variations like "Miles Davis" vs "M. Davis".
779///
780/// # Arguments
781///
782/// * `composer_name` - The composer name to check for duplicates
783/// * `existing_normalized_set` - A set of already normalized composer names
784///
785/// # Returns
786///
787/// `true` if the composer name is a duplicate, `false` otherwise.
788fn is_duplicate_composer(composer_name: &str, existing_normalized_set: &HashSet<String>) -> bool {
789    let normalized_name = normalize_composer_name(composer_name);
790
791    // Direct match
792    if existing_normalized_set.contains(&normalized_name) {
793        return true;
794    }
795
796    // Check for substring matches (e.g., "Miles Davis" vs "M. Davis")
797    for existing in existing_normalized_set {
798        // If the new name is a substring of an existing name or vice versa
799        if existing.contains(&normalized_name) || normalized_name.contains(existing) {
800            return true;
801        }
802    }
803
804    false
805}