/// Reduces a track title to a compact, comparison-friendly key.
///
/// The title is lowercased and split into words on whitespace, brackets
/// (`(`, `)`, `[`, `]`), hyphens and underscores. Dots and apostrophes are
/// deleted from inside each word (so `feat.` becomes `feat` and `don't`
/// becomes `dont`). Words that are exactly a "featuring" marker (`feat`,
/// `ft`, `featuring`) are dropped, and the remaining words are concatenated
/// without any separator.
///
/// Markers are only removed when they form a whole word: letters that merely
/// happen to appear inside another word (`defeat`, `left`, `after`) are kept
/// intact, so unrelated titles are not mangled before similarity scoring.
///
/// Returns an empty string when the input contains no word characters.
fn normalize(s: &str) -> String {
    // Whole-word tokens that only introduce a guest artist.
    const FEATURING_MARKERS: [&str; 3] = ["feat", "ft", "featuring"];

    let lowered = s.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    let is_separator =
        |c: char| c.is_whitespace() || matches!(c, '(' | ')' | '[' | ']' | '-' | '_');

    for raw_word in lowered.split(is_separator) {
        // Dots and apostrophes are removed rather than treated as word
        // boundaries so that abbreviations ("feat.") and contractions
        // ("don't") collapse into a single token.
        let word: String = raw_word
            .chars()
            .filter(|c| !matches!(c, '.' | '\''))
            .collect();

        if !FEATURING_MARKERS.contains(&word.as_str()) {
            normalized.push_str(&word);
        }
    }

    normalized
}