From b7b405dc839598892cadd236f009f1d21a76a4d3 Mon Sep 17 00:00:00 2001
From: Shadowghost
Date: Sat, 9 May 2026 02:07:26 +0200
Subject: [PATCH] Fix artist duplicates

Look up MusicArtist items case-insensitively so that a track whose artist
tag differs only in casing resolves to the existing artist instead of
creating a second one, and add a migration that merges the duplicates that
are already stored.

The lookups still accept an exact name match: a database lower() that folds
only ASCII (the SQLite built-in) cannot match names containing non-ASCII
capitals against ToLowerInvariant() output.

---
Review notes (not part of the commit message):
- Generic type arguments, XML doc tags and indentation were reconstructed;
  confirm them against the tree before applying.
- The index blob ids below predate the review changes.

 .../Item/BaseItemRepository.TranslateQuery.cs |   4 +-
 .../Item/LinkedChildrenService.cs             |  11 +-
 .../Routines/MergeDuplicateMusicArtists.cs    | 206 ++++++++++++++++++
 3 files changed, 217 insertions(+), 4 deletions(-)
 create mode 100644 Jellyfin.Server/Migrations/Routines/MergeDuplicateMusicArtists.cs

diff --git a/Jellyfin.Server.Implementations/Item/BaseItemRepository.TranslateQuery.cs b/Jellyfin.Server.Implementations/Item/BaseItemRepository.TranslateQuery.cs
index 0abe981af8..59e61cfd65 100644
--- a/Jellyfin.Server.Implementations/Item/BaseItemRepository.TranslateQuery.cs
+++ b/Jellyfin.Server.Implementations/Item/BaseItemRepository.TranslateQuery.cs
@@ -390,7 +390,9 @@ public sealed partial class BaseItemRepository
         {
             if (filter.UseRawName == true)
             {
-                baseQuery = baseQuery.Where(e => e.Name == filter.Name);
+                // Keep the exact match: a database lower() that folds only ASCII would miss names with non-ASCII capitals.
+                var nameLower = filter.Name.ToLowerInvariant();
+                baseQuery = baseQuery.Where(e => e.Name == filter.Name || e.Name!.ToLower() == nameLower);
             }
             else
             {
diff --git a/Jellyfin.Server.Implementations/Item/LinkedChildrenService.cs b/Jellyfin.Server.Implementations/Item/LinkedChildrenService.cs
index 415510b2f4..9e11b6be62 100644
--- a/Jellyfin.Server.Implementations/Item/LinkedChildrenService.cs
+++ b/Jellyfin.Server.Implementations/Item/LinkedChildrenService.cs
@@ -1,4 +1,6 @@
 #pragma warning disable RS0030 // Do not use banned APIs
+#pragma warning disable CA1304 // Specify CultureInfo
+#pragma warning disable CA1311 // Specify a culture or use an invariant version
 
 using System;
 using System.Collections.Generic;
@@ -62,17 +64,20 @@ public class LinkedChildrenService : ILinkedChildrenService
     {
         using var dbContext = _dbProvider.CreateDbContext();
 
+        // Exact names are matched as well: the database lower() may fold only ASCII, unlike ToLowerInvariant().
+        var lowerNames = artistNames.Select(n => n.ToLowerInvariant()).ToArray();
         var artists = dbContext.BaseItems
             .AsNoTracking()
             .Where(e => e.Type == _itemTypeLookup.BaseItemKindNames[BaseItemKind.MusicArtist]!)
-            .Where(e => artistNames.Contains(e.Name))
+            .Where(e => artistNames.Contains(e.Name) || lowerNames.Contains(e.Name!.ToLower()))
             .ToArray();
 
         var lookup = artists
-            .GroupBy(e => e.Name!)
+            .GroupBy(e => e.Name!, StringComparer.OrdinalIgnoreCase)
             .ToDictionary(
                 g => g.Key,
-                g => g.Select(f => _queryHelpers.DeserializeBaseItem(f)).Where(dto => dto is not null).Cast<MusicArtist>().ToArray());
+                g => g.Select(f => _queryHelpers.DeserializeBaseItem(f)).Where(dto => dto is not null).Cast<MusicArtist>().ToArray(),
+                StringComparer.OrdinalIgnoreCase);
 
         var result = new Dictionary<string, MusicArtist[]>(artistNames.Count);
         foreach (var name in artistNames)
diff --git a/Jellyfin.Server/Migrations/Routines/MergeDuplicateMusicArtists.cs b/Jellyfin.Server/Migrations/Routines/MergeDuplicateMusicArtists.cs
new file mode 100644
index 0000000000..f598848465
--- /dev/null
+++ b/Jellyfin.Server/Migrations/Routines/MergeDuplicateMusicArtists.cs
@@ -0,0 +1,206 @@
+#pragma warning disable RS0030 // Do not use banned APIs
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Jellyfin.Database.Implementations;
+using Jellyfin.Server.ServerSetupApp;
+using MediaBrowser.Controller.Library;
+using MediaBrowser.Controller.Persistence;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Logging;
+
+namespace Jellyfin.Server.Migrations.Routines;
+
+/// <summary>
+/// Merges MusicArtist records that differ only by Name casing. Prior to the case-insensitive
+/// dedup lookup added alongside this migration, the artist validator would create a second
+/// MusicArtist whenever a track tagged the artist with a different casing than the
+/// resolver-created one (e.g. "Thirty Seconds To Mars" vs. "Thirty Seconds to Mars").
+/// </summary>
+[JellyfinMigration("2026-05-08T12:00:00", nameof(MergeDuplicateMusicArtists))]
+[JellyfinMigrationBackup(JellyfinDb = true)]
+public class MergeDuplicateMusicArtists : IAsyncMigrationRoutine
+{
+    private const string MusicArtistType = "MediaBrowser.Controller.Entities.Audio.MusicArtist";
+
+    private readonly IStartupLogger<MergeDuplicateMusicArtists> _logger;
+    private readonly IDbContextFactory<JellyfinDbContext> _dbContextFactory;
+    private readonly ILibraryManager _libraryManager;
+    private readonly IItemPersistenceService _persistenceService;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="MergeDuplicateMusicArtists"/> class.
+    /// </summary>
+    /// <param name="logger">The startup logger.</param>
+    /// <param name="dbContextFactory">The database context factory.</param>
+    /// <param name="libraryManager">The library manager.</param>
+    /// <param name="persistenceService">The item persistence service.</param>
+    public MergeDuplicateMusicArtists(
+        IStartupLogger<MergeDuplicateMusicArtists> logger,
+        IDbContextFactory<JellyfinDbContext> dbContextFactory,
+        ILibraryManager libraryManager,
+        IItemPersistenceService persistenceService)
+    {
+        _logger = logger;
+        _dbContextFactory = dbContextFactory;
+        _libraryManager = libraryManager;
+        _persistenceService = persistenceService;
+    }
+
+    /// <inheritdoc />
+    public async Task PerformAsync(CancellationToken cancellationToken)
+    {
+        var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
+        await using (context.ConfigureAwait(false))
+        {
+            var artists = await context.BaseItems
+                .Where(b => b.Type == MusicArtistType && b.Name != null)
+                .Select(b => new { b.Id, b.Name, b.DateCreated })
+                .ToListAsync(cancellationToken)
+                .ConfigureAwait(false);
+
+            // Group with the same OrdinalIgnoreCase comparer the artist lookup uses, so the records
+            // treated as duplicates here are exactly those the lookup resolves to a single artist.
+            var groups = artists
+                .GroupBy(a => a.Name!, StringComparer.OrdinalIgnoreCase)
+                .Where(g => g.Count() > 1)
+                .ToList();
+
+            if (groups.Count == 0)
+            {
+                _logger.LogInformation("No case-only duplicate MusicArtist records found.");
+                return;
+            }
+
+            _logger.LogInformation("Found {Count} groups of case-only duplicate MusicArtist records.", groups.Count);
+
+            var idsToDelete = new List<Guid>();
+            foreach (var group in groups)
+            {
+                cancellationToken.ThrowIfCancellationRequested();
+
+                var groupIds = group.Select(g => g.Id).ToArray();
+
+                // Pick the keeper: the artist with the most child references is the "real" one
+                // (the resolver-created artist with a filesystem path); the duplicates are usually
+                // empty stubs created by the validator's case-sensitive miss.
+                var stats = await context.BaseItems
+                    .Where(b => groupIds.Contains(b.Id))
+                    .Select(b => new
+                    {
+                        b.Id,
+                        b.Name,
+                        b.DateCreated,
+                        ChildCount = context.BaseItems.Count(c => c.ParentId == b.Id),
+                        AncestorCount = context.AncestorIds.Count(a => a.ParentItemId == b.Id),
+                        LinkedCount = context.LinkedChildren.Count(l => l.ParentId == b.Id || l.ChildId == b.Id),
+                    })
+                    .ToListAsync(cancellationToken)
+                    .ConfigureAwait(false);
+
+                var keeper = stats
+                    .OrderByDescending(s => s.ChildCount)
+                    .ThenByDescending(s => s.AncestorCount)
+                    .ThenByDescending(s => s.LinkedCount)
+                    .ThenBy(s => s.DateCreated)
+                    .First();
+
+                foreach (var dup in stats.Where(s => s.Id != keeper.Id))
+                {
+                    var keeperId = keeper.Id;
+                    var dupId = dup.Id;
+
+                    await context.BaseItems
+                        .Where(b => b.ParentId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(b => b.ParentId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+
+                    await context.BaseItems
+                        .Where(b => b.OwnerId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(b => b.OwnerId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+
+                    // AncestorIds PK is (ItemId, ParentItemId); drop rows that would collide before redirecting.
+                    await context.AncestorIds
+                        .Where(a => a.ParentItemId == dupId
+                            && context.AncestorIds.Any(k => k.ParentItemId == keeperId && k.ItemId == a.ItemId))
+                        .ExecuteDeleteAsync(cancellationToken)
+                        .ConfigureAwait(false);
+                    await context.AncestorIds
+                        .Where(a => a.ParentItemId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(a => a.ParentItemId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+
+                    // LinkedChildren PK is (ParentId, ChildId); drop colliding rows in both directions.
+                    await context.LinkedChildren
+                        .Where(l => l.ParentId == dupId
+                            && context.LinkedChildren.Any(k => k.ParentId == keeperId && k.ChildId == l.ChildId))
+                        .ExecuteDeleteAsync(cancellationToken)
+                        .ConfigureAwait(false);
+                    await context.LinkedChildren
+                        .Where(l => l.ParentId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(l => l.ParentId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+                    await context.LinkedChildren
+                        .Where(l => l.ChildId == dupId
+                            && context.LinkedChildren.Any(k => k.ChildId == keeperId && k.ParentId == l.ParentId))
+                        .ExecuteDeleteAsync(cancellationToken)
+                        .ConfigureAwait(false);
+                    await context.LinkedChildren
+                        .Where(l => l.ChildId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(l => l.ChildId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+
+                    // UserData has UNIQUE(UserId, CustomDataKey); keep the dup's row only when the
+                    // keeper has no equivalent row, otherwise the keeper's value wins.
+                    await context.UserData
+                        .Where(u => u.ItemId == dupId
+                            && context.UserData.Any(k => k.ItemId == keeperId && k.UserId == u.UserId && k.CustomDataKey == u.CustomDataKey))
+                        .ExecuteDeleteAsync(cancellationToken)
+                        .ConfigureAwait(false);
+                    await context.UserData
+                        .Where(u => u.ItemId == dupId)
+                        .ExecuteUpdateAsync(s => s.SetProperty(u => u.ItemId, keeperId), cancellationToken)
+                        .ConfigureAwait(false);
+
+                    idsToDelete.Add(dupId);
+                }
+
+                _logger.LogDebug(
+                    "Merged duplicates for '{Name}' into {KeeperId} ({Removed} removed).",
+                    keeper.Name,
+                    keeper.Id,
+                    stats.Count - 1);
+            }
+
+            if (idsToDelete.Count == 0)
+            {
+                return;
+            }
+
+            // Resolve via LibraryManager so DeleteItemsUnsafeFast can also remove the
+            // %MetadataPath%/artists/ directories that the duplicate stubs left behind.
+            // Fall back to the persistence service for any items the LibraryManager can't resolve.
+            var itemsToDelete = idsToDelete
+                .Select(id => _libraryManager.GetItemById(id))
+                .Where(item => item is not null)
+                .ToList();
+            if (itemsToDelete.Count > 0)
+            {
+                _libraryManager.DeleteItemsUnsafeFast(itemsToDelete!);
+            }
+
+            var deletedIds = itemsToDelete.Select(i => i!.Id).ToHashSet();
+            var unresolvedIds = idsToDelete.Where(id => !deletedIds.Contains(id)).ToList();
+            if (unresolvedIds.Count > 0)
+            {
+                _persistenceService.DeleteItem(unresolvedIds);
+            }
+
+            _logger.LogInformation("Removed {Count} duplicate MusicArtist records.", idsToDelete.Count);
+        }
+    }
+}