using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Jellyfin.Data.Enums;
using Jellyfin.Database.Implementations;
using Jellyfin.Database.Implementations.Entities;
using Jellyfin.Extensions;
using MediaBrowser.Controller.Configuration;
using MediaBrowser.Controller.Dto;
using MediaBrowser.Controller.Entities;
using MediaBrowser.Controller.Entities.Movies;
using MediaBrowser.Controller.Library;
using MediaBrowser.Controller.Persistence;
using MediaBrowser.Model.Configuration;
using Microsoft.EntityFrameworkCore;
using BaseItemDto = MediaBrowser.Controller.Entities.BaseItem;

namespace Emby.Server.Implementations.Library.SimilarItems;

// NOTE(review): the generic type arguments in this file (base interfaces, Task/Dictionary/List
// element types) were reconstructed from how the values are used; confirm them against the
// interface declarations before merging.

/// <summary>
/// Provides similar items for movies and trailers using weighted scoring.
/// </summary>
/// <remarks>
/// A candidate earns a fixed weight for every genre, tag, studio, director and actor/guest star
/// it shares with the source item; candidates are then filtered through the regular item query
/// pipeline and returned in descending score order.
/// </remarks>
public sealed class MovieSimilarItemsProvider : ILocalSimilarItemsProvider<Movie>, ILocalSimilarItemsProvider<Trailer>, IBatchLocalSimilarItemsProvider
{
    private const int GenreWeight = 10;
    private const int TagWeight = 5;
    private const int StudioWeight = 5;
    private const int DirectorWeight = 50;
    private const int ActorWeight = 15;

    // Number of results per source item when the query does not specify a limit.
    private const int DefaultLimit = 50;

    // Each source contributes this many times `limit` top-scored candidates to the access
    // filter, leaving headroom for candidates that the filter or the dedup step drops.
    private const int CandidateOversampleFactor = 3;

    // Caps the batch fan-out so downstream IN-list sizes (per-source scores, accessible-id
    // load, navigation includes) stay bounded regardless of caller input.
    private const int MaxBatchSourceItems = 64;

    private static readonly (ItemValueType Type, int Weight)[] _itemValueDimensions =
    [
        (ItemValueType.Genre, GenreWeight),
        (ItemValueType.Tags, TagWeight),
        (ItemValueType.Studios, StudioWeight)
    ];

    private static readonly (string[] PersonTypes, int Weight)[] _peopleDimensions =
    [
        ([nameof(PersonKind.Director)], DirectorWeight),
        ([nameof(PersonKind.Actor), nameof(PersonKind.GuestStar)], ActorWeight)
    ];

    private readonly IDbContextFactory<JellyfinDbContext> _dbProvider;
    private readonly IItemQueryHelpers _queryHelpers;
    private readonly IServerConfigurationManager _serverConfigurationManager;

    /// <summary>
    /// Initializes a new instance of the <see cref="MovieSimilarItemsProvider"/> class.
    /// </summary>
    /// <param name="dbProvider">The database context factory.</param>
    /// <param name="queryHelpers">The shared query helpers.</param>
    /// <param name="serverConfigurationManager">The server configuration manager.</param>
    public MovieSimilarItemsProvider(
        IDbContextFactory<JellyfinDbContext> dbProvider,
        IItemQueryHelpers queryHelpers,
        IServerConfigurationManager serverConfigurationManager)
    {
        _dbProvider = dbProvider;
        _queryHelpers = queryHelpers;
        _serverConfigurationManager = serverConfigurationManager;
    }

    /// <inheritdoc/>
    public string Name => "Local Genre/Tag";

    /// <inheritdoc/>
    public MetadataPluginType Type => MetadataPluginType.LocalSimilarityProvider;

    /// <inheritdoc/>
    public Task<IReadOnlyList<BaseItem>> GetSimilarItemsAsync(Movie item, SimilarItemsQuery query, CancellationToken cancellationToken)
        => GetSimilarItemsForSingleAsync(item, query, cancellationToken);

    /// <inheritdoc/>
    public Task<IReadOnlyList<BaseItem>> GetSimilarItemsAsync(Trailer item, SimilarItemsQuery query, CancellationToken cancellationToken)
        => GetSimilarItemsForSingleAsync(item, query, cancellationToken);

    /// <inheritdoc/>
    bool ILocalSimilarItemsProvider.Supports(Type itemType)
        => typeof(Movie).IsAssignableFrom(itemType) || typeof(Trailer).IsAssignableFrom(itemType);

    /// <inheritdoc/>
    Task<IReadOnlyList<BaseItem>> ILocalSimilarItemsProvider.GetSimilarItemsAsync(BaseItem item, SimilarItemsQuery query, CancellationToken cancellationToken)
        => item switch
        {
            Movie movie => GetSimilarItemsAsync(movie, query, cancellationToken),
            Trailer trailer => GetSimilarItemsAsync(trailer, query, cancellationToken),
            // Handled separately so the message below never dereferences a null item.
            null => throw new ArgumentNullException(nameof(item)),
            _ => throw new ArgumentException($"Unsupported item type {item.GetType()}", nameof(item))
        };

    /// <summary>
    /// Computes similar items for every source item in one pass over the database.
    /// </summary>
    /// <param name="sourceItems">
    /// The items to find similar items for. Repeated ids are processed once, and only the first
    /// <c>64</c> distinct items are considered.
    /// </param>
    /// <param name="query">The similar-items query (user, limit, DTO options, excluded ids).</param>
    /// <param name="cancellationToken">The cancellation token.</param>
    /// <returns>
    /// A map from source item id to its similar items in descending score order. Source items
    /// without any similar item are absent from the map.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sourceItems"/> or <paramref name="query"/> is <c>null</c>.
    /// </exception>
    // NOTE(review): the return type is reconstructed; confirm it matches IBatchLocalSimilarItemsProvider.
    public async Task<IReadOnlyDictionary<Guid, IReadOnlyList<BaseItem>>> GetBatchSimilarItemsAsync(
        IReadOnlyList<BaseItem> sourceItems,
        SimilarItemsQuery query,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(sourceItems);
        ArgumentNullException.ThrowIfNull(query);

        var result = new Dictionary<Guid, IReadOnlyList<BaseItem>>();
        var limit = query.Limit ?? DefaultLimit;

        // Nothing can be returned in these cases, so skip the database entirely.
        if (sourceItems.Count == 0 || limit <= 0)
        {
            return result;
        }

        var includeItemTypes = new List<BaseItemKind> { BaseItemKind.Movie };
        if (_serverConfigurationManager.Configuration.EnableExternalContentInSuggestions)
        {
            includeItemTypes.Add(BaseItemKind.Trailer);
            includeItemTypes.Add(BaseItemKind.LiveTvProgram);
        }

        var dtoOptions = query.DtoOptions ?? new DtoOptions();

        // De-duplicate before capping: a repeated id would be scored twice, would waste a slot
        // of the cap, and used to make the score pruning fail with KeyNotFoundException.
        var sources = sourceItems
            .DistinctBy(i => i.Id)
            .Take(MaxBatchSourceItems)
            .ToList();
        var sourceIds = sources.ConvertAll(i => i.Id);

        // Widen to long so a very large limit cannot overflow into a negative Take count.
        var candidatesPerSource = (int)Math.Min((long)limit * CandidateOversampleFactor, int.MaxValue);

        var context = await _dbProvider.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
        await using (context.ConfigureAwait(false))
        {
            // Phase 1: Score all candidates per source item
            var perSourceScores = await ComputeBatchScoresAsync(sourceIds, context, cancellationToken).ConfigureAwait(false);

            var allCandidateIds = new HashSet<Guid>();
            foreach (var (_, scores) in perSourceScores)
            {
                allCandidateIds.UnionWith(
                    scores.OrderByDescending(kvp => kvp.Value)
                        .Take(candidatesPerSource)
                        .Select(kvp => kvp.Key));
            }

            if (allCandidateIds.Count == 0)
            {
                return result;
            }

            // Phase 2: One access filter for all candidates
            var filter = new InternalItemsQuery(query.User)
            {
                IncludeItemTypes = [.. includeItemTypes],
                ExcludeItemIds = [.. query.ExcludeItemIds],
                DtoOptions = dtoOptions,
                EnableGroupByMetadataKey = true,
                EnableTotalRecordCount = false,
                IsMovie = true,
                IsPlayed = false
            };

            _queryHelpers.PrepareFilterQuery(filter);
            var baseQuery = _queryHelpers.PrepareItemQuery(context, filter);
            baseQuery = _queryHelpers.TranslateQuery(baseQuery, context, filter);

            var allCandidateIdsList = allCandidateIds.ToList();
            var accessibleItems = await baseQuery
                .WhereOneOrMany(allCandidateIdsList, e => e.Id)
                .Select(e => new { e.Id, e.PresentationUniqueKey })
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            // Phase 3: Pick top IDs per source, dedup by PresentationUniqueKey
            var allOrderedIds = new HashSet<Guid>();
            var perSourceOrderedIds = new Dictionary<Guid, List<Guid>>();
            foreach (var item in sources)
            {
                if (!perSourceScores.TryGetValue(item.Id, out var scores))
                {
                    continue;
                }

                // The sort runs before DistinctBy so the best-scored entry of each
                // presentation key is the one that survives.
                var orderedIds = accessibleItems
                    .Where(x => scores.ContainsKey(x.Id))
                    .OrderByDescending(x => scores.GetValueOrDefault(x.Id))
                    .DistinctBy(x => x.PresentationUniqueKey)
                    .Take(limit)
                    .Select(x => x.Id)
                    .ToList();

                if (orderedIds.Count > 0)
                {
                    perSourceOrderedIds[item.Id] = orderedIds;
                    allOrderedIds.UnionWith(orderedIds);
                }
            }

            if (allOrderedIds.Count == 0)
            {
                return result;
            }

            // Phase 4: One entity load for all results. AsSplitQuery avoids a SQL Cartesian
            // product across the multiple collection Includes added by ApplyNavigations.
            var allOrderedIdsList = allOrderedIds.ToList();
            var entities = await _queryHelpers.ApplyNavigations(
                    context.BaseItems.AsNoTracking().WhereOneOrMany(allOrderedIdsList, e => e.Id),
                    filter)
                .AsSplitQuery()
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            var entitiesById = entities
                .Select(e => _queryHelpers.DeserializeBaseItem(e, filter.SkipDeserialization))
                .Where(dto => dto is not null)
                .ToDictionary(i => i!.Id);

            // Phase 5: Split by source, preserving score order
            foreach (var (sourceId, orderedIds) in perSourceOrderedIds)
            {
                var items = new List<BaseItem>(orderedIds.Count);
                foreach (var id in orderedIds)
                {
                    if (entitiesById.TryGetValue(id, out var entity) && entity is not null)
                    {
                        items.Add(entity);
                    }
                }

                if (items.Count > 0)
                {
                    result[sourceId] = items;
                }
            }

            return result;
        }
    }

    /// <summary>
    /// Builds, for every source id, a map from candidate item id to accumulated similarity score.
    /// </summary>
    /// <param name="sourceIds">The ids of the source items.</param>
    /// <param name="context">The database context to query.</param>
    /// <param name="cancellationToken">The cancellation token.</param>
    /// <returns>
    /// Score maps keyed by source id. A source never appears among its own candidates, and
    /// sources left without candidates are removed from the result.
    /// </returns>
    private static async Task<Dictionary<Guid, Dictionary<Guid, int>>> ComputeBatchScoresAsync(List<Guid> sourceIds, JellyfinDbContext context, CancellationToken cancellationToken)
    {
        var result = new Dictionary<Guid, Dictionary<Guid, int>>();
        foreach (var id in sourceIds)
        {
            result[id] = [];
        }

        foreach (var (valueType, weight) in _itemValueDimensions)
        {
            var sourceRows = await context.ItemValuesMap.AsNoTracking()
                .Where(m => sourceIds.Contains(m.ItemId) && m.ItemValue.Type == valueType)
                .Select(m => new { m.ItemId, Key = m.ItemValue.CleanValue })
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            var sourceMap = sourceRows
                .GroupBy(r => r.ItemId)
                .ToDictionary(g => g.Key, g => g.Select(x => x.Key).ToHashSet());
            var allKeys = sourceMap.Values.SelectMany(v => v).Distinct().ToList();
            if (allKeys.Count == 0)
            {
                continue;
            }

            var candidateRows = await context.ItemValuesMap.AsNoTracking()
                .Where(m => m.ItemValue.Type == valueType && allKeys.Contains(m.ItemValue.CleanValue))
                .Select(m => new { m.ItemId, Key = m.ItemValue.CleanValue })
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            // Distinct: a candidate reached through several rows for the same key must earn
            // the weight once, mirroring the HashSet used on the source side.
            var keyToCandidates = candidateRows
                .GroupBy(r => r.Key)
                .ToDictionary(g => g.Key, g => g.Select(x => x.ItemId).Distinct().ToList());

            ApplyDimensionScores(sourceIds, sourceMap, keyToCandidates, weight, result);
        }

        foreach (var (personTypes, weight) in _peopleDimensions)
        {
            var sourceRows = await context.PeopleBaseItemMap.AsNoTracking()
                .Where(m => sourceIds.Contains(m.ItemId) && personTypes.Contains(m.People.PersonType))
                .Select(m => new { m.ItemId, Key = m.PeopleId })
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            var sourceMap = sourceRows
                .GroupBy(r => r.ItemId)
                .ToDictionary(g => g.Key, g => g.Select(x => x.Key).ToHashSet());
            var allKeys = sourceMap.Values.SelectMany(v => v).Distinct().ToList();
            if (allKeys.Count == 0)
            {
                continue;
            }

            // The candidate side matches on the person only; it is not restricted by person type.
            var candidateRows = await context.PeopleBaseItemMap.AsNoTracking()
                .Where(m => allKeys.Contains(m.PeopleId))
                .Select(m => new { m.ItemId, Key = m.PeopleId })
                .ToListAsync(cancellationToken).ConfigureAwait(false);

            // Distinct: a person mapped to the same candidate more than once counts once.
            var keyToCandidates = candidateRows
                .GroupBy(r => r.Key)
                .ToDictionary(g => g.Key, g => g.Select(x => x.ItemId).Distinct().ToList());

            ApplyDimensionScores(sourceIds, sourceMap, keyToCandidates, weight, result);
        }

        foreach (var sourceId in sourceIds)
        {
            // TryGetValue keeps this loop safe if an id occurs twice and was already pruned.
            if (!result.TryGetValue(sourceId, out var scoreMap))
            {
                continue;
            }

            // An item always matches its own keys; it must not be suggested for itself.
            scoreMap.Remove(sourceId);
            if (scoreMap.Count == 0)
            {
                result.Remove(sourceId);
            }
        }

        return result;
    }

    /// <summary>
    /// Adds <paramref name="weight"/> to a candidate's score for every key it shares with a source item.
    /// </summary>
    /// <typeparam name="TKey">The type of the shared key of the dimension being scored.</typeparam>
    /// <param name="sourceIds">The source item ids to score; each must already have an entry in <paramref name="result"/>.</param>
    /// <param name="sourceMap">The keys owned by each source item.</param>
    /// <param name="keyToCandidates">The candidate item ids owning each key.</param>
    /// <param name="weight">The score added per shared key.</param>
    /// <param name="result">The per-source score maps, updated in place.</param>
    private static void ApplyDimensionScores<TKey>(
        List<Guid> sourceIds,
        Dictionary<Guid, HashSet<TKey>> sourceMap,
        Dictionary<TKey, List<Guid>> keyToCandidates,
        int weight,
        Dictionary<Guid, Dictionary<Guid, int>> result)
        where TKey : notnull
    {
        foreach (var sourceId in sourceIds)
        {
            if (!sourceMap.TryGetValue(sourceId, out var sourceKeys))
            {
                continue;
            }

            var scoreMap = result[sourceId];
            foreach (var key in sourceKeys)
            {
                if (!keyToCandidates.TryGetValue(key, out var candidates))
                {
                    continue;
                }

                foreach (var candidateId in candidates)
                {
                    scoreMap[candidateId] = scoreMap.GetValueOrDefault(candidateId) + weight;
                }
            }
        }
    }

    /// <summary>
    /// Runs the batch pipeline for one item and unwraps its entry from the batch result.
    /// </summary>
    /// <param name="item">The source item.</param>
    /// <param name="query">The similar-items query.</param>
    /// <param name="cancellationToken">The cancellation token.</param>
    /// <returns>The similar items, or an empty list when the batch produced no entry for the item.</returns>
    private async Task<IReadOnlyList<BaseItem>> GetSimilarItemsForSingleAsync(BaseItem item, SimilarItemsQuery query, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(item);

        var results = await GetBatchSimilarItemsAsync([item], query, cancellationToken).ConfigureAwait(false);
        return results.TryGetValue(item.Id, out var items) ? items : [];
    }
}