calculate similarity at database level

This commit is contained in:
Luke Pulverenti
2016-06-01 01:50:00 -04:00
parent 0915d1f383
commit e1f562e16f
13 changed files with 411 additions and 257 deletions

View File

@@ -6,5 +6,6 @@ namespace MediaBrowser.Server.Implementations.Persistence
/// <summary>
/// Abstraction over how database connections are opened and prepared.
/// </summary>
public interface IDbConnector
{
/// <summary>
/// Asynchronously produces a connection for the database at the given path.
/// </summary>
/// <param name="dbPath">The db path.</param>
/// <returns>Task{IDbConnection}.</returns>
Task<IDbConnection> Connect(string dbPath);
/// <summary>
/// Binds the similarity score function to the supplied connection.
/// NOTE(review): presumably this registers the GetSimilarityScore SQL function so that
/// queries can call it - confirm against the implementing class.
/// </summary>
/// <param name="connection">The connection to bind the function to.</param>
void BindSimilarityScoreFunction(IDbConnection connection);
}
}

View File

@@ -0,0 +1,175 @@
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SQLite;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using MediaBrowser.Model.Logging;
namespace MediaBrowser.Server.Implementations.Persistence
{
/// <summary>
/// Helpers for opening SQLite connections and registering custom SQL functions on them.
/// </summary>
public static class SqliteExtensions
{
    /// <summary>
    /// Opens a SQLite connection to the database file at the given path.
    /// </summary>
    /// <param name="dbPath">The db path.</param>
    /// <param name="logger">The logger used to record the SQLite version and the path being opened.</param>
    /// <returns>Task{IDbConnection} producing the opened connection.</returns>
    /// <exception cref="System.ArgumentNullException">dbPath is null or empty, or logger is null</exception>
    public static async Task<IDbConnection> ConnectToDb(string dbPath, ILogger logger)
    {
        if (string.IsNullOrEmpty(dbPath))
        {
            throw new ArgumentNullException("dbPath");
        }

        if (logger == null)
        {
            throw new ArgumentNullException("logger");
        }

        logger.Info("Sqlite {0} opening {1}", SQLiteConnection.SQLiteVersion, dbPath);

        var connectionstr = new SQLiteConnectionStringBuilder
        {
            PageSize = 4096,
            CacheSize = 2000,
            SyncMode = SynchronizationModes.Normal,
            DataSource = dbPath,
            JournalMode = SQLiteJournalModeEnum.Wal
        };

        var connection = new SQLiteConnection(connectionstr.ConnectionString);

        try
        {
            await connection.OpenAsync().ConfigureAwait(false);
        }
        catch
        {
            // The caller never receives the connection when opening fails,
            // so it has to be released here or it is leaked.
            connection.Dispose();
            throw;
        }

        return connection;
    }

    /// <summary>
    /// Registers a new <see cref="SimiliarToFunction"/> instance on the connection and
    /// stores the logger in <see cref="SimiliarToFunction.Logger"/>.
    /// </summary>
    /// <param name="connection">The connection; it must be a <see cref="SQLiteConnection"/>.</param>
    /// <param name="logger">The logger handed to the function.</param>
    /// <exception cref="System.ArgumentNullException">connection</exception>
    /// <exception cref="System.InvalidCastException">connection is not a SQLiteConnection</exception>
    public static void BindGetSimilarityScore(IDbConnection connection, ILogger logger)
    {
        if (connection == null)
        {
            throw new ArgumentNullException("connection");
        }

        var sqlConnection = (SQLiteConnection)connection;

        SimiliarToFunction.Logger = logger;
        sqlConnection.BindFunction(new SimiliarToFunction());
    }

    /// <summary>
    /// Binds a function to the connection using the first
    /// <see cref="SQLiteFunctionAttribute"/> declared on the function's type.
    /// </summary>
    /// <param name="connection">The connection to bind the function to.</param>
    /// <param name="function">The function instance to bind.</param>
    /// <exception cref="System.ArgumentNullException">connection or function</exception>
    /// <exception cref="System.InvalidOperationException">The function type has no SQLiteFunctionAttribute</exception>
    public static void BindFunction(this SQLiteConnection connection, SQLiteFunction function)
    {
        if (connection == null)
        {
            throw new ArgumentNullException("connection");
        }

        if (function == null)
        {
            throw new ArgumentNullException("function");
        }

        var attribute = function.GetType()
            .GetCustomAttributes(typeof(SQLiteFunctionAttribute), true)
            .Cast<SQLiteFunctionAttribute>()
            .FirstOrDefault();

        if (attribute == null)
        {
            throw new InvalidOperationException("SQLiteFunction doesn't have SQLiteFunctionAttribute");
        }

        // Two-argument call resolves to SQLiteConnection's own BindFunction, not this extension.
        connection.BindFunction(attribute, function);
    }
}
/// <summary>
/// Scalar SQL function <c>GetSimilarityScore</c> that rates how similar a row is to an input item.
/// The twelve arguments come in (input, row) pairs: official rating (0, 1), production year (2, 3),
/// genres (4, 5), tags (6, 7), keywords (8, 9) and studios (10, 11). List values are
/// '|'-separated strings.
/// </summary>
[SQLiteFunction(Name = "GetSimilarityScore", Arguments = 12, FuncType = FunctionType.Scalar)]
public class SimiliarToFunction : SQLiteFunction
{
    /// <summary>
    /// Logger assigned when the function is bound; shared by all instances.
    /// </summary>
    internal static ILogger Logger;

    private static readonly char[] ListSeparator = { '|' };

    /// <summary>
    /// Computes the similarity score for one row.
    /// </summary>
    /// <param name="args">The twelve function arguments, laid out as described on the class.</param>
    /// <returns>The accumulated score as a boxed <see cref="int"/>.</returns>
    public override object Invoke(object[] args)
    {
        var score = 0;

        var inputOfficialRating = args[0] as string;
        var rowOfficialRating = args[1] as string;

        if (!string.IsNullOrWhiteSpace(inputOfficialRating) && string.Equals(inputOfficialRating, rowOfficialRating))
        {
            score += 10;
        }

        // A plain (long) unboxing cast throws when the value is not a boxed long, e.g. when a
        // NULL column value arrives as DBNull.Value, so the years are read defensively.
        long? inputYear = GetNullableInt64(args[2]);
        long? rowYear = GetNullableInt64(args[3]);

        if (inputYear.HasValue && rowYear.HasValue)
        {
            var diff = Math.Abs(inputYear.Value - rowYear.Value);

            // Add if they came out within the same decade
            if (diff < 10)
            {
                score += 2;
            }

            // And more if within five years
            if (diff < 5)
            {
                score += 2;
            }
        }

        // genres
        score += GetListScore(args, 4, 5);

        // tags
        score += GetListScore(args, 6, 7);

        // keywords
        score += GetListScore(args, 8, 9);

        // studios
        score += GetListScore(args, 10, 11, 3);

        // TODO: People. The intended weighting per person shared by both items, matched by
        // name (case-insensitive) on either Type or Role: Director 5; Actor, Composer and
        // GuestStar 3; Writer 2; anyone else 1.

        return score;
    }

    /// <summary>
    /// Scores two '|'-separated lists: every input entry that also occurs in the row list
    /// (ignoring case) adds <paramref name="value"/> points.
    /// </summary>
    /// <param name="args">The function arguments.</param>
    /// <param name="index1">Index of the input item's list.</param>
    /// <param name="index2">Index of the row's list.</param>
    /// <param name="value">Points awarded per matching entry.</param>
    /// <returns>The total points for this pair of lists.</returns>
    private int GetListScore(object[] args, int index1, int index2, int value = 10)
    {
        var inputValues = SplitList(args[index1] as string);
        if (inputValues.Length == 0)
        {
            return 0;
        }

        var rowValues = new HashSet<string>(SplitList(args[index2] as string), StringComparer.OrdinalIgnoreCase);
        if (rowValues.Count == 0)
        {
            return 0;
        }

        // Duplicate input entries each count, exactly as a per-entry loop would count them.
        return inputValues.Count(i => rowValues.Contains(i)) * value;
    }

    /// <summary>
    /// Splits a '|'-separated value into its non-empty entries; null or blank input yields an empty array.
    /// </summary>
    private static string[] SplitList(string value)
    {
        return string.IsNullOrWhiteSpace(value)
            ? new string[] { }
            : value.Split(ListSeparator, StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>
    /// Reads an argument as a 64-bit integer, treating null, DBNull and unconvertible values as missing.
    /// </summary>
    private static long? GetNullableInt64(object value)
    {
        if (value == null || value is DBNull)
        {
            return null;
        }

        if (value is long)
        {
            return (long)value;
        }

        try
        {
            return Convert.ToInt64(value, System.Globalization.CultureInfo.InvariantCulture);
        }
        catch (FormatException)
        {
            return null;
        }
        catch (InvalidCastException)
        {
            return null;
        }
        catch (OverflowException)
        {
            return null;
        }
    }
}
}

View File

@@ -15,6 +15,7 @@ using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.Serialization;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Common.Extensions;
@@ -258,6 +259,8 @@ namespace MediaBrowser.Server.Implementations.Persistence
new MediaStreamColumns(_connection, Logger).AddColumns();
DataExtensions.Attach(_connection, Path.Combine(_config.ApplicationPaths.DataPath, "userdata_v2.db"), "UserDataDb");
dbConnector.BindSimilarityScoreFunction(_connection);
}
private readonly string[] _retriveItemColumns =
@@ -1575,7 +1578,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
return false;
}
private string[] GetFinalColumnsToSelect(InternalItemsQuery query, string[] startColumns)
private string[] GetFinalColumnsToSelect(InternalItemsQuery query, string[] startColumns, IDbCommand cmd)
{
var list = startColumns.ToList();
@@ -1590,6 +1593,45 @@ namespace MediaBrowser.Server.Implementations.Persistence
list.Add("UserDataDb.UserData.rating");
}
if (query.SimilarTo != null)
{
var item = query.SimilarTo;
var builder = new StringBuilder();
builder.Append("GetSimilarityScore(");
builder.Append("@ItemOfficialRating,");
builder.Append("OfficialRating,");
builder.Append("@ItemProductionYear,");
builder.Append("ProductionYear,");
builder.Append("@ItemGenres,");
builder.Append("Genres,");
builder.Append("@ItemTags,");
builder.Append("Tags,");
builder.Append("@ItemKeywords,");
builder.Append("(select group_concat((Select Value from ItemValues where ItemId=Guid and Type=5), '|')),");
builder.Append("@ItemStudios,");
builder.Append("Studios");
builder.Append(") as SimilarityScore");
list.Add(builder.ToString());
cmd.Parameters.Add(cmd, "@ItemOfficialRating", DbType.String).Value = item.OfficialRating;
cmd.Parameters.Add(cmd, "@ItemProductionYear", DbType.Int32).Value = item.ProductionYear ?? -1;
cmd.Parameters.Add(cmd, "@ItemGenres", DbType.String).Value = string.Join("|", item.Genres.ToArray());
cmd.Parameters.Add(cmd, "@ItemTags", DbType.String).Value = string.Join("|", item.Tags.ToArray());
cmd.Parameters.Add(cmd, "@ItemKeywords", DbType.String).Value = string.Join("|", item.Keywords.ToArray());
cmd.Parameters.Add(cmd, "@ItemStudios", DbType.String).Value = string.Join("|", item.Studios.ToArray());
var excludeIds = query.ExcludeItemIds.ToList();
excludeIds.Add(item.Id.ToString("N"));
query.ExcludeItemIds = excludeIds.ToArray();
}
return list.ToArray();
}
@@ -1616,7 +1658,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
using (var cmd = _connection.CreateCommand())
{
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, _retriveItemColumns)) + " from TypedBaseItems";
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, _retriveItemColumns, cmd)) + " from TypedBaseItems";
cmd.CommandText += GetJoinUserDataText(query);
if (EnableJoinUserData(query))
@@ -1706,7 +1748,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
using (var cmd = _connection.CreateCommand())
{
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, _retriveItemColumns)) + " from TypedBaseItems";
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, _retriveItemColumns, cmd)) + " from TypedBaseItems";
cmd.CommandText += GetJoinUserDataText(query);
if (EnableJoinUserData(query))
@@ -1789,6 +1831,15 @@ namespace MediaBrowser.Server.Implementations.Persistence
private string GetOrderByText(InternalItemsQuery query)
{
if (query.SimilarTo != null)
{
if (query.SortBy == null || query.SortBy.Length == 0)
{
query.SortBy = new[] { "SimilarityScore", "Random" };
query.SortOrder = SortOrder.Descending;
}
}
if (query.SortBy == null || query.SortBy.Length == 0)
{
return string.Empty;
@@ -1879,7 +1930,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
using (var cmd = _connection.CreateCommand())
{
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, new[] { "guid" })) + " from TypedBaseItems";
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, new[] { "guid" }, cmd)) + " from TypedBaseItems";
cmd.CommandText += GetJoinUserDataText(query);
if (EnableJoinUserData(query))
@@ -2022,7 +2073,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
using (var cmd = _connection.CreateCommand())
{
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, new[] { "guid" })) + " from TypedBaseItems";
cmd.CommandText = "select " + string.Join(",", GetFinalColumnsToSelect(query, new[] { "guid" }, cmd)) + " from TypedBaseItems";
var whereClauses = GetWhereClauses(query, cmd);
cmd.CommandText += GetJoinUserDataText(query);
@@ -2148,24 +2199,7 @@ namespace MediaBrowser.Server.Implementations.Persistence
}
else
{
if (query.IsMovie.Value)
{
var typeClauses = new List<string>();
var typeIndex = 0;
foreach (var type in alternateTypes)
{
var paramName = "@AlternateType" + typeIndex.ToString(CultureInfo.InvariantCulture);
typeClauses.Add("Type=" + paramName);
cmd.Parameters.Add(cmd, paramName, DbType.String).Value = type;
typeIndex++;
}
whereClauses.Add("(IsMovie=@IsMovie OR " + string.Join(" OR ", typeClauses.ToArray()) + ")");
}
else
{
whereClauses.Add("(IsMovie is null OR IsMovie=@IsMovie)");
}
whereClauses.Add("(IsMovie is null OR IsMovie=@IsMovie)");
}
cmd.Parameters.Add(cmd, "@IsMovie", DbType.Boolean).Value = query.IsMovie;
}