mirror of
https://github.com/jellyfin/jellyfin.git
synced 2026-03-14 14:16:35 +00:00
Visual Studio Reformat: Emby.Server.Implementations Part T-T
This commit is contained in:
@@ -1,10 +1,8 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using NLangDetect.Core.Utils;
|
||||
using MediaBrowser.Model.Serialization;
|
||||
using System.Linq;
|
||||
using MediaBrowser.Model.Serialization;
|
||||
using NLangDetect.Core.Utils;
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
public enum ErrorCode
|
||||
{
|
||||
NoTextError,
|
||||
FormatError,
|
||||
FileLoadError,
|
||||
DuplicateLangError,
|
||||
NeedLoadProfileError,
|
||||
CantDetectError,
|
||||
CantOpenTrainData,
|
||||
TrainDataFormatError,
|
||||
InitParamError,
|
||||
}
|
||||
public enum ErrorCode
|
||||
{
|
||||
NoTextError,
|
||||
FormatError,
|
||||
FileLoadError,
|
||||
DuplicateLangError,
|
||||
NeedLoadProfileError,
|
||||
CantDetectError,
|
||||
CantOpenTrainData,
|
||||
TrainDataFormatError,
|
||||
InitParamError,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@
|
||||
|
||||
namespace NLangDetect.Core.Extensions
|
||||
{
|
||||
public static class CharExtensions
|
||||
{
|
||||
private const int MIN_CODE_POINT = 0x000000;
|
||||
private const int MAX_CODE_POINT = 0x10ffff;
|
||||
public static class CharExtensions
|
||||
{
|
||||
private const int MIN_CODE_POINT = 0x000000;
|
||||
private const int MAX_CODE_POINT = 0x10ffff;
|
||||
|
||||
private static readonly int[] _unicodeBlockStarts =
|
||||
{
|
||||
private static readonly int[] _unicodeBlockStarts =
|
||||
{
|
||||
#region Unicode block starts
|
||||
|
||||
0x0000, // Basic Latin
|
||||
@@ -165,8 +165,8 @@ namespace NLangDetect.Core.Extensions
|
||||
#endregion
|
||||
};
|
||||
|
||||
private static readonly UnicodeBlock?[] _unicodeBlocks =
|
||||
{
|
||||
private static readonly UnicodeBlock?[] _unicodeBlocks =
|
||||
{
|
||||
#region Unicode blocks
|
||||
UnicodeBlock.BasicLatin,
|
||||
UnicodeBlock.Latin1Supplement,
|
||||
@@ -322,53 +322,53 @@ namespace NLangDetect.Core.Extensions
|
||||
#endregion
|
||||
};
|
||||
|
||||
#region Public methods
|
||||
#region Public methods
|
||||
|
||||
/// <remarks>
|
||||
/// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
|
||||
/// </remarks>
|
||||
public static UnicodeBlock? GetUnicodeBlock(this char ch)
|
||||
{
|
||||
int codePoint = ch;
|
||||
|
||||
if (!IsValidCodePoint(codePoint))
|
||||
{
|
||||
throw new ArgumentException("Argument is not a valid code point.", nameof(ch));
|
||||
}
|
||||
|
||||
int top, bottom, current;
|
||||
|
||||
bottom = 0;
|
||||
top = _unicodeBlockStarts.Length;
|
||||
current = top / 2;
|
||||
|
||||
// invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
|
||||
while (top - bottom > 1)
|
||||
{
|
||||
if (codePoint >= _unicodeBlockStarts[current])
|
||||
/// <remarks>
|
||||
/// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
|
||||
/// </remarks>
|
||||
public static UnicodeBlock? GetUnicodeBlock(this char ch)
|
||||
{
|
||||
bottom = current;
|
||||
}
|
||||
else
|
||||
{
|
||||
top = current;
|
||||
int codePoint = ch;
|
||||
|
||||
if (!IsValidCodePoint(codePoint))
|
||||
{
|
||||
throw new ArgumentException("Argument is not a valid code point.", nameof(ch));
|
||||
}
|
||||
|
||||
int top, bottom, current;
|
||||
|
||||
bottom = 0;
|
||||
top = _unicodeBlockStarts.Length;
|
||||
current = top / 2;
|
||||
|
||||
// invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
|
||||
while (top - bottom > 1)
|
||||
{
|
||||
if (codePoint >= _unicodeBlockStarts[current])
|
||||
{
|
||||
bottom = current;
|
||||
}
|
||||
else
|
||||
{
|
||||
top = current;
|
||||
}
|
||||
|
||||
current = (top + bottom) / 2;
|
||||
}
|
||||
|
||||
return _unicodeBlocks[current];
|
||||
}
|
||||
|
||||
current = (top + bottom) / 2;
|
||||
}
|
||||
#endregion
|
||||
|
||||
return _unicodeBlocks[current];
|
||||
#region Private helper methods
|
||||
|
||||
private static bool IsValidCodePoint(int codePoint)
|
||||
{
|
||||
return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private helper methods
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="codePoint"/> lies inside the inclusive range
/// bounded by <c>MIN_CODE_POINT</c> and <c>MAX_CODE_POINT</c>.
/// </summary>
/// <param name="codePoint">The numeric code point to check.</param>
/// <returns><c>true</c> when the value is within the range; otherwise <c>false</c>.</returns>
private static bool IsValidCodePoint(int codePoint)
{
    if (codePoint < MIN_CODE_POINT)
    {
        return false;
    }

    return codePoint <= MAX_CODE_POINT;
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,50 +2,50 @@
|
||||
|
||||
namespace NLangDetect.Core.Extensions
|
||||
{
|
||||
public static class RandomExtensions
|
||||
{
|
||||
private const double _Epsilon = 2.22044604925031E-15;
|
||||
|
||||
private static readonly object _mutex = new object();
|
||||
|
||||
private static double _nextNextGaussian;
|
||||
private static bool _hasNextNextGaussian;
|
||||
|
||||
/// <summary>
|
||||
/// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence.
|
||||
/// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian())
|
||||
/// </remarks>
|
||||
public static double NextGaussian(this Random random)
|
||||
public static class RandomExtensions
|
||||
{
|
||||
lock (_mutex)
|
||||
{
|
||||
if (_hasNextNextGaussian)
|
||||
private const double _Epsilon = 2.22044604925031E-15;
|
||||
|
||||
private static readonly object _mutex = new object();
|
||||
|
||||
private static double _nextNextGaussian;
|
||||
private static bool _hasNextNextGaussian;
|
||||
|
||||
/// <summary>
|
||||
/// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence.
|
||||
/// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian())
|
||||
/// </remarks>
|
||||
public static double NextGaussian(this Random random)
|
||||
{
|
||||
_hasNextNextGaussian = false;
|
||||
lock (_mutex)
|
||||
{
|
||||
if (_hasNextNextGaussian)
|
||||
{
|
||||
_hasNextNextGaussian = false;
|
||||
|
||||
return _nextNextGaussian;
|
||||
return _nextNextGaussian;
|
||||
}
|
||||
|
||||
double v1, v2, s;
|
||||
|
||||
do
|
||||
{
|
||||
v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
|
||||
v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
|
||||
s = v1 * v1 + v2 * v2;
|
||||
}
|
||||
while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon);
|
||||
|
||||
double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s);
|
||||
|
||||
_nextNextGaussian = v2 * multiplier;
|
||||
_hasNextNextGaussian = true;
|
||||
|
||||
return v1 * multiplier;
|
||||
}
|
||||
}
|
||||
|
||||
double v1, v2, s;
|
||||
|
||||
do
|
||||
{
|
||||
v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
|
||||
v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
|
||||
s = v1 * v1 + v2 * v2;
|
||||
}
|
||||
while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon);
|
||||
|
||||
double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s);
|
||||
|
||||
_nextNextGaussian = v2 * multiplier;
|
||||
_hasNextNextGaussian = true;
|
||||
|
||||
return v1 * multiplier;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,131 +1,131 @@
|
||||
namespace NLangDetect.Core.Extensions
|
||||
{
|
||||
public enum UnicodeBlock
|
||||
{
|
||||
BasicLatin,
|
||||
Latin1Supplement,
|
||||
LatinExtendedA,
|
||||
LatinExtendedB,
|
||||
IpaExtensions,
|
||||
SpacingModifierLetters,
|
||||
CombiningDiacriticalMarks,
|
||||
Greek,
|
||||
Cyrillic,
|
||||
CyrillicSupplementary,
|
||||
Armenian,
|
||||
Hebrew,
|
||||
Arabic,
|
||||
Syriac,
|
||||
Thaana,
|
||||
Devanagari,
|
||||
Bengali,
|
||||
Gurmukhi,
|
||||
Gujarati,
|
||||
Oriya,
|
||||
Tamil,
|
||||
Telugu,
|
||||
Kannada,
|
||||
Malayalam,
|
||||
Sinhala,
|
||||
Thai,
|
||||
Lao,
|
||||
Tibetan,
|
||||
Myanmar,
|
||||
Georgian,
|
||||
HangulJamo,
|
||||
Ethiopic,
|
||||
Cherokee,
|
||||
UnifiedCanadianAboriginalSyllabics,
|
||||
Ogham,
|
||||
Runic,
|
||||
Tagalog,
|
||||
Hanunoo,
|
||||
Buhid,
|
||||
Tagbanwa,
|
||||
Khmer,
|
||||
Mongolian,
|
||||
Limbu,
|
||||
TaiLe,
|
||||
KhmerSymbols,
|
||||
PhoneticExtensions,
|
||||
LatinExtendedAdditional,
|
||||
GreekExtended,
|
||||
GeneralPunctuation,
|
||||
SuperscriptsAndSubscripts,
|
||||
CurrencySymbols,
|
||||
CombiningMarksForSymbols,
|
||||
LetterlikeSymbols,
|
||||
NumberForms,
|
||||
Arrows,
|
||||
MathematicalOperators,
|
||||
MiscellaneousTechnical,
|
||||
ControlPictures,
|
||||
OpticalCharacterRecognition,
|
||||
EnclosedAlphanumerics,
|
||||
BoxDrawing,
|
||||
BlockElements,
|
||||
GeometricShapes,
|
||||
MiscellaneousSymbols,
|
||||
Dingbats,
|
||||
MiscellaneousMathematicalSymbolsA,
|
||||
SupplementalArrowsA,
|
||||
BraillePatterns,
|
||||
SupplementalArrowsB,
|
||||
MiscellaneousMathematicalSymbolsB,
|
||||
SupplementalMathematicalOperators,
|
||||
MiscellaneousSymbolsAndArrows,
|
||||
CjkRadicalsSupplement,
|
||||
KangxiRadicals,
|
||||
IdeographicDescriptionCharacters,
|
||||
CjkSymbolsAndPunctuation,
|
||||
Hiragana,
|
||||
Katakana,
|
||||
Bopomofo,
|
||||
HangulCompatibilityJamo,
|
||||
Kanbun,
|
||||
BopomofoExtended,
|
||||
KatakanaPhoneticExtensions,
|
||||
EnclosedCjkLettersAndMonths,
|
||||
CjkCompatibility,
|
||||
CjkUnifiedIdeographsExtensionA,
|
||||
YijingHexagramSymbols,
|
||||
CjkUnifiedIdeographs,
|
||||
YiSyllables,
|
||||
YiRadicals,
|
||||
HangulSyllables,
|
||||
HighSurrogates,
|
||||
HighPrivateUseSurrogates,
|
||||
LowSurrogates,
|
||||
PrivateUseArea,
|
||||
CjkCompatibilityIdeographs,
|
||||
AlphabeticPresentationForms,
|
||||
ArabicPresentationFormsA,
|
||||
VariationSelectors,
|
||||
CombiningHalfMarks,
|
||||
CjkCompatibilityForms,
|
||||
SmallFormVariants,
|
||||
ArabicPresentationFormsB,
|
||||
HalfwidthAndFullwidthForms,
|
||||
Specials,
|
||||
LinearBSyllabary,
|
||||
LinearBIdeograms,
|
||||
AegeanNumbers,
|
||||
OldItalic,
|
||||
Gothic,
|
||||
Ugaritic,
|
||||
Deseret,
|
||||
Shavian,
|
||||
Osmanya,
|
||||
CypriotSyllabary,
|
||||
ByzantineMusicalSymbols,
|
||||
MusicalSymbols,
|
||||
TaiXuanJingSymbols,
|
||||
MathematicalAlphanumericSymbols,
|
||||
CjkUnifiedIdeographsExtensionB,
|
||||
CjkCompatibilityIdeographsSupplement,
|
||||
Tags,
|
||||
VariationSelectorsSupplement,
|
||||
SupplementaryPrivateUseAreaA,
|
||||
SupplementaryPrivateUseAreaB,
|
||||
}
|
||||
public enum UnicodeBlock
|
||||
{
|
||||
BasicLatin,
|
||||
Latin1Supplement,
|
||||
LatinExtendedA,
|
||||
LatinExtendedB,
|
||||
IpaExtensions,
|
||||
SpacingModifierLetters,
|
||||
CombiningDiacriticalMarks,
|
||||
Greek,
|
||||
Cyrillic,
|
||||
CyrillicSupplementary,
|
||||
Armenian,
|
||||
Hebrew,
|
||||
Arabic,
|
||||
Syriac,
|
||||
Thaana,
|
||||
Devanagari,
|
||||
Bengali,
|
||||
Gurmukhi,
|
||||
Gujarati,
|
||||
Oriya,
|
||||
Tamil,
|
||||
Telugu,
|
||||
Kannada,
|
||||
Malayalam,
|
||||
Sinhala,
|
||||
Thai,
|
||||
Lao,
|
||||
Tibetan,
|
||||
Myanmar,
|
||||
Georgian,
|
||||
HangulJamo,
|
||||
Ethiopic,
|
||||
Cherokee,
|
||||
UnifiedCanadianAboriginalSyllabics,
|
||||
Ogham,
|
||||
Runic,
|
||||
Tagalog,
|
||||
Hanunoo,
|
||||
Buhid,
|
||||
Tagbanwa,
|
||||
Khmer,
|
||||
Mongolian,
|
||||
Limbu,
|
||||
TaiLe,
|
||||
KhmerSymbols,
|
||||
PhoneticExtensions,
|
||||
LatinExtendedAdditional,
|
||||
GreekExtended,
|
||||
GeneralPunctuation,
|
||||
SuperscriptsAndSubscripts,
|
||||
CurrencySymbols,
|
||||
CombiningMarksForSymbols,
|
||||
LetterlikeSymbols,
|
||||
NumberForms,
|
||||
Arrows,
|
||||
MathematicalOperators,
|
||||
MiscellaneousTechnical,
|
||||
ControlPictures,
|
||||
OpticalCharacterRecognition,
|
||||
EnclosedAlphanumerics,
|
||||
BoxDrawing,
|
||||
BlockElements,
|
||||
GeometricShapes,
|
||||
MiscellaneousSymbols,
|
||||
Dingbats,
|
||||
MiscellaneousMathematicalSymbolsA,
|
||||
SupplementalArrowsA,
|
||||
BraillePatterns,
|
||||
SupplementalArrowsB,
|
||||
MiscellaneousMathematicalSymbolsB,
|
||||
SupplementalMathematicalOperators,
|
||||
MiscellaneousSymbolsAndArrows,
|
||||
CjkRadicalsSupplement,
|
||||
KangxiRadicals,
|
||||
IdeographicDescriptionCharacters,
|
||||
CjkSymbolsAndPunctuation,
|
||||
Hiragana,
|
||||
Katakana,
|
||||
Bopomofo,
|
||||
HangulCompatibilityJamo,
|
||||
Kanbun,
|
||||
BopomofoExtended,
|
||||
KatakanaPhoneticExtensions,
|
||||
EnclosedCjkLettersAndMonths,
|
||||
CjkCompatibility,
|
||||
CjkUnifiedIdeographsExtensionA,
|
||||
YijingHexagramSymbols,
|
||||
CjkUnifiedIdeographs,
|
||||
YiSyllables,
|
||||
YiRadicals,
|
||||
HangulSyllables,
|
||||
HighSurrogates,
|
||||
HighPrivateUseSurrogates,
|
||||
LowSurrogates,
|
||||
PrivateUseArea,
|
||||
CjkCompatibilityIdeographs,
|
||||
AlphabeticPresentationForms,
|
||||
ArabicPresentationFormsA,
|
||||
VariationSelectors,
|
||||
CombiningHalfMarks,
|
||||
CjkCompatibilityForms,
|
||||
SmallFormVariants,
|
||||
ArabicPresentationFormsB,
|
||||
HalfwidthAndFullwidthForms,
|
||||
Specials,
|
||||
LinearBSyllabary,
|
||||
LinearBIdeograms,
|
||||
AegeanNumbers,
|
||||
OldItalic,
|
||||
Gothic,
|
||||
Ugaritic,
|
||||
Deseret,
|
||||
Shavian,
|
||||
Osmanya,
|
||||
CypriotSyllabary,
|
||||
ByzantineMusicalSymbols,
|
||||
MusicalSymbols,
|
||||
TaiXuanJingSymbols,
|
||||
MathematicalAlphanumericSymbols,
|
||||
CjkUnifiedIdeographsExtensionB,
|
||||
CjkCompatibilityIdeographsSupplement,
|
||||
Tags,
|
||||
VariationSelectorsSupplement,
|
||||
SupplementaryPrivateUseAreaA,
|
||||
SupplementaryPrivateUseAreaB,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,67 +1,67 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Xml;
|
||||
using NLangDetect.Core.Utils;
|
||||
using System.IO;
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
// TODO IMM HI: xml reader not tested
|
||||
public static class GenProfile
|
||||
{
|
||||
#region Public methods
|
||||
|
||||
public static LangProfile load(string lang, string file)
|
||||
// TODO IMM HI: xml reader not tested
|
||||
public static class GenProfile
|
||||
{
|
||||
LangProfile profile = new LangProfile(lang);
|
||||
TagExtractor tagextractor = new TagExtractor("abstract", 100);
|
||||
Stream inputStream = null;
|
||||
#region Public methods
|
||||
|
||||
try
|
||||
{
|
||||
inputStream = File.OpenRead(file);
|
||||
|
||||
string extension = Path.GetExtension(file) ?? "";
|
||||
|
||||
if (extension.ToUpper() == ".GZ")
|
||||
public static LangProfile load(string lang, string file)
|
||||
{
|
||||
inputStream = new GZipStream(inputStream, CompressionMode.Decompress);
|
||||
}
|
||||
LangProfile profile = new LangProfile(lang);
|
||||
TagExtractor tagextractor = new TagExtractor("abstract", 100);
|
||||
Stream inputStream = null;
|
||||
|
||||
using (XmlReader xmlReader = XmlReader.Create(inputStream))
|
||||
{
|
||||
while (xmlReader.Read())
|
||||
{
|
||||
switch (xmlReader.NodeType)
|
||||
try
|
||||
{
|
||||
case XmlNodeType.Element:
|
||||
tagextractor.SetTag(xmlReader.Name);
|
||||
break;
|
||||
inputStream = File.OpenRead(file);
|
||||
|
||||
case XmlNodeType.Text:
|
||||
tagextractor.Add(xmlReader.Value);
|
||||
break;
|
||||
string extension = Path.GetExtension(file) ?? "";
|
||||
|
||||
case XmlNodeType.EndElement:
|
||||
tagextractor.CloseTag(profile);
|
||||
break;
|
||||
if (extension.ToUpper() == ".GZ")
|
||||
{
|
||||
inputStream = new GZipStream(inputStream, CompressionMode.Decompress);
|
||||
}
|
||||
|
||||
using (XmlReader xmlReader = XmlReader.Create(inputStream))
|
||||
{
|
||||
while (xmlReader.Read())
|
||||
{
|
||||
switch (xmlReader.NodeType)
|
||||
{
|
||||
case XmlNodeType.Element:
|
||||
tagextractor.SetTag(xmlReader.Name);
|
||||
break;
|
||||
|
||||
case XmlNodeType.Text:
|
||||
tagextractor.Add(xmlReader.Value);
|
||||
break;
|
||||
|
||||
case XmlNodeType.EndElement:
|
||||
tagextractor.CloseTag(profile);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (inputStream != null)
|
||||
{
|
||||
inputStream.Close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (inputStream != null)
|
||||
{
|
||||
inputStream.Close();
|
||||
}
|
||||
}
|
||||
|
||||
Console.WriteLine(lang + ": " + tagextractor.Count);
|
||||
Console.WriteLine(lang + ": " + tagextractor.Count);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,21 +2,21 @@
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
[Serializable]
|
||||
public class InternalException : Exception
|
||||
{
|
||||
#region Constructor(s)
|
||||
|
||||
public InternalException(string message, Exception innerException)
|
||||
: base(message, innerException)
|
||||
[Serializable]
|
||||
public class InternalException : Exception
|
||||
{
|
||||
}
|
||||
#region Constructor(s)
|
||||
|
||||
public InternalException(string message)
|
||||
: this(message, null)
|
||||
{
|
||||
}
|
||||
public InternalException(string message, Exception innerException)
|
||||
: base(message, innerException)
|
||||
{
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
public InternalException(string message)
|
||||
: this(message, null)
|
||||
{
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,44 +2,44 @@ using System.Globalization;
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
// TODO IMM HI: name??
|
||||
public class Language
|
||||
{
|
||||
#region Constructor(s)
|
||||
|
||||
public Language(string name, double probability)
|
||||
// TODO IMM HI: name??
|
||||
public class Language
|
||||
{
|
||||
Name = name;
|
||||
Probability = probability;
|
||||
#region Constructor(s)
|
||||
|
||||
public Language(string name, double probability)
|
||||
{
|
||||
Name = name;
|
||||
Probability = probability;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Object overrides
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
if (Name == null)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
return
|
||||
string.Format(
|
||||
CultureInfo.InvariantCulture.NumberFormat,
|
||||
"{0}:{1:0.000000}",
|
||||
Name,
|
||||
Probability);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public string Name { get; set; }
|
||||
|
||||
public double Probability { get; set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Object overrides
|
||||
|
||||
/// <summary>
/// Renders the language as <c>name:probability</c>, with the probability
/// written using six decimal places and invariant-culture number formatting.
/// </summary>
/// <returns>
/// An empty string when <c>Name</c> is <c>null</c>; otherwise the formatted pair.
/// </returns>
public override string ToString()
{
    string label = Name;

    // Without a name there is nothing to print.
    if (label == null)
    {
        return "";
    }

    var invariantNumbers = CultureInfo.InvariantCulture.NumberFormat;

    return string.Format(invariantNumbers, "{0}:{1:0.000000}", label, Probability);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public string Name { get; set; }
|
||||
|
||||
public double Probability { get; set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,22 +2,22 @@
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
public class NLangDetectException : Exception
|
||||
{
|
||||
#region Constructor(s)
|
||||
|
||||
public NLangDetectException(string message, ErrorCode errorCode)
|
||||
: base(message)
|
||||
public class NLangDetectException : Exception
|
||||
{
|
||||
ErrorCode = errorCode;
|
||||
#region Constructor(s)
|
||||
|
||||
public NLangDetectException(string message, ErrorCode errorCode)
|
||||
: base(message)
|
||||
{
|
||||
ErrorCode = errorCode;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public ErrorCode ErrorCode { get; private set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public ErrorCode ErrorCode { get; private set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,33 +3,33 @@ using System.Collections.Generic;
|
||||
|
||||
namespace NLangDetect.Core
|
||||
{
|
||||
public class ProbVector
|
||||
{
|
||||
private readonly Dictionary<int, double> _dict = new Dictionary<int, double>();
|
||||
|
||||
public double this[int key]
|
||||
public class ProbVector
|
||||
{
|
||||
get
|
||||
{
|
||||
double value;
|
||||
private readonly Dictionary<int, double> _dict = new Dictionary<int, double>();
|
||||
|
||||
return _dict.TryGetValue(key, out value) ? value : 0.0;
|
||||
}
|
||||
|
||||
set
|
||||
{
|
||||
if (Math.Abs(value) < double.Epsilon)
|
||||
public double this[int key]
|
||||
{
|
||||
if (_dict.ContainsKey(key))
|
||||
{
|
||||
_dict.Remove(key);
|
||||
}
|
||||
get
|
||||
{
|
||||
double value;
|
||||
|
||||
return;
|
||||
return _dict.TryGetValue(key, out value) ? value : 0.0;
|
||||
}
|
||||
|
||||
set
|
||||
{
|
||||
if (Math.Abs(value) < double.Epsilon)
|
||||
{
|
||||
if (_dict.ContainsKey(key))
|
||||
{
|
||||
_dict.Remove(key);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
_dict[key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
_dict[key] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Reflection;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Linq;
|
||||
using System;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace NLangDetect.Core.Utils
|
||||
{
|
||||
@@ -29,7 +28,7 @@ namespace NLangDetect.Core.Utils
|
||||
|
||||
private static Dictionary<string, string> LoadMessages()
|
||||
{
|
||||
var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1) ;
|
||||
var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1);
|
||||
|
||||
Stream messagesStream =
|
||||
typeof(Messages).Assembly
|
||||
|
||||
@@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions;
|
||||
|
||||
namespace NLangDetect.Core.Utils
|
||||
{
|
||||
public class NGram
|
||||
{
|
||||
public const int GramsCount = 3;
|
||||
public class NGram
|
||||
{
|
||||
public const int GramsCount = 3;
|
||||
|
||||
private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
|
||||
private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
|
||||
|
||||
private static readonly string[] CjkClass =
|
||||
{
|
||||
private static readonly string[] CjkClass =
|
||||
{
|
||||
#region CJK classes
|
||||
|
||||
Messages.getString("NGram.KANJI_1_0"),
|
||||
@@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils
|
||||
#endregion
|
||||
};
|
||||
|
||||
private static readonly Dictionary<char, char> _cjkMap;
|
||||
private static readonly Dictionary<char, char> _cjkMap;
|
||||
|
||||
private StringBuilder _grams;
|
||||
private bool _capitalword;
|
||||
private StringBuilder _grams;
|
||||
private bool _capitalword;
|
||||
|
||||
#region Constructor(s)
|
||||
#region Constructor(s)
|
||||
|
||||
static NGram()
|
||||
{
|
||||
_cjkMap = new Dictionary<char, char>();
|
||||
|
||||
foreach (string cjk_list in CjkClass)
|
||||
{
|
||||
char representative = cjk_list[0];
|
||||
|
||||
for (int i = 0; i < cjk_list.Length; i++)
|
||||
static NGram()
|
||||
{
|
||||
_cjkMap.Add(cjk_list[i], representative);
|
||||
}
|
||||
}
|
||||
}
|
||||
_cjkMap = new Dictionary<char, char>();
|
||||
|
||||
public NGram()
|
||||
{
|
||||
_grams = new StringBuilder(" ");
|
||||
_capitalword = false;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Public methods
|
||||
|
||||
public static char Normalize(char ch)
|
||||
{
|
||||
UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();
|
||||
|
||||
if (!unicodeBlock.HasValue)
|
||||
{
|
||||
return ch;
|
||||
}
|
||||
|
||||
switch (unicodeBlock.Value)
|
||||
{
|
||||
case UnicodeBlock.BasicLatin:
|
||||
{
|
||||
if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
|
||||
foreach (string cjk_list in CjkClass)
|
||||
{
|
||||
return ' ';
|
||||
char representative = cjk_list[0];
|
||||
|
||||
for (int i = 0; i < cjk_list.Length; i++)
|
||||
{
|
||||
_cjkMap.Add(cjk_list[i], representative);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeBlock.Latin1Supplement:
|
||||
{
|
||||
if (Latin1Excluded.IndexOf(ch) >= 0)
|
||||
{
|
||||
return ' ';
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeBlock.GeneralPunctuation:
|
||||
{
|
||||
return ' ';
|
||||
}
|
||||
|
||||
case UnicodeBlock.Arabic:
|
||||
{
|
||||
if (ch == '\u06cc')
|
||||
{
|
||||
return '\u064a';
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeBlock.LatinExtendedAdditional:
|
||||
{
|
||||
if (ch >= '\u1ea0')
|
||||
{
|
||||
return '\u1ec3';
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeBlock.Hiragana:
|
||||
{
|
||||
return '\u3042';
|
||||
}
|
||||
|
||||
case UnicodeBlock.Katakana:
|
||||
{
|
||||
return '\u30a2';
|
||||
}
|
||||
|
||||
case UnicodeBlock.Bopomofo:
|
||||
case UnicodeBlock.BopomofoExtended:
|
||||
{
|
||||
return '\u3105';
|
||||
}
|
||||
|
||||
case UnicodeBlock.CjkUnifiedIdeographs:
|
||||
{
|
||||
if (_cjkMap.ContainsKey(ch))
|
||||
{
|
||||
return _cjkMap[ch];
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeBlock.HangulSyllables:
|
||||
{
|
||||
return '\uac00';
|
||||
}
|
||||
}
|
||||
|
||||
return ch;
|
||||
}
|
||||
|
||||
public void AddChar(char ch)
|
||||
{
|
||||
ch = Normalize(ch);
|
||||
char lastchar = _grams[_grams.Length - 1];
|
||||
if (lastchar == ' ')
|
||||
{
|
||||
_grams = new StringBuilder(" ");
|
||||
_capitalword = false;
|
||||
if (ch == ' ') return;
|
||||
}
|
||||
else if (_grams.Length >= GramsCount)
|
||||
{
|
||||
_grams.Remove(0, 1);
|
||||
}
|
||||
_grams.Append(ch);
|
||||
|
||||
if (char.IsUpper(ch))
|
||||
{
|
||||
if (char.IsUpper(lastchar)) _capitalword = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
_capitalword = false;
|
||||
}
|
||||
}
|
||||
|
||||
public string Get(int n)
|
||||
{
|
||||
if (_capitalword)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
int len = _grams.Length;
|
||||
|
||||
if (n < 1 || n > 3 || len < n)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (n == 1)
|
||||
{
|
||||
char ch = _grams[len - 1];
|
||||
|
||||
if (ch == ' ')
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return ch.ToString();
|
||||
}
|
||||
public NGram()
|
||||
{
|
||||
_grams = new StringBuilder(" ");
|
||||
_capitalword = false;
|
||||
}
|
||||
|
||||
// TODO IMM HI: is ToString() here effective?
|
||||
return _grams.ToString().SubSequence(len - n, len);
|
||||
#endregion
|
||||
|
||||
#region Public methods
|
||||
|
||||
/// <summary>
/// Maps a character onto the representative character used when building n-grams.
/// </summary>
/// <remarks>
/// The mapping depends on the Unicode block reported by <c>GetUnicodeBlock</c>:
/// <list type="bullet">
/// <item><description>Basic Latin: anything outside <c>A-Z</c> / <c>a-z</c> becomes a space.</description></item>
/// <item><description>Latin-1 Supplement: characters listed in <c>Latin1Excluded</c> become a space.</description></item>
/// <item><description>General Punctuation: always a space.</description></item>
/// <item><description>Arabic: <c>U+06CC</c> is replaced by <c>U+064A</c>.</description></item>
/// <item><description>Latin Extended Additional: characters at or above <c>U+1EA0</c> become <c>U+1EC3</c>.</description></item>
/// <item><description>Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables: each block collapses to one fixed character.</description></item>
/// <item><description>CJK Unified Ideographs: replaced by the entry in <c>_cjkMap</c> when one exists.</description></item>
/// </list>
/// Every other character is returned unchanged.
/// </remarks>
/// <param name="ch">The character to normalize.</param>
/// <returns>The normalized character.</returns>
public static char Normalize(char ch)
{
    UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();

    if (!unicodeBlock.HasValue)
    {
        return ch;
    }

    switch (unicodeBlock.Value)
    {
        case UnicodeBlock.BasicLatin:
            {
                // Only ASCII letters survive; everything else is a separator.
                if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
                {
                    return ' ';
                }

                break;
            }

        case UnicodeBlock.Latin1Supplement:
            {
                if (Latin1Excluded.IndexOf(ch) >= 0)
                {
                    return ' ';
                }

                break;
            }

        case UnicodeBlock.GeneralPunctuation:
            {
                return ' ';
            }

        case UnicodeBlock.Arabic:
            {
                if (ch == '\u06cc')
                {
                    return '\u064a';
                }

                break;
            }

        case UnicodeBlock.LatinExtendedAdditional:
            {
                if (ch >= '\u1ea0')
                {
                    return '\u1ec3';
                }

                break;
            }

        case UnicodeBlock.Hiragana:
            {
                return '\u3042';
            }

        case UnicodeBlock.Katakana:
            {
                return '\u30a2';
            }

        case UnicodeBlock.Bopomofo:
        case UnicodeBlock.BopomofoExtended:
            {
                return '\u3105';
            }

        case UnicodeBlock.CjkUnifiedIdeographs:
            {
                // One dictionary probe instead of ContainsKey followed by the indexer.
                char representative;

                if (_cjkMap.TryGetValue(ch, out representative))
                {
                    return representative;
                }

                break;
            }

        case UnicodeBlock.HangulSyllables:
            {
                return '\uac00';
            }
    }

    return ch;
}
|
||||
|
||||
/// <summary>
/// Normalizes <paramref name="ch"/> and appends it to the n-gram buffer.
/// </summary>
/// <remarks>
/// When the buffer currently ends with a space the buffer is restarted (and a
/// normalized space is not appended at all). Otherwise the oldest character is
/// dropped once the buffer already holds <c>GramsCount</c> characters. The
/// capital-word flag is raised when two upper-case characters follow each other
/// and cleared whenever the appended character is not upper-case.
/// </remarks>
/// <param name="ch">The raw character to add.</param>
public void AddChar(char ch)
{
    char normalized = Normalize(ch);
    char previous = _grams[_grams.Length - 1];
    bool atWordBoundary = previous == ' ';

    if (atWordBoundary)
    {
        _grams = new StringBuilder(" ");
        _capitalword = false;

        if (normalized == ' ')
        {
            return;
        }
    }
    else if (_grams.Length >= GramsCount)
    {
        _grams.Remove(0, 1);
    }

    _grams.Append(normalized);

    if (!char.IsUpper(normalized))
    {
        _capitalword = false;
    }
    else if (char.IsUpper(previous))
    {
        _capitalword = true;
    }
}
|
||||
|
||||
/// <summary>
/// Returns the n-gram made of the last <paramref name="n"/> buffered characters.
/// </summary>
/// <param name="n">The requested n-gram length, from 1 to <c>GramsCount</c>.</param>
/// <returns>
/// The n-gram, or <c>null</c> when the capital-word flag is set, when
/// <paramref name="n"/> is out of range, when fewer than <paramref name="n"/>
/// characters are buffered, or when a requested unigram is a space.
/// </returns>
public string Get(int n)
{
    if (_capitalword)
    {
        return null;
    }

    int len = _grams.Length;

    // Upper bound comes from GramsCount rather than a repeated literal.
    if (n < 1 || n > GramsCount || len < n)
    {
        return null;
    }

    if (n == 1)
    {
        char ch = _grams[len - 1];

        if (ch == ' ')
        {
            return null;
        }

        return ch.ToString();
    }

    // Copy only the trailing n characters out of the builder instead of
    // materializing the whole buffer first.
    // NOTE(review): replaces the former SubSequence(len - n, len) call, which is
    // assumed to take an exclusive end index - confirm against that helper.
    return _grams.ToString(len - n, n);
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,75 +2,75 @@ using System.Text;
|
||||
|
||||
namespace NLangDetect.Core.Utils
|
||||
{
|
||||
public class TagExtractor
|
||||
{
|
||||
// TODO IMM HI: do the really need to be internal?
|
||||
internal string Target;
|
||||
internal int Threshold;
|
||||
internal StringBuilder StringBuilder;
|
||||
internal string Tag;
|
||||
|
||||
#region Constructor(s)
|
||||
|
||||
public TagExtractor(string tag, int threshold)
|
||||
public class TagExtractor
|
||||
{
|
||||
Target = tag;
|
||||
Threshold = threshold;
|
||||
Count = 0;
|
||||
Clear();
|
||||
}
|
||||
// TODO IMM HI: do the really need to be internal?
|
||||
internal string Target;
|
||||
internal int Threshold;
|
||||
internal StringBuilder StringBuilder;
|
||||
internal string Tag;
|
||||
|
||||
#endregion
|
||||
#region Constructor(s)
|
||||
|
||||
#region Public methods
|
||||
|
||||
public void Clear()
|
||||
{
|
||||
StringBuilder = new StringBuilder();
|
||||
Tag = null;
|
||||
}
|
||||
|
||||
public void SetTag(string tag)
|
||||
{
|
||||
Tag = tag;
|
||||
}
|
||||
|
||||
public void Add(string line)
|
||||
{
|
||||
if (Tag == Target && line != null)
|
||||
{
|
||||
StringBuilder.Append(line);
|
||||
}
|
||||
}
|
||||
|
||||
public void CloseTag(LangProfile profile)
|
||||
{
|
||||
if (profile != null && Tag == Target && StringBuilder.Length > Threshold)
|
||||
{
|
||||
var gram = new NGram();
|
||||
|
||||
for (int i = 0; i < StringBuilder.Length; i++)
|
||||
public TagExtractor(string tag, int threshold)
|
||||
{
|
||||
gram.AddChar(StringBuilder[i]);
|
||||
|
||||
for (int n = 1; n <= NGram.GramsCount; n++)
|
||||
{
|
||||
profile.Add(gram.Get(n));
|
||||
}
|
||||
Target = tag;
|
||||
Threshold = threshold;
|
||||
Count = 0;
|
||||
Clear();
|
||||
}
|
||||
|
||||
Count++;
|
||||
}
|
||||
#endregion
|
||||
|
||||
Clear();
|
||||
#region Public methods
|
||||
|
||||
/// <summary>
/// Forgets the current tag and starts a fresh, empty text buffer.
/// </summary>
public void Clear()
{
    Tag = null;
    StringBuilder = new StringBuilder();
}
|
||||
|
||||
/// <summary>
/// Records <paramref name="tag"/> as the tag currently being read.
/// </summary>
/// <param name="tag">The tag name to store.</param>
public void SetTag(string tag) => Tag = tag;
|
||||
|
||||
/// <summary>
/// Appends <paramref name="line"/> to the text buffer, but only while the
/// current tag equals the target tag.
/// </summary>
/// <param name="line">The text to append; <c>null</c> is ignored.</param>
public void Add(string line)
{
    if (Tag != Target || line == null)
    {
        return;
    }

    StringBuilder.Append(line);
}
|
||||
|
||||
/// <summary>
/// Finishes the current tag. When the tag is the target tag and more than
/// <c>Threshold</c> characters were collected, every buffered character is fed
/// through a new <c>NGram</c> and each resulting 1..<c>NGram.GramsCount</c> gram
/// is added to <paramref name="profile"/>; <c>Count</c> is then incremented.
/// The extractor state is cleared in every case.
/// </summary>
/// <param name="profile">The profile receiving the n-grams; <c>null</c> skips collection.</param>
public void CloseTag(LangProfile profile)
{
    bool shouldCollect = profile != null && Tag == Target && StringBuilder.Length > Threshold;

    if (shouldCollect)
    {
        var ngram = new NGram();
        int position = 0;

        while (position < StringBuilder.Length)
        {
            ngram.AddChar(StringBuilder[position]);

            int size = 1;

            while (size <= NGram.GramsCount)
            {
                profile.Add(ngram.Get(size));
                size++;
            }

            position++;
        }

        Count += 1;
    }

    Clear();
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public int Count { get; private set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public int Count { get; private set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user