2011-04-27 05:47:29 +03:00
|
|
|
using System;
|
|
|
|
using System.Collections.Generic;
|
|
|
|
using System.IO;
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
using NLog;
|
|
|
|
using NzbDrone.Core.Model;
|
|
|
|
using NzbDrone.Core.Repository.Quality;
|
|
|
|
|
|
|
|
namespace NzbDrone.Core
|
|
|
|
{
|
|
|
|
public static class Parser
|
|
|
|
{
|
|
|
|
//Logger shared by the parsing methods of this class for their trace/warn output
private static readonly Logger Logger = LogManager.GetCurrentClassLogger();
|
|
|
|
|
|
|
|
//Options shared by every report title pattern
private const RegexOptions ReportTitleRegexOptions = RegexOptions.Compiled | RegexOptions.IgnoreCase;

//Patterns are tried in the order listed here; the first one that matches a title is used,
//so the more specific formats have to stay ahead of the more permissive ones.
private static readonly Regex[] ReportTitleRegex = new Regex[]
{
    //Episodes with airdate
    new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>\d{2})\W+(?<airday>\d{2})\W?(?!\\)", ReportTitleRegexOptions),

    //Multi-Part episodes without a title (S01E05.S01E06)
    new Regex(@"^(?:\W*S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|\sto\s){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)", ReportTitleRegexOptions),

    //Single episodes or multi-episode (S01E05E06, S01E05-06, etc)
    new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|\sto\s){1,2}(?<episode>\d{1,2}(?!\d+)))+)+\W?(?!\\)", ReportTitleRegexOptions),

    //No Title - Single episodes or multi-episode (S01E05E06, S01E05-06, etc)
    new Regex(@"^(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|\sto\s){1,2}(?<episode>\d{1,2}(?!\d+)))+\W*)+\W?(?!\\)", ReportTitleRegexOptions),

    //Supports 103/113 naming
    new Regex(@"^(?<title>.+?)?\W?(?:\W(?<season>\d+)(?<episode>\d{2}))+\W?(?!\\)", ReportTitleRegexOptions),

    //Episodes over 99 (3-digits or more)
    new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|to)+(?<episode>\d+))+)+\W?(?!\\)", ReportTitleRegexOptions),

    //Supports Season only releases
    new Regex(@"^(?<title>.*?)\W(?:S|Season\W?)?(?<season>\d{1,2}(?!\d+))+\W?(?!\\)", ReportTitleRegexOptions)
};
|
|
|
|
|
2011-06-13 21:25:40 +03:00
|
|
|
//Matches everything NormalizeTitle strips out of a title:
// - the stand-alone words a/an/the/and/or/of together with their delimiters
// - any non-word character
// - digit runs, except those starting with a 19xx/20xx year or directly followed by 'i'/'p' (e.g. 720p)
private static readonly Regex NormalizeRegex = new Regex(
    @"((^|\W)(a|an|the|and|or|of)($|\W))|\W|(?:(?<=[^0-9]+)|\b)(?!(?:19\d{2}|20\d{2}))\d+(?=[^0-9ip]+|\b)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
|
|
|
|
2011-06-14 08:52:12 +03:00
|
|
|
//Matches the tokens removed from a title before episode parsing: resolution markers
//(480/720/1080 followed by i or p), the x264/h264 codec tags and the characters < > ? * : |
//The i/p and x/h choices are plain character classes; a '|' inside [...] is a literal pipe,
//so it must not be used there as an alternation separator.
private static readonly Regex SimpleTitleRegex = new Regex(@"(?:480|720|1080)[ip]|[xh]264|[<>?*:|]",
                                                           RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
|
2011-04-27 05:47:29 +03:00
|
|
|
/// <summary>
///   Parses a post title into list of episodes it contains
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>
///   The parsed episode information, or null when no pattern matches the title or the
///   title carries an air date that is not a valid calendar date
/// </returns>
internal static EpisodeParseResult ParseEpisodeInfo(string title)
{
    Logger.Trace("Parsing string '{0}'", title);
    var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty);

    //Use only the filename, not the entire path (same for every pattern, so resolve it once)
    var fileName = new FileInfo(simpleTitle).Name;

    foreach (var regex in ReportTitleRegex)
    {
        var match = regex.Matches(fileName);

        if (match.Count == 0)
        {
            continue;
        }

        var seriesName = NormalizeTitle(match[0].Groups["title"].Value);

        //Stays 0 when the pattern has no airyear group
        int airyear;
        Int32.TryParse(match[0].Groups["airyear"].Value, out airyear);

        EpisodeParseResult parsedEpisode;

        if (airyear < 1)
        {
            int season;
            Int32.TryParse(match[0].Groups["season"].Value, out season);

            //NOTE(review): Language is only parsed for daily shows below; confirm whether
            //season/episode releases are meant to keep the default language.
            parsedEpisode = new EpisodeParseResult
                                {
                                    CleanTitle = seriesName,
                                    SeasonNumber = season,
                                    EpisodeNumbers = new List<int>()
                                };

            foreach (Match matchGroup in match)
            {
                var episodeCaptures = matchGroup.Groups["episode"].Captures;
                var count = episodeCaptures.Count;

                //Allows use to return a list of 0 episodes (We can handle that as a full season release)
                if (count > 0)
                {
                    //Everything between the first and the last captured number is part of the release
                    var first = Convert.ToInt32(episodeCaptures[0].Value);
                    var last = Convert.ToInt32(episodeCaptures[count - 1].Value);

                    for (int i = first; i <= last; i++)
                    {
                        parsedEpisode.EpisodeNumbers.Add(i);
                    }
                }
            }
        }
        else
        {
            //Daily show
            var airmonth = Convert.ToInt32(match[0].Groups["airmonth"].Value);
            var airday = Convert.ToInt32(match[0].Groups["airday"].Value);

            //The pattern accepts any two digits for month and day; reject impossible dates
            //here instead of letting the DateTime constructor throw.
            if (airmonth < 1 || airmonth > 12 || airday < 1 || airday > DateTime.DaysInMonth(airyear, airmonth))
            {
                Logger.Warn("Unable to parse text into episode info. {0}", title);
                return null;
            }

            parsedEpisode = new EpisodeParseResult
                                {
                                    CleanTitle = seriesName,
                                    AirDate = new DateTime(airyear, airmonth, airday),
                                    Language = ParseLanguage(simpleTitle)
                                };
        }

        //Quality is read from the untouched title; simpleTitle already lost its resolution/codec tokens
        parsedEpisode.Quality = ParseQuality(title);

        Logger.Trace("Episode Parsed. {0}", parsedEpisode);

        return parsedEpisode;
    }

    Logger.Warn("Unable to parse text into episode info. {0}", title);
    return null;
}
|
|
|
|
|
|
|
|
/// <summary>
///   Parses a post title into season it contains
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>
///   Season information contained in the post, or null when no pattern matches or the
///   matching pattern yields no season number
/// </returns>
internal static SeasonParseResult ParseSeasonInfo(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    foreach (var regex in ReportTitleRegex)
    {
        var match = regex.Match(title);

        if (!match.Success)
        {
            continue;
        }

        //The air-date pattern has no season group, so its value is empty for daily shows.
        //Treat that as "no season information" rather than failing on the conversion.
        int seasonNumber;
        if (!Int32.TryParse(match.Groups["season"].Value, out seasonNumber))
        {
            Logger.Warn("Unable to parse text into season info. {0}", title);
            return null;
        }

        var seriesName = NormalizeTitle(match.Groups["title"].Value);

        //None of the patterns in ReportTitleRegex defines a "year" group, so this stays 0
        //unless such a group is added to them.
        int year;
        Int32.TryParse(match.Groups["year"].Value, out year);

        if (year < 1900 || year > DateTime.Now.Year + 1)
        {
            year = 0;
        }

        var result = new SeasonParseResult
                         {
                             SeriesTitle = seriesName,
                             SeasonNumber = seasonNumber,
                             Year = year,
                             Quality = ParseQuality(title)
                         };

        Logger.Trace("Season Parsed. {0}", result);
        return result;
    }

    return null; //Return null
}
|
|
|
|
|
|
|
|
/// <summary>
///   Extracts the series name from a post title and normalizes it
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>Normalized series name, or an empty string when no pattern matches</returns>
internal static string ParseSeriesName(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    foreach (var pattern in ReportTitleRegex)
    {
        var firstMatch = pattern.Match(title);

        if (!firstMatch.Success)
        {
            continue;
        }

        //First pattern that matches decides the name
        var normalizedName = NormalizeTitle(firstMatch.Groups["title"].Value);
        Logger.Trace("Series Parsed. {0}", normalizedName);

        return normalizedName;
    }

    return String.Empty;
}
|
|
|
|
|
|
|
|
/// <summary>
///   Parses proper status out of a report title
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>True when the title contains "proper" in any casing; false otherwise, including for a null or empty title</returns>
internal static bool ParseProper(string title)
{
    if (String.IsNullOrEmpty(title))
    {
        return false;
    }

    //Ordinal, case-insensitive search: no lower-cased copy and no dependency on the current culture
    return title.IndexOf("proper", StringComparison.OrdinalIgnoreCase) >= 0;
}
|
|
|
|
|
2011-05-28 22:23:35 +03:00
|
|
|
/// <summary>
///   Determines the quality of a release from its name
/// </summary>
/// <param name = "name">Report title or file name</param>
/// <returns>
///   Quality holding the detected type and the proper flag; the type stays Unknown
///   when neither the name nor its extension gives a hint
/// </returns>
internal static Quality ParseQuality(string name)
{
    Logger.Trace("Trying to parse quality for {0}", name);

    name = name.Trim();
    var normalizedName = NormalizeTitle(name);
    var result = new Quality { QualityType = QualityTypes.Unknown };
    result.Proper = normalizedName.Contains("proper");

    if (normalizedName.Contains("dvd") || normalizedName.Contains("bdrip") || normalizedName.Contains("brrip"))
    {
        result.QualityType = QualityTypes.DVD;
        return result;
    }

    if (normalizedName.Contains("xvid") || normalizedName.Contains("divx"))
    {
        //An xvid/divx encode of a bluray source is reported as DVD
        if (normalizedName.Contains("bluray"))
        {
            result.QualityType = QualityTypes.DVD;
            return result;
        }

        result.QualityType = QualityTypes.SDTV;
        return result;
    }

    if (normalizedName.Contains("bluray"))
    {
        if (normalizedName.Contains("1080p") && !normalizedName.Contains("720p"))
        {
            result.QualityType = QualityTypes.Bluray1080p;
            return result;
        }

        //720p, or no resolution given at all
        result.QualityType = QualityTypes.Bluray720p;
        return result;
    }

    if (normalizedName.Contains("webdl"))
    {
        result.QualityType = QualityTypes.WEBDL;
        return result;
    }

    //NOTE(review): NormalizeTitle removes most digit runs, so "x264"/"h264" may never
    //survive normalization and only the 720p test may be effective here - confirm.
    if (normalizedName.Contains("x264") || normalizedName.Contains("h264") || normalizedName.Contains("720p"))
    {
        result.QualityType = QualityTypes.HDTV;
        return result;
    }

    //Based on extension
    //Every branch above returns, so the type is still Unknown at this point
    try
    {
        //Invariant lower-casing keeps the comparison independent of the current culture
        switch (Path.GetExtension(name).ToLowerInvariant())
        {
            case ".avi":
            case ".xvid":
            case ".wmv":
            case ".mp4":
                {
                    result.QualityType = QualityTypes.SDTV;
                    break;
                }
            case ".mkv":
                {
                    result.QualityType = QualityTypes.HDTV;
                    break;
                }
        }
    }
    catch (ArgumentException)
    {
        //Swallow exception for cases where string contains illegal
        //path characters.
    }

    //"sdtv" always wins; a bare "hdtv" only counts as SDTV when the extension gave no answer
    if (normalizedName.Contains("sdtv") || (result.QualityType == QualityTypes.Unknown && normalizedName.Contains("hdtv")))
    {
        result.QualityType = QualityTypes.SDTV;
        return result;
    }

    Logger.Trace("Quality Parsed:{0} Title:{1}", result, name);
    return result;
}
|
|
|
|
|
2011-05-28 22:23:35 +03:00
|
|
|
|
|
|
|
|
2011-05-09 09:16:26 +03:00
|
|
|
/// <summary>
///   Detects the language of a release from the language name contained in its title
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>The first language whose English name appears in the title; English when none does</returns>
internal static LanguageType ParseLanguage(string title)
{
    //Lower-case once, culture-invariant: with the current culture an upper-case 'I' may not
    //become 'i' (e.g. Turkish), which would hide "ENGLISH", "SPANISH", "ITALIAN", ...
    var lowerTitle = title.ToLowerInvariant();

    if (lowerTitle.Contains("english"))
        return LanguageType.English;

    if (lowerTitle.Contains("french"))
        return LanguageType.French;

    if (lowerTitle.Contains("spanish"))
        return LanguageType.Spanish;

    //Make sure it doesn't contain Germany (Since we're not using REGEX for all this)
    if (lowerTitle.Contains("german") && !lowerTitle.Contains("germany"))
        return LanguageType.German;

    if (lowerTitle.Contains("italian"))
        return LanguageType.Italian;

    if (lowerTitle.Contains("danish"))
        return LanguageType.Danish;

    if (lowerTitle.Contains("dutch"))
        return LanguageType.Dutch;

    if (lowerTitle.Contains("japanese"))
        return LanguageType.Japanese;

    if (lowerTitle.Contains("cantonese"))
        return LanguageType.Cantonese;

    if (lowerTitle.Contains("mandarin"))
        return LanguageType.Mandarin;

    if (lowerTitle.Contains("korean"))
        return LanguageType.Korean;

    if (lowerTitle.Contains("russian"))
        return LanguageType.Russian;

    if (lowerTitle.Contains("polish"))
        return LanguageType.Polish;

    if (lowerTitle.Contains("vietnamese"))
        return LanguageType.Vietnamese;

    if (lowerTitle.Contains("swedish"))
        return LanguageType.Swedish;

    if (lowerTitle.Contains("norwegian"))
        return LanguageType.Norwegian;

    if (lowerTitle.Contains("finnish"))
        return LanguageType.Finnish;

    if (lowerTitle.Contains("turkish"))
        return LanguageType.Turkish;

    if (lowerTitle.Contains("portuguese"))
        return LanguageType.Portuguese;

    //Nothing recognised - assume English
    return LanguageType.English;
}
|
|
|
|
|
2011-04-27 05:47:29 +03:00
|
|
|
/// <summary>
///   Normalizes the title. removing all non-word characters as well as common tokens
///   such as 'the' and 'and'
/// </summary>
/// <param name = "title">title</param>
/// <returns>The title with everything matched by NormalizeRegex removed, in lower case</returns>
public static string NormalizeTitle(string title)
{
    //Invariant lower-casing so the same title normalizes to the same key on every machine,
    //whatever the current culture is
    return NormalizeRegex.Replace(title, String.Empty).ToLowerInvariant();
}
|
|
|
|
|
|
|
|
/// <summary>
///   Resolves a path to its full form and strips surrounding separators and spaces
/// </summary>
/// <param name = "path">Path to normalize</param>
/// <returns>
///   The full path without trailing '/', '\' or ' '; for non-UNC paths those characters
///   are removed from the start as well
/// </returns>
/// <exception cref = "ArgumentException">path is null, empty or white space</exception>
public static string NormalizePath(string path)
{
    if (String.IsNullOrWhiteSpace(path))
        throw new ArgumentException("Path can not be null or empty");

    var fullName = new FileInfo(path).FullName;

    //UNC: the leading "\\" is part of the path, so only the end may be trimmed.
    //Ordinal comparison - this is a literal prefix test, not a linguistic one.
    if (fullName.StartsWith(@"\\", StringComparison.Ordinal))
    {
        return fullName.TrimEnd('/', '\\', ' ');
    }

    return fullName.Trim('/', '\\', ' ');
}
|
|
|
|
}
|
|
|
|
} |