mirror of
https://github.com/Sonarr/Sonarr.git
synced 2024-12-16 11:37:58 +02:00
8818e39c63
Closes #1145
771 lines
36 KiB
C#
771 lines
36 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using NLog;
|
|
using NzbDrone.Common.Extensions;
|
|
using NzbDrone.Common.Instrumentation;
|
|
using NzbDrone.Core.Parser.Model;
|
|
using NzbDrone.Core.Tv;
|
|
|
|
namespace NzbDrone.Core.Parser
|
|
{
|
|
public static class Parser
|
|
{
|
|
// Logger used by every method of this class; obtained from NzbDroneLogger for the Parser type.
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));
|
|
|
|
// Title patterns used by ParseTitle. They are tried in array order and the first pattern whose
// matches yield a non-null result from ParseMatchCollection wins, so the order of the entries
// is significant. The named groups (title, season, episode, absoluteepisode, airyear, airmonth,
// airday, special, extras, subgroup, hash) are read by ParseMatchCollection, GetSubGroup and
// GetReleaseHash.
private static readonly Regex[] ReportTitleRegex = new[]
|
|
{
|
|
//Anime - Absolute Episode Number + Title + Season+Episode
|
|
//Todo: This currently breaks series that start with numbers
|
|
// new Regex(@"^(?:(?<absoluteepisode>\d{2,3})(?:_|-|\s|\.)+)+(?<title>.+?)(?:\W|_)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)",
|
|
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Multi-Part episodes without a title (S01E05.S01E06)
|
|
new Regex(@"^(?:\W*S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
|
|
new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title Absolute Episode Number + Season+Episode
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>\d{2,3}))+(?:_|-|\s|\.)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+).*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title Season+Episode + Absolute Episode Number
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title Season+Episode
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:\s|\.).*?(?<hash>\[\w{8}\])?(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title with trailing number Absolute Episode Number
|
|
new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title - Absolute Episode Number
|
|
new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[. ]-[. ](?<absoluteepisode>\d{2,3}(?!\d+|[-])))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - [SubGroup] Title Absolute Episode Number
|
|
new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{2,3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - Title Absolute Episode Number [SubGroup]
|
|
new Regex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{3}(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - Title Absolute Episode Number [Hash]
|
|
new Regex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{2,3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?[-_. ]+.*?(?<hash>\[\w{8}\])(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes with airdate AND season/episode number
|
|
new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Multi-episode Repeated (S01E05 - S01E06, 1x05 - 1x06, etc)
|
|
new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc) **
|
|
new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
|
|
new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Mini-Series, treated as season 1, episodes are labelled as XofY
|
|
new Regex(@"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Supports Season 01 Episode 03
|
|
new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:[-_\W](?<![()\[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Multi-episode release with no space between series title and season (S01E11E12)
|
|
new Regex(@"(?:.*(?:^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{2}(?!\d+))(?:E(?<episode>(?<!\d+)\d{2}(?!\d+)))+",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Multi-episode with single episode numbers (S6.E1-E2, S6.E1E2, S6E1E2, etc)
|
|
new Regex(@"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Single episode season or episode S1E1 or S1-E1
|
|
new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//3 digit season S010E05
|
|
new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{3}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//5 digit episode number with a title
|
|
new Regex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//5 digit multi-episode with a title
|
|
new Regex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:[-_. ]{1,3}ep){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))+",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Season only releases
|
|
new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//4 digit season only releases
|
|
new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{4}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes with a title and season/episode in square brackets
|
|
new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+\[S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+|i|p)))+\])\W?(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Supports 103/113 naming
|
|
new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes with airdate
|
|
new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9])",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Supports 1103/1113 naming
|
|
new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<season>(?<!\d+|\(|\[|e|x)\d{2})(?<episode>(?<!e|x)\d{2}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//4 digit episode number
|
|
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
|
|
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//4 digit episode number
|
|
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
|
|
new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Episodes with single digit episode number (S01E1, S01E5E6, etc)
|
|
new Regex(@"^(?<title>.*?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//iTunes Season 1\05 Title (Quality).ext
|
|
new Regex(@"^(?:Season(?:_|-|\s|\.)(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:_|-|\s|\.)(?<episode>(?<!\d+)\d{1,2}(?!\d+))",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - Title Absolute Episode Number (e66)
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,3}))+.*?(?<hash>\[\w{8}\])?(?:$|\.)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - Title Absolute Episode Number
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Anime - Title {Absolute Episode Number}
|
|
new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
|
|
|
//Extant, terrible multi-episode naming (extant.10708.hdtv-lol.mp4)
|
|
new Regex(@"^(?<title>.+?)[-_. ](?<season>[0]?\d?)(?:(?<episode>\d{2}){2}(?!\d+))[-_. ]",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled)
|
|
};
|
|
|
|
// Patterns for titles that are hashes or other known junk names. ValidateBeforeParsing tests
// each of them against the title with its file extension removed and rejects the title when
// any of them matches.
private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
|
|
{
|
|
// Generic match for md5 and mixed-case hashes.
|
|
new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
|
|
|
|
// Generic match for shorter lower-case hashes.
|
|
new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled),
|
|
|
|
// Format seen on some NZBGeek releases
|
|
// Be very strict with these because they are very close to the valid 101 episode numbering.
|
|
new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled),
|
|
new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled),
|
|
|
|
//Backup filename (Unknown origins)
|
|
new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled),
|
|
|
|
//123 - Started appearing December 2014
|
|
new Regex(@"^123$", RegexOptions.Compiled),
|
|
|
|
//abc - Started appearing January 2015
|
|
new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase),
|
|
|
|
//b00bs - Started appearing January 2015
|
|
new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase)
|
|
};
|
|
|
|
//Regex to detect whether the title was reversed.
// The alternatives are "720p", "1080p" and an "S##E##" token spelled backwards; ParseTitle
// reverses the title (keeping the extension in place) when this matches.
|
|
private static readonly Regex ReversedTitleRegex = new Regex(@"[-._ ](p027|p0801|\d{2}E\d{2}S)[-._ ]", RegexOptions.Compiled);
|
|
|
|
// Text removed by CleanSeriesTitle: the words a/an/the/and/or/of when they are not at the
// very start of the title ("a" also not at the very end), plus every non-word character and underscore.
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Candidate file extension at the end of a string (a dot followed by 2-4 letters or digits).
// RemoveFileExtension decides whether the candidate is actually stripped.
private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Resolution, codec, audio and bit-depth tokens (plus the characters <>?*:|) that ParseTitle
// strips from the title before trying the title patterns.
private static readonly Regex SimpleTitleRegex = new Regex(@"(?:480[ip]|720[ip]|1080[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080|(8|10)b(it)?)\s*",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Leading "[ site.tld ]" style prefix; removed by ParseTitle and ParseReleaseGroup.
private static readonly Regex WebsitePrefixRegex = new Regex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Air date written as yyyy-MM-dd or MM-dd-yyyy (separated by _ . or -). ParseTitle rewrites
// the title as "<text before the date>yyyy.MM.dd", dropping whatever followed the date.
private static readonly Regex AirDateRegex = new Regex(@"^(.*?)(?<!\d)((?<airyear>\d{4})[_.-](?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])|(?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])[_.-](?<airyear>\d{4}))(?!\d)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Six digit yyMMdd air date surrounded by _ . or -; ParseTitle expands it to 20yy.MM.dd.
private static readonly Regex SixDigitAirDateRegex = new Regex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Text removed by ParseReleaseGroup before it looks for the group: everything up to and
// including an S##E## token, and trailing -RP/-1/-NZBGeek/-Obfuscated/-sample suffixes.
private static readonly Regex CleanReleaseGroupRegex = new Regex(@"^(.*?[-._ ](S\d+E\d+)[-._ ])|(-(RP|1|NZBGeek|Obfuscated|sample))+$",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Trailing [ettv]/[rartv]/[rarbg]/[cttv] tag; removed by ParseTitle.
private static readonly Regex CleanTorrentSuffixRegex = new Regex(@"\[(?:ettv|rartv|rarbg|cttv)\]$",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// "-group" candidates; ParseReleaseGroup uses the last match. The lookbehind rejects
// candidates that end in WEB-DL, 480p, 720p or 1080p.
private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?<releasegroup>[a-z0-9]+)\b(?<!WEB-DL|480p|720p|1080p)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Leading "[subgroup]" tag; when present ParseReleaseGroup returns it as the release group.
private static readonly Regex AnimeReleaseGroupRegex = new Regex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Language markers checked by ParseLanguage after its plain language-name lookups.
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Splits a series title into the text before a four digit number and that number (taken as
// the year); used by GetSeriesTitleInfo.
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
|
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
// Helpers for NormalizeTitle and NormalizeEpisodeTitle.
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled);
|
|
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
|
|
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
|
|
|
|
// Bracketed "[...]" segments; removed from the series name by ParseMatchCollection.
private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled);
|
|
|
|
// Parses episode information from a file path by trying progressively broader titles:
//   1. the file name,
//   2. the parent directory name followed by the file name,
//   3. the parent directory name with the file's extension appended.
// Returns the first successful parse, or null (after logging a warning) when every attempt fails.
// Attempts 2 and 3 are skipped when the file has no parent directory.
public static ParsedEpisodeInfo ParsePath(string path)
{
    var fileInfo = new FileInfo(path);
    var directory = fileInfo.Directory;

    var result = ParseTitle(fileInfo.Name);

    if (result == null && directory != null)
    {
        Logger.Debug("Attempting to parse episode info using directory and file names. {0}", directory.Name);

        // fileInfo.Name already ends with the extension; appending fileInfo.Extension as well
        // would produce "name.ext.ext" and leave a stray ".ext" inside the parsed title.
        result = ParseTitle(directory.Name + " " + fileInfo.Name);
    }

    if (result == null && directory != null)
    {
        Logger.Debug("Attempting to parse episode info using directory name. {0}", directory.Name);
        result = ParseTitle(directory.Name + fileInfo.Extension);
    }

    if (result == null)
    {
        Logger.Warn("Unable to parse episode info from path {0}", path);
    }

    return result;
}
|
|
|
|
// Parses a release or file title into episode information.
//
// The title is validated, reversed back when ReversedTitleRegex detects a reversed name, and a
// simplified copy is produced by stripping quality tokens, the file extension, a website
// prefix and a torrent suffix and by rewriting air dates as yyyy.MM.dd. The patterns in
// ReportTitleRegex are then tried in order against the simplified copy. The first pattern that
// yields a result is completed with language, quality, release group and release hash and
// returned; language, quality and release group are parsed from the full (un-simplified) title.
//
// Returns null when the title is null, fails validation, matches no pattern, contains an
// invalid air date, or when parsing throws. Unexpected exceptions are logged as errors unless
// the title contains "password" or "yenc".
public static ParsedEpisodeInfo ParseTitle(string title)
{
    // Without this guard a null title throws inside the try block and then throws again from
    // the catch handler (which lower-cases the title), so the exception would escape.
    if (title == null)
    {
        return null;
    }

    try
    {
        if (!ValidateBeforeParsing(title))
        {
            return null;
        }

        Logger.Debug("Parsing string '{0}'", title);

        if (ReversedTitleRegex.IsMatch(title))
        {
            var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
            Array.Reverse(titleWithoutExtension);

            // Only the part before the extension is reversed; the extension is re-appended as is.
            title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);

            Logger.Debug("Reversed name detected. Converted to '{0}'", title);
        }

        var simpleTitle = SimpleTitleRegex.Replace(title, string.Empty);

        simpleTitle = RemoveFileExtension(simpleTitle);

        // TODO: Quick fix stripping [url] - prefixes.
        simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty);

        simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty);

        var airDateMatch = AirDateRegex.Match(simpleTitle);
        if (airDateMatch.Success)
        {
            // Normalise the date to yyyy.MM.dd; anything after the date is dropped.
            simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value;
        }

        var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
        if (sixDigitAirDateMatch.Success)
        {
            var fixedDate = string.Format("20{0}.{1}.{2}", sixDigitAirDateMatch.Groups["airyear"].Value,
                                                           sixDigitAirDateMatch.Groups["airmonth"].Value,
                                                           sixDigitAirDateMatch.Groups["airday"].Value);

            simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
        }

        foreach (var regex in ReportTitleRegex)
        {
            var match = regex.Matches(simpleTitle);

            if (match.Count == 0)
            {
                continue;
            }

            Logger.Trace(regex);

            try
            {
                var result = ParseMatchCollection(match);

                // A null result means this pattern was not a usable fit; try the next one.
                if (result == null)
                {
                    continue;
                }

                if (result.FullSeason && title.ContainsIgnoreCase("Special"))
                {
                    result.FullSeason = false;
                    result.Special = true;
                }

                result.Language = ParseLanguage(title);
                Logger.Debug("Language parsed: {0}", result.Language);

                result.Quality = QualityParser.ParseQuality(title);
                Logger.Debug("Quality parsed: {0}", result.Quality);

                result.ReleaseGroup = ParseReleaseGroup(title);

                // A subgroup captured by the matching pattern takes precedence.
                var subGroup = GetSubGroup(match);
                if (!subGroup.IsNullOrWhiteSpace())
                {
                    result.ReleaseGroup = subGroup;
                }

                Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);

                result.ReleaseHash = GetReleaseHash(match);
                if (!result.ReleaseHash.IsNullOrWhiteSpace())
                {
                    Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
                }

                return result;
            }
            catch (InvalidDateException ex)
            {
                // An invalid air date stops the search; the remaining patterns are not tried.
                Logger.Debug(ex, ex.Message);
                break;
            }
        }
    }
    catch (Exception e)
    {
        var lowerTitle = title.ToLower();

        if (!lowerTitle.Contains("password") && !lowerTitle.Contains("yenc"))
        {
            Logger.Error(e, "An error has occurred while trying to parse " + title);
        }
    }

    Logger.Debug("Unable to parse {0}", title);
    return null;
}
|
|
|
|
// Returns the series title found by ParseTitle, or, when the title cannot be parsed,
// the whole title passed through CleanSeriesTitle.
public static string ParseSeriesName(string title)
{
    Logger.Debug("Parsing string '{0}'", title);

    var episodeInfo = ParseTitle(title);

    return episodeInfo != null
        ? episodeInfo.SeriesTitle
        : CleanSeriesTitle(title);
}
|
|
|
|
// Normalises a series title: everything matched by NormalizeRegex is removed, then the text is
// lower-cased and passed through RemoveAccent. A title that parses as a whole number is
// returned unchanged.
public static string CleanSeriesTitle(this string title)
{
    long parsedNumber;
    var isNumericTitle = long.TryParse(title, out parsedNumber);

    if (!isNumericTitle)
    {
        var stripped = NormalizeRegex.Replace(title, string.Empty);
        return stripped.ToLower().RemoveAccent();
    }

    //If Title only contains numbers return it as is.
    return title;
}
|
|
|
|
// Normalises an episode title: removes the words matched by SpecialEpisodeWordRegex, turns
// punctuation into spaces, collapses runs of whitespace to one space, then trims and lower-cases.
public static string NormalizeEpisodeTitle(string title)
{
    var withoutSpecialWords = SpecialEpisodeWordRegex.Replace(title, string.Empty);
    var withoutPunctuation = PunctuationRegex.Replace(withoutSpecialWords, " ");
    var singleSpaced = DuplicateSpacesRegex.Replace(withoutPunctuation, " ");

    return singleSpaced.Trim().ToLower();
}
|
|
|
|
// Normalises a title: word delimiters become single spaces, punctuation and the common words
// matched by CommonWordRegex are removed, runs of whitespace are collapsed to one space, and
// the result is trimmed and lower-cased.
public static string NormalizeTitle(string title)
{
    var spaced = WordDelimiterRegex.Replace(title, " ");
    var withoutPunctuation = PunctuationRegex.Replace(spaced, string.Empty);
    var withoutCommonWords = CommonWordRegex.Replace(withoutPunctuation, string.Empty);
    var singleSpaced = DuplicateSpacesRegex.Replace(withoutCommonWords, " ");

    return singleSpaced.Trim().ToLower();
}
|
|
|
|
// Extracts the release group from a title.
//
// The title is trimmed and stripped of its file extension and website prefix. A leading
// "[subgroup]" tag, when present, is returned directly. Otherwise the text matched by
// CleanReleaseGroupRegex is removed and the last "-group" candidate is returned.
// Returns null when there is no candidate or when the candidate consists only of digits.
public static string ParseReleaseGroup(string title)
{
    title = title.Trim();
    title = RemoveFileExtension(title);
    title = WebsitePrefixRegex.Replace(title, "");

    var animeMatch = AnimeReleaseGroupRegex.Match(title);

    if (animeMatch.Success)
    {
        return animeMatch.Groups["subgroup"].Value;
    }

    title = CleanReleaseGroupRegex.Replace(title, "");

    var matches = ReleaseGroupRegex.Matches(title);

    if (matches.Count == 0)
    {
        return null;
    }

    var group = matches[matches.Count - 1].Groups["releasegroup"].Value;

    // A purely numeric candidate is not a release group. The check is done per character
    // because parsing it as an int would let digit runs too long for an int slip through.
    if (group.All(char.IsDigit))
    {
        return null;
    }

    return group;
}
|
|
|
|
// Removes a trailing file extension from the title when it is a known media file extension
// or ".par2"/".nzb". Any other trailing ".xxx" is left untouched.
public static string RemoveFileExtension(string title)
{
    return FileExtensionRegex.Replace(title, m =>
    {
        // Lower-case with the invariant culture: with a culture-sensitive conversion an
        // extension such as ".AVI" becomes ".avı" under a Turkish culture and is not recognised.
        var extension = m.Value.ToLowerInvariant();

        var isKnownExtension = MediaFiles.MediaFileExtensions.Extensions.Contains(extension)
                               || extension == ".par2"
                               || extension == ".nzb";

        return isKnownExtension ? string.Empty : m.Value;
    });
}
|
|
|
|
// Language names ParseLanguage looks for as substrings of the lower-cased title, in priority
// order (the first entry found wins). Note that "nordic" maps to Norwegian.
private static readonly KeyValuePair<string, Language>[] LanguageKeywords =
{
    new KeyValuePair<string, Language>("english", Language.English),
    new KeyValuePair<string, Language>("french", Language.French),
    new KeyValuePair<string, Language>("spanish", Language.Spanish),
    new KeyValuePair<string, Language>("danish", Language.Danish),
    new KeyValuePair<string, Language>("dutch", Language.Dutch),
    new KeyValuePair<string, Language>("japanese", Language.Japanese),
    new KeyValuePair<string, Language>("cantonese", Language.Cantonese),
    new KeyValuePair<string, Language>("mandarin", Language.Mandarin),
    new KeyValuePair<string, Language>("korean", Language.Korean),
    new KeyValuePair<string, Language>("russian", Language.Russian),
    new KeyValuePair<string, Language>("polish", Language.Polish),
    new KeyValuePair<string, Language>("vietnamese", Language.Vietnamese),
    new KeyValuePair<string, Language>("swedish", Language.Swedish),
    new KeyValuePair<string, Language>("norwegian", Language.Norwegian),
    new KeyValuePair<string, Language>("nordic", Language.Norwegian),
    new KeyValuePair<string, Language>("finnish", Language.Finnish),
    new KeyValuePair<string, Language>("turkish", Language.Turkish),
    new KeyValuePair<string, Language>("portuguese", Language.Portuguese),
    new KeyValuePair<string, Language>("hungarian", Language.Hungarian)
};

// Determines the language of a release from its title.
//
// Full language names are looked for first (see LanguageKeywords). When none is present the
// first match of LanguageRegex decides. English is returned when nothing matches.
public static Language ParseLanguage(string title)
{
    // Invariant lower-casing: a culture-sensitive conversion maps "I" to a dotless "ı" under a
    // Turkish culture, which would stop upper-case names such as "ENGLISH" from being found.
    var lowerTitle = title.ToLowerInvariant();

    foreach (var keyword in LanguageKeywords)
    {
        if (lowerTitle.Contains(keyword.Key))
        {
            return keyword.Value;
        }
    }

    var match = LanguageRegex.Match(title);

    if (match.Groups["italian"].Success)
    {
        return Language.Italian;
    }

    if (match.Groups["german"].Success)
    {
        return Language.German;
    }

    if (match.Groups["flemish"].Success)
    {
        return Language.Flemish;
    }

    if (match.Groups["greek"].Success)
    {
        return Language.Greek;
    }

    if (match.Groups["french"].Success)
    {
        return Language.French;
    }

    if (match.Groups["russian"].Success)
    {
        return Language.Russian;
    }

    if (match.Groups["dutch"].Success)
    {
        return Language.Dutch;
    }

    if (match.Groups["hungarian"].Success)
    {
        return Language.Hungarian;
    }

    return Language.English;
}
|
|
|
|
// Builds a SeriesTitleInfo for the title. When YearInTitleRegex matches, TitleWithoutYear is
// the text captured before the four digit number and Year is that number; otherwise
// TitleWithoutYear is the full title and Year is left at its default.
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
    var info = new SeriesTitleInfo();
    info.Title = title;

    var yearMatch = YearInTitleRegex.Match(title);

    if (yearMatch.Success)
    {
        info.TitleWithoutYear = yearMatch.Groups["title"].Value;
        info.Year = Convert.ToInt32(yearMatch.Groups["year"].Value);
    }
    else
    {
        info.TitleWithoutYear = title;
    }

    return info;
}
|
|
|
|
// Converts the matches of one title pattern into a ParsedEpisodeInfo.
//
// When the first match carries an "airyear" of 1900 or later the release is treated as a daily
// show and only the air date is filled in. Otherwise season, episode and absolute episode
// numbers are read from the captures.
//
// Returns null when the captures are unusable for this pattern: more than one distinct season,
// a first episode number greater than the last, or an EXTRAS/SUBPACK season release.
// Throws InvalidDateException when the air date is not a real date, is more than a day in the
// future, or is before 1970.
private static ParsedEpisodeInfo ParseMatchCollection(MatchCollection matchCollection)
{
    var primaryMatch = matchCollection[0];

    var rawTitle = primaryMatch.Groups["title"].Value;
    var seriesName = RequestInfoRegex.Replace(rawTitle.Replace('.', ' ').Replace('_', ' '), "").Trim(' ');

    // airYear stays 0 when the pattern has no usable "airyear" capture.
    int airYear;
    int.TryParse(primaryMatch.Groups["airyear"].Value, out airYear);

    ParsedEpisodeInfo result;

    if (airYear >= 1900)
    {
        //Try to Parse as a daily show
        var airmonth = Convert.ToInt32(primaryMatch.Groups["airmonth"].Value);
        var airday = Convert.ToInt32(primaryMatch.Groups["airday"].Value);

        //Swap day and month if month is bigger than 12 (scene fail)
        if (airmonth > 12)
        {
            var swappedMonth = airmonth;
            airmonth = airday;
            airday = swappedMonth;
        }

        DateTime airDate;

        try
        {
            airDate = new DateTime(airYear, airmonth, airday);
        }
        catch (Exception)
        {
            throw new InvalidDateException("Invalid date found: {0}-{1}-{2}", airYear, airmonth, airday);
        }

        //Check if episode is in the future (most likely a parse error)
        var latestAllowed = DateTime.Now.AddDays(1).Date;
        var earliestAllowed = new DateTime(1970, 1, 1);

        if (airDate > latestAllowed || airDate < earliestAllowed)
        {
            throw new InvalidDateException("Invalid date found: {0}", airDate);
        }

        result = new ParsedEpisodeInfo();
        result.AirDate = airDate.ToString(Episode.AIR_DATE_FORMAT);
    }
    else
    {
        var seasons = new List<int>();

        foreach (Capture seasonCapture in primaryMatch.Groups["season"].Captures)
        {
            int seasonNumber;

            if (int.TryParse(seasonCapture.Value, out seasonNumber))
            {
                seasons.Add(seasonNumber);
            }
        }

        //If no season was found it should be treated as a mini series and season 1
        if (seasons.Count == 0)
        {
            seasons.Add(1);
        }

        //If more than 1 season was parsed go to the next REGEX (A multi-season release is unlikely)
        if (seasons.Distinct().Count() > 1)
        {
            return null;
        }

        result = new ParsedEpisodeInfo();
        result.SeasonNumber = seasons[0];
        result.EpisodeNumbers = new int[0];
        result.AbsoluteEpisodeNumbers = new int[0];

        foreach (Match currentMatch in matchCollection)
        {
            var episodes = currentMatch.Groups["episode"].Captures.Cast<Capture>().ToList();
            var absoluteEpisodes = currentMatch.Groups["absoluteepisode"].Captures.Cast<Capture>().ToList();

            //Allows us to return a list of 0 episodes (We can handle that as a full season release)
            if (episodes.Count > 0)
            {
                var firstEpisode = Convert.ToInt32(episodes[0].Value);
                var lastEpisode = Convert.ToInt32(episodes[episodes.Count - 1].Value);

                if (firstEpisode > lastEpisode)
                {
                    return null;
                }

                // The first and last captures are treated as the ends of a contiguous range.
                result.EpisodeNumbers = Enumerable.Range(firstEpisode, lastEpisode - firstEpisode + 1).ToArray();
            }

            if (absoluteEpisodes.Count > 0)
            {
                var firstAbsolute = Convert.ToInt32(absoluteEpisodes[0].Value);
                var lastAbsolute = Convert.ToInt32(absoluteEpisodes[absoluteEpisodes.Count - 1].Value);

                if (firstAbsolute > lastAbsolute)
                {
                    return null;
                }

                result.AbsoluteEpisodeNumbers = Enumerable.Range(firstAbsolute, lastAbsolute - firstAbsolute + 1).ToArray();

                if (currentMatch.Groups["special"].Success)
                {
                    result.Special = true;
                }
            }

            if (episodes.Count == 0 && absoluteEpisodes.Count == 0)
            {
                //Check to see if this is an "Extras" or "SUBPACK" release, if it is, return NULL
                //Todo: Set a "Extras" flag in EpisodeParseResult if we want to download them ever
                if (!primaryMatch.Groups["extras"].Value.IsNullOrWhiteSpace())
                {
                    return null;
                }

                result.FullSeason = true;
            }
        }

        // Releases numbered only by absolute episode get season 0.
        if (result.AbsoluteEpisodeNumbers.Any() && !result.EpisodeNumbers.Any())
        {
            result.SeasonNumber = 0;
        }
    }

    result.SeriesTitle = seriesName;
    result.SeriesTitleInfo = GetSeriesTitleInfo(result.SeriesTitle);

    Logger.Debug("Episode Parsed. {0}", result);

    return result;
}
|
|
|
|
// Decides whether a title is worth parsing. Returns false when the title
//   - contains both "password" and "yenc" (case-insensitively),
//   - contains no letter or digit at all, or
//   - with its file extension removed, matches one of RejectHashedReleasesRegex.
private static bool ValidateBeforeParsing(string title)
{
    var lowerTitle = title.ToLower();

    if (lowerTitle.Contains("password") && lowerTitle.Contains("yenc"))
    {
        // Say why the title was rejected instead of logging an empty message.
        Logger.Debug("Rejected title containing both 'password' and 'yenc': {0}", title);
        return false;
    }

    if (!title.Any(char.IsLetterOrDigit))
    {
        return false;
    }

    var titleWithoutExtension = RemoveFileExtension(title);

    if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
    {
        Logger.Debug("Rejected Hashed Release Title: " + title);
        return false;
    }

    return true;
}
|
|
|
|
// Returns the "subgroup" capture of the first match, or an empty string when the pattern did
// not capture one.
private static string GetSubGroup(MatchCollection matchCollection)
{
    var subGroupCapture = matchCollection[0].Groups["subgroup"];

    return subGroupCapture.Success
        ? subGroupCapture.Value
        : string.Empty;
}
|
|
|
|
// Returns the "hash" capture of the first match without its surrounding delimiters, or an
// empty string when no hash was captured or the captured text is the resolution "1280x720".
private static string GetReleaseHash(MatchCollection matchCollection)
{
    var hash = matchCollection[0].Groups["hash"];

    if (!hash.Success)
    {
        return string.Empty;
    }

    // One of the title patterns accepts hashes wrapped in parentheses as well as square
    // brackets, so both kinds of delimiter have to be stripped.
    var hashValue = hash.Value.Trim('[', ']', '(', ')');

    // "1280x720" has the same shape as an eight character hash but is a resolution.
    if (hashValue.Equals("1280x720"))
    {
        return string.Empty;
    }

    return hashValue;
}
|
|
}
|
|
}
|