From c459cdf168679a91c053bbd9b5614e739c15c0c6 Mon Sep 17 00:00:00 2001 From: Icer Addis Date: Tue, 7 Jan 2014 21:54:23 -0800 Subject: [PATCH] Fixes in response to code review: ParseSpecialEpisodeTitle now follows a similar pattern to the Map() method and accepts TvRageId and SearchCriteria. Fixed normalize episode title to handle punctuation separately from spaces and to remove special episode words. Removed comments. --- .../DecisionEngine/DownloadDecisionMaker.cs | 13 +--- src/NzbDrone.Core/Parser/Parser.cs | 17 +++-- src/NzbDrone.Core/Parser/ParsingService.cs | 71 +++++++++++-------- src/NzbDrone.Core/Tv/SeriesService.cs | 4 -- 4 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs b/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs index 093e16a89..b8b77c5a3 100644 --- a/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs +++ b/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs @@ -52,13 +52,6 @@ private IEnumerable<DownloadDecision> GetDecisions(List<ReleaseInfo> reports, Se _logger.ProgressInfo("No reports found"); } - // get series from search criteria - Tv.Series series = null; - if (searchCriteria != null) - { - series = searchCriteria.Series; - } - var reportNumber = 1; foreach (var report in reports) @@ -68,17 +61,13 @@ private IEnumerable<DownloadDecision> GetDecisions(List<ReleaseInfo> reports, Se try { - // use parsing service to parse episode info (this allows us to do episode title searches against the episode repository) var parsedEpisodeInfo = Parser.Parser.ParseTitle(report.Title); - // do we have a possible special episode? 
if (parsedEpisodeInfo == null || parsedEpisodeInfo.IsPossibleSpecialEpisode()) { - // try to parse as a special episode - var specialEpisodeInfo = _parsingService.ParseSpecialEpisodeTitle(report.Title, series); + var specialEpisodeInfo = _parsingService.ParseSpecialEpisodeTitle(report.Title, report.TvRageId, searchCriteria); if (specialEpisodeInfo != null) { - // use special episode parsedEpisodeInfo = specialEpisodeInfo; } } diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs index d59ce4e80..f307e421e 100644 --- a/src/NzbDrone.Core/Parser/Parser.cs +++ b/src/NzbDrone.Core/Parser/Parser.cs @@ -114,8 +114,11 @@ public static class Parser private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled); - private static readonly Regex NonWordRegex = new Regex(@"\W+", RegexOptions.Compiled); - private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of|part)\b\s?", + private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled); + private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled); + private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); @@ -227,11 +230,11 @@ public static string CleanupEpisodeTitle(string title) public static string NormalizeEpisodeTitle(string title) { - // convert any non-word characters to a single space - string normalizedSpaces = NonWordRegex.Replace(title, " ").ToLower(); - // remove common words - string normalized = CommonWordRegex.Replace(normalizedSpaces, String.Empty); - return normalized; + string singleSpaces = WordDelimiterRegex.Replace(title, " "); + string noPunctuation = 
PunctuationRegex.Replace(singleSpaces, String.Empty); + string noCommonWords = CommonWordRegex.Replace(noPunctuation, String.Empty); + string normalized = SpecialEpisodeWordRegex.Replace(noCommonWords, String.Empty); + return normalized.Trim().ToLower(); } public static string ParseReleaseGroup(string title) diff --git a/src/NzbDrone.Core/Parser/ParsingService.cs b/src/NzbDrone.Core/Parser/ParsingService.cs index a2dfee939..a0682a138 100644 --- a/src/NzbDrone.Core/Parser/ParsingService.cs +++ b/src/NzbDrone.Core/Parser/ParsingService.cs @@ -12,6 +12,7 @@ namespace NzbDrone.Core.Parser { public interface IParsingService { + ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, int tvRageId, SearchCriteriaBase searchCriteria = null); ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series); LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource); Series GetSeries(string title); @@ -40,51 +41,65 @@ public ParsingService(IEpisodeService episodeService, _logger = logger; } - public ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series) + public ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, int tvRageId, SearchCriteriaBase searchCriteria = null) { - try + if (searchCriteria != null) { - if (series == null) + var tvdbId = _sceneMappingService.GetTvDbId(title); + if (tvdbId.HasValue) { - // find series if we dont have it already - // we use an inexact match here since the series name is often mangled with the episode title - series = _seriesService.FindByTitleInexact(title); - if (series == null) + if (searchCriteria.Series.TvdbId == tvdbId) { - // no series matched - return null; + return ParseSpecialEpisodeTitle(title, searchCriteria.Series); } } - // find special episode in series season 0 - Episode episode = _episodeService.FindEpisodeByName(series.Id, 0, title); - if (episode != null) + if (tvRageId == searchCriteria.Series.TvRageId) { - // created parsed info from tv episode that we found - var info = 
new ParsedEpisodeInfo(); - info.SeriesTitle = series.Title; - info.SeriesTitleInfo = new SeriesTitleInfo(); - info.SeriesTitleInfo.Title = info.SeriesTitle; - info.SeasonNumber = episode.SeasonNumber; - info.EpisodeNumbers = new int[1] { episode.EpisodeNumber }; - info.FullSeason = false; - info.Quality = QualityParser.ParseQuality(title); - info.ReleaseGroup = Parser.ParseReleaseGroup(title); - - _logger.Info("Found special episode {0} for title '{1}'", info, title); - return info; + return ParseSpecialEpisodeTitle(title, searchCriteria.Series); } } - catch (Exception e) + + var series = _seriesService.FindByTitleInexact(title); + if (series == null && tvRageId > 0) { - _logger.ErrorException("An error has occurred while trying to parse special episode " + title, e); + series = _seriesService.FindByTvRageId(tvRageId); + } + + if (series == null) + { + _logger.Trace("No matching series {0}", title); + return null; + } + + return ParseSpecialEpisodeTitle(title, series); + } + + public ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series) + { + // find special episode in series season 0 + var episode = _episodeService.FindEpisodeByName(series.Id, 0, title); + if (episode != null) + { + // create parsed info from tv episode + var info = new ParsedEpisodeInfo(); + info.SeriesTitle = series.Title; + info.SeriesTitleInfo = new SeriesTitleInfo(); + info.SeriesTitleInfo.Title = info.SeriesTitle; + info.SeasonNumber = episode.SeasonNumber; + info.EpisodeNumbers = new int[1] { episode.EpisodeNumber }; + info.FullSeason = false; + info.Quality = QualityParser.ParseQuality(title); + info.ReleaseGroup = Parser.ParseReleaseGroup(title); + + _logger.Info("Found special episode {0} for title '{1}'", info, title); + return info; } return null; } - public LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource) { var parsedEpisodeInfo = Parser.ParsePath(filename); diff --git a/src/NzbDrone.Core/Tv/SeriesService.cs 
b/src/NzbDrone.Core/Tv/SeriesService.cs index 60937f7a9..93405f8f5 100644 --- a/src/NzbDrone.Core/Tv/SeriesService.cs +++ b/src/NzbDrone.Core/Tv/SeriesService.cs @@ -103,9 +103,6 @@ public Series FindByTitle(string title) public Series FindByTitleInexact(string title) { - // perform fuzzy matching of series name - // TODO: can replace this search mechanism with something smarter/faster/better - // find any series clean title within the provided release title string cleanTitle = Parser.Parser.CleanSeriesTitle(title); var list = _seriesRepository.All().Where(s => cleanTitle.Contains(s.CleanTitle)).ToList(); @@ -137,7 +134,6 @@ public Series FindByTitleInexact(string title) // get the leftmost series that is the longest // series are usually the first thing in release title, so we select the leftmost and longest match - // we could have multiple matches for series which have a common prefix like "Love it", "Love it Too" so we pick the longest one var match = query.First().series; _logger.Trace("Multiple series matched {0} from title {1}", match.Title, title);