diff --git a/cps/fs.py b/cps/fs.py
new file mode 100644
index 00000000..699d5991
--- /dev/null
+++ b/cps/fs.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
+# Copyright (C) 2020 mmonkey
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import division, print_function, unicode_literals
+from .constants import CACHE_DIR
+from os import listdir, makedirs, remove
+from os.path import isdir, isfile, join
+from shutil import rmtree
+
+CACHE_TYPE_THUMBNAILS = 'thumbnails'
+
+
+class FileSystem(object):
+    """Singleton helper that manages the application's on-disk cache directory.
+
+    The explicit ``object`` base matters: this module still carries the
+    Python 2 ``__future__`` imports, and under Python 2 a bare ``class X:`` is
+    an old-style class on which the ``__new__``/``super()`` singleton below
+    does not work.
+    """
+    _instance = None
+    _cache_dir = CACHE_DIR
+
+    def __new__(cls):
+        # Create the single shared instance lazily, then always hand it back.
+        if cls._instance is None:
+            cls._instance = super(FileSystem, cls).__new__(cls)
+        return cls._instance
+
+    def _resolve_dir(self, cache_type=None):
+        """Return the cache root, or its ``cache_type`` sub-directory, without touching the disk."""
+        return join(self._cache_dir, cache_type) if cache_type else self._cache_dir
+
+    def get_cache_dir(self, cache_type=None):
+        """Return the cache directory for ``cache_type``, creating it when missing.
+
+        :param cache_type: optional sub-directory name (e.g. CACHE_TYPE_THUMBNAILS);
+                           falsy values select the cache root itself.
+        :raises OSError: if the directory is missing and cannot be created.
+        """
+        path = self._resolve_dir(cache_type)
+        if not isdir(path):
+            try:
+                # makedirs() also creates the cache root when it is missing
+                makedirs(path)
+            except OSError:
+                # Another thread may have created the directory between the
+                # isdir() check and makedirs(); only a still-missing directory
+                # is a real error.
+                if not isdir(path):
+                    raise
+        return path
+
+    def get_cache_file_path(self, filename, cache_type=None):
+        """Return the full path of ``filename`` inside the cache, or None for an empty filename."""
+        return join(self.get_cache_dir(cache_type), filename) if filename else None
+
+    def list_cache_files(self, cache_type=None):
+        """Return the names of the regular files directly inside the cache directory."""
+        path = self.get_cache_dir(cache_type)
+        return [name for name in listdir(path) if isfile(join(path, name))]
+
+    def delete_cache_dir(self, cache_type=None):
+        """Recursively delete the cache directory for ``cache_type`` (the whole cache when omitted)."""
+        path = self._resolve_dir(cache_type)
+        if isdir(path):
+            rmtree(path)
+
+    def delete_cache_file(self, filename, cache_type=None):
+        """Delete ``filename`` from the cache; a missing file or empty filename is a no-op."""
+        path = self.get_cache_file_path(filename, cache_type)
+        if path and isfile(path):
+            remove(path)
diff --git a/cps/helper.py b/cps/helper.py
index d3420a11..271ab3e9 100644
--- a/cps/helper.py
+++ b/cps/helper.py
@@ -52,7 +52,7 @@ except ImportError:
from . import calibre_db
from .tasks.convert import TaskConvert
-from . import logger, config, get_locale, db, thumbnails, ub
+from . import logger, config, get_locale, db, fs, ub
from . import gdriveutils as gd
from .constants import STATIC_DIR as _STATIC_DIR
from .subproc_wrapper import process_wait
@@ -555,8 +555,9 @@ def get_book_cover_internal(book, use_generic_cover_on_failure, resolution=1, di
if not disable_thumbnail:
thumbnail = get_book_cover_thumbnail(book, resolution)
if thumbnail:
- if os.path.isfile(thumbnails.get_thumbnail_cache_path(thumbnail)):
- return send_from_directory(thumbnails.get_thumbnail_cache_dir(), thumbnail.filename)
+ thumbnail_path = fs.FileSystem().get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
+ if thumbnail_path and os.path.isfile(thumbnail_path):  # the path string alone is truthy even when no file exists, so test the file itself
+     return send_from_directory(fs.FileSystem().get_cache_dir(fs.CACHE_TYPE_THUMBNAILS), thumbnail.filename)
# Send the book cover from Google Drive if configured
if config.config_use_google_drive:
diff --git a/cps/schedule.py b/cps/schedule.py
index 5d2c94b9..5c658e41 100644
--- a/cps/schedule.py
+++ b/cps/schedule.py
@@ -18,9 +18,9 @@
from __future__ import division, print_function, unicode_literals
-from . import logger
+from . import config, db, logger, ub
from .services.background_scheduler import BackgroundScheduler
-from .tasks.thumbnail import TaskThumbnail
+from .tasks.thumbnail import TaskCleanupCoverThumbnailCache, TaskGenerateCoverThumbnails
log = logger.create()
@@ -29,6 +29,16 @@ def register_jobs():
scheduler = BackgroundScheduler()
# Generate 100 book cover thumbnails every 5 minutes
- scheduler.add_task(user=None, task=lambda: TaskThumbnail(limit=100), trigger='interval', minutes=5)
+ scheduler.add_task(user=None, task=lambda: TaskGenerateCoverThumbnails(limit=100), trigger='interval', minutes=5)
- # TODO: validate thumbnail scheduled task
+ # Cleanup book cover cache every day at 4am
+ scheduler.add_task(user=None, task=lambda: TaskCleanupCoverThumbnailCache(), trigger='cron', hour=4)
+
+ # Reconnect metadata.db every 4 hours
+ scheduler.add(func=reconnect_db_job, trigger='interval', hours=4)
+
+
+def reconnect_db_job():
+    """Scheduler job: log the run, then reconnect a CalibreDB instance using the app config and app DB path."""
+    log.info('Running background task: reconnect to calibre database')
+    db.CalibreDB().reconnect_db(config, ub.app_DB_path)
diff --git a/cps/services/worker.py b/cps/services/worker.py
index 072674a0..2b6816db 100644
--- a/cps/services/worker.py
+++ b/cps/services/worker.py
@@ -35,7 +35,6 @@ def _get_main_thread():
raise Exception("main thread not found?!")
-
class ImprovedQueue(queue.Queue):
def to_list(self):
"""
@@ -45,7 +44,8 @@ class ImprovedQueue(queue.Queue):
with self.mutex:
return list(self.queue)
-#Class for all worker tasks in the background
+
+# Class for all worker tasks in the background
class WorkerThread(threading.Thread):
_instance = None
@@ -127,6 +127,10 @@ class WorkerThread(threading.Thread):
# CalibreTask.start() should wrap all exceptions in it's own error handling
item.task.start(self)
+ # remove self_cleanup tasks from list
+ if item.task.self_cleanup:
+ self.dequeued.remove(item)
+
self.queue.task_done()
@@ -141,6 +145,7 @@ class CalibreTask:
self.end_time = None
self.message = message
self.id = uuid.uuid4()
+ self.self_cleanup = False
@abc.abstractmethod
def run(self, worker_thread):
@@ -209,6 +214,14 @@ class CalibreTask:
# todo: throw error if outside of [0,1]
self._progress = x
+ @property
+ def self_cleanup(self):  # when true, the worker thread removes this task from its dequeued list after start() returns
+ return self._self_cleanup
+
+ @self_cleanup.setter
+ def self_cleanup(self, is_self_cleanup):  # plain assignment without validation; __init__ uses it to set the False default
+ self._self_cleanup = is_self_cleanup
+
def _handleError(self, error_message):
self.stat = STAT_FAIL
self.progress = 1
diff --git a/cps/tasks/thumbnail.py b/cps/tasks/thumbnail.py
index 4e0c6db4..f61eb4a7 100644
--- a/cps/tasks/thumbnail.py
+++ b/cps/tasks/thumbnail.py
@@ -19,13 +19,15 @@
from __future__ import division, print_function, unicode_literals
import os
-from cps import config, db, gdriveutils, logger, ub
-from cps.constants import CACHE_DIR as _CACHE_DIR
+from cps import config, db, fs, gdriveutils, logger, ub
from cps.services.worker import CalibreTask
-from cps.thumbnails import THUMBNAIL_RESOLUTION_1X, THUMBNAIL_RESOLUTION_2X
from datetime import datetime, timedelta
from sqlalchemy import func
-from urllib.request import urlopen
+
+try:
+ from urllib.request import urlopen
+except ImportError as e:
+ from urllib2 import urlopen
try:
from wand.image import Image
@@ -33,73 +35,92 @@ try:
except (ImportError, RuntimeError) as e:
use_IM = False
+THUMBNAIL_RESOLUTION_1X = 1
+THUMBNAIL_RESOLUTION_2X = 2
-class TaskThumbnail(CalibreTask):
+
+class TaskGenerateCoverThumbnails(CalibreTask):
def __init__(self, limit=100, task_message=u'Generating cover thumbnails'):
- super(TaskThumbnail, self).__init__(task_message)
+ super(TaskGenerateCoverThumbnails, self).__init__(task_message)
+ self.self_cleanup = True
self.limit = limit
self.log = logger.create()
self.app_db_session = ub.get_new_session_instance()
- self.worker_db = db.CalibreDB(expire_on_commit=False)
+ self.calibre_db = db.CalibreDB(expire_on_commit=False)
+ self.cache = fs.FileSystem()
+ self.resolutions = [
+ THUMBNAIL_RESOLUTION_1X,
+ THUMBNAIL_RESOLUTION_2X
+ ]
def run(self, worker_thread):
- if self.worker_db.session and use_IM:
- thumbnails = self.get_thumbnail_book_ids()
- thumbnail_book_ids = list(map(lambda t: t.book_id, thumbnails))
+ if self.calibre_db.session and use_IM:
+ expired_thumbnails = self.get_expired_thumbnails()
+ thumbnail_book_ids = self.get_thumbnail_book_ids()
books_without_thumbnails = self.get_books_without_thumbnails(thumbnail_book_ids)
count = len(books_without_thumbnails)
for i, book in enumerate(books_without_thumbnails):
- thumbnails = self.get_thumbnails_for_book(thumbnails, book)
- if thumbnails:
- for thumbnail in thumbnails:
- self.update_book_thumbnail(book, thumbnail)
-
- else:
- self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_1X)
- self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_2X)
+ for resolution in self.resolutions:
+ expired_thumbnail = self.get_expired_thumbnail_for_book_and_resolution(
+ book,
+ resolution,
+ expired_thumbnails
+ )
+ if expired_thumbnail:
+ self.update_book_thumbnail(book, expired_thumbnail)
+ else:
+ self.create_book_thumbnail(book, resolution)
self.progress = (1.0 / count) * i
self._handleSuccess()
- self.app_db_session.close()
+ self.app_db_session.remove()
+
+ def get_expired_thumbnails(self):
+     """Return every thumbnail row whose expiration timestamp is earlier than the current UTC time."""
+     now = datetime.utcnow()
+     expired = self.app_db_session.query(ub.Thumbnail).filter(ub.Thumbnail.expiration < now)
+     return expired.all()
def get_thumbnail_book_ids(self):
return self.app_db_session\
- .query(ub.Thumbnail)\
+ .query(ub.Thumbnail.book_id)\
.group_by(ub.Thumbnail.book_id)\
.having(func.min(ub.Thumbnail.expiration) > datetime.utcnow())\
- .all()
+ .distinct()
def get_books_without_thumbnails(self, thumbnail_book_ids):
- return self.worker_db.session\
+ return self.calibre_db.session\
.query(db.Books)\
.filter(db.Books.has_cover == 1)\
.filter(db.Books.id.notin_(thumbnail_book_ids))\
.limit(self.limit)\
.all()
- def get_thumbnails_for_book(self, thumbnails, book):
- results = list()
- for thumbnail in thumbnails:
- if thumbnail.book_id == book.id:
- results.append(thumbnail)
+ def get_expired_thumbnail_for_book_and_resolution(self, book, resolution, expired_thumbnails):
+ for thumbnail in expired_thumbnails:
+ if thumbnail.book_id == book.id and thumbnail.resolution == resolution:
+ return thumbnail
- return results
+ return None
def update_book_thumbnail(self, book, thumbnail):
+ thumbnail.generated_at = datetime.utcnow()
thumbnail.expiration = datetime.utcnow() + timedelta(days=30)
try:
self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail)
except Exception as ex:
+ self.log.info(u'Error updating book thumbnail: ' + str(ex))
self._handleError(u'Error updating book thumbnail: ' + str(ex))
self.app_db_session.rollback()
def create_book_thumbnail(self, book, resolution):
thumbnail = ub.Thumbnail()
thumbnail.book_id = book.id
+ thumbnail.format = 'jpeg'
thumbnail.resolution = resolution
self.app_db_session.add(thumbnail)
@@ -107,6 +128,7 @@ class TaskThumbnail(CalibreTask):
self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail)
except Exception as ex:
+ self.log.info(u'Error creating book thumbnail: ' + str(ex))
self._handleError(u'Error creating book thumbnail: ' + str(ex))
self.app_db_session.rollback()
@@ -128,9 +150,12 @@ class TaskThumbnail(CalibreTask):
if img.height > height:
width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos')
- img.save(filename=self.get_thumbnail_cache_path(thumbnail))
+ img.format = thumbnail.format
+ filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
+ img.save(filename=filename)
except Exception as ex:
# Bubble exception to calling function
+ self.log.info(u'Error generating thumbnail file: ' + str(ex))
raise ex
finally:
stream.close()
@@ -144,7 +169,9 @@ class TaskThumbnail(CalibreTask):
if img.height > height:
width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos')
- img.save(filename=self.get_thumbnail_cache_path(thumbnail))
+ img.format = thumbnail.format
+ filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
+ img.save(filename=filename)
def get_thumbnail_height(self, thumbnail):
return int(225 * thumbnail.resolution)
@@ -153,20 +180,88 @@ class TaskThumbnail(CalibreTask):
percent = (height / float(img.height))
return int((float(img.width) * float(percent)))
- def get_thumbnail_cache_dir(self):
- if not os.path.isdir(_CACHE_DIR):
- os.makedirs(_CACHE_DIR)
+ @property
+ def name(self):  # fixed name string identifying the cover thumbnail generation task
+ return "GenerateCoverThumbnails"
- if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
- os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
- return os.path.join(_CACHE_DIR, 'thumbnails')
+class TaskCleanupCoverThumbnailCache(CalibreTask):
+ def __init__(self, task_message=u'Validating cover thumbnail cache'):  # task_message is handed to the CalibreTask base initializer
+ super(TaskCleanupCoverThumbnailCache, self).__init__(task_message)
+ self.log = logger.create()
+ self.app_db_session = ub.get_new_session_instance()  # separate scoped session on the app settings DB; run() calls remove() on it when done
+ self.calibre_db = db.CalibreDB(expire_on_commit=False)  # source of the book ids used to find thumbnails of removed books
+ self.cache = fs.FileSystem()  # shared cache-directory helper used to list and delete thumbnail files
- def get_thumbnail_cache_path(self, thumbnail):
- if thumbnail:
- return os.path.join(self.get_thumbnail_cache_dir(), thumbnail.filename)
- return None
+ def run(self, worker_thread):
+     """Reconcile the thumbnail rows in the app database with the files in the thumbnail cache directory."""
+     cached_thumbnail_files = self.cache.list_cache_files(fs.CACHE_TYPE_THUMBNAILS)
+     # Expire thumbnails in the database if the cached file is missing
+     # This case will happen if a user deletes the cache dir or cached files
+     if self.app_db_session:
+         self.expire_missing_thumbnails(cached_thumbnail_files)
+         self.progress = 0.33
+
+     # Delete thumbnails in the database if the book has been removed
+     # This case will happen if a book is removed in Calibre and the metadata.db file is updated in the filesystem
+     # Test the session (as TaskGenerateCoverThumbnails.run does): get_book_ids() queries it and fails without one
+     if self.app_db_session and self.calibre_db.session:
+         self.delete_thumbnails_for_missing_books(self.get_book_ids())
+         self.progress = 0.66
+
+     # Delete extraneous cached thumbnail files
+     # This case will happen if a book was deleted and the thumbnail OR the metadata.db file was changed externally
+     if self.app_db_session:
+         db_thumbnail_files = self.get_thumbnail_filenames()
+         self.delete_extraneous_thumbnail_files(cached_thumbnail_files, db_thumbnail_files)
+
+     self._handleSuccess()
+     self.app_db_session.remove()
+
+ def expire_missing_thumbnails(self, filenames):
+     """Set the expiration of every thumbnail row whose filename is not in ``filenames`` to the current UTC time."""
+     try:
+         missing = self.app_db_session.query(ub.Thumbnail).filter(ub.Thumbnail.filename.notin_(filenames))
+         missing.update({"expiration": datetime.utcnow()}, synchronize_session=False)
+         self.app_db_session.commit()
+     except Exception as ex:
+         error_message = u'Error expiring thumbnails for missing cache files: ' + str(ex)
+         self.log.info(error_message)
+         self._handleError(error_message)
+         self.app_db_session.rollback()
+
+ def get_book_ids(self):
+     """Return the distinct ids of all calibre books flagged as having a cover, as a plain list."""
+     query = self.calibre_db.session.query(db.Books.id)
+     with_cover = query.filter(db.Books.has_cover == 1).distinct()
+     # each result row is a 1-tuple; unwrap it to the bare id
+     book_ids = [row[0] for row in with_cover]
+     return book_ids
+
+ def delete_thumbnails_for_missing_books(self, book_ids):
+     """Delete every thumbnail row whose book_id is not contained in ``book_ids``; roll back on any error."""
+     try:
+         orphaned = self.app_db_session.query(ub.Thumbnail).filter(ub.Thumbnail.book_id.notin_(book_ids))
+         orphaned.delete(synchronize_session=False)
+         self.app_db_session.commit()
+     except Exception as ex:
+         error_message = u'Error deleting thumbnails for missing books: ' + str(ex)
+         self.log.info(error_message)  # log the same contextual text as the task error, like the sibling methods
+         self._handleError(error_message)
+         self.app_db_session.rollback()
+
+ def get_thumbnail_filenames(self):
+     """Return the filename column of every thumbnail row in the app database as a plain list."""
+     rows = self.app_db_session.query(ub.Thumbnail.filename).all()
+     # each result row is a 1-tuple holding only the filename
+     filenames = [row[0] for row in rows]
+     return filenames
+
+ def delete_extraneous_thumbnail_files(self, cached_thumbnail_files, db_thumbnail_files):
+     """Delete each cached thumbnail file whose name does not appear in ``db_thumbnail_files``."""
+     for orphan in set(cached_thumbnail_files).difference(db_thumbnail_files):
+         self.cache.delete_cache_file(orphan, fs.CACHE_TYPE_THUMBNAILS)
@property
def name(self):
- return "Thumbnail"
+ return "CleanupCoverThumbnailCache"
diff --git a/cps/thumbnails.py b/cps/thumbnails.py
deleted file mode 100644
index ea7aac86..00000000
--- a/cps/thumbnails.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
-# Copyright (C) 2020 mmonkey
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-from __future__ import division, print_function, unicode_literals
-import os
-
-from . import logger, ub
-from .constants import CACHE_DIR as _CACHE_DIR
-
-from datetime import datetime
-
-THUMBNAIL_RESOLUTION_1X = 1
-THUMBNAIL_RESOLUTION_2X = 2
-
-log = logger.create()
-
-
-def get_thumbnail_cache_dir():
- if not os.path.isdir(_CACHE_DIR):
- os.makedirs(_CACHE_DIR)
- if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
- os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
- return os.path.join(_CACHE_DIR, 'thumbnails')
-
-
-def get_thumbnail_cache_path(thumbnail):
- if thumbnail:
- return os.path.join(get_thumbnail_cache_dir(), thumbnail.filename)
- return None
-
-
-def cover_thumbnail_exists_for_book(book):
- if book and book.has_cover:
- thumbnail = ub.session.query(ub.Thumbnail).filter(ub.Thumbnail.book_id == book.id).first()
- if thumbnail and thumbnail.expiration > datetime.utcnow():
- thumbnail_path = get_thumbnail_cache_path(thumbnail)
- return thumbnail_path and os.path.isfile(thumbnail_path)
-
- return False
diff --git a/cps/ub.py b/cps/ub.py
index 0b5a65e7..30abd728 100644
--- a/cps/ub.py
+++ b/cps/ub.py
@@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
+import atexit
import os
import sys
import datetime
@@ -42,12 +43,11 @@ from sqlalchemy import create_engine, exc, exists, event
from sqlalchemy import Column, ForeignKey
from sqlalchemy import String, Integer, SmallInteger, Boolean, DateTime, Float, JSON
from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm import backref, relationship, sessionmaker, Session, scoped_session
from werkzeug.security import generate_password_hash
-from . import cli, constants
+from . import cli, constants, logger
session = None
@@ -435,6 +435,14 @@ class RemoteAuthToken(Base):
return '' % self.id
+def filename(context):
+    """Column default: build '<uuid>.<extension>' from the row's current parameters ('jpeg' is written as 'jpg')."""
+    params = context.get_current_parameters()
+    file_format = params['format']
+    extension = 'jpg' if file_format == 'jpeg' else file_format
+    return params['uuid'] + '.' + extension
+
+
class Thumbnail(Base):
__tablename__ = 'thumbnail'
@@ -443,19 +451,10 @@ class Thumbnail(Base):
uuid = Column(String, default=lambda: str(uuid.uuid4()), unique=True)
format = Column(String, default='jpeg')
resolution = Column(SmallInteger, default=1)
+ filename = Column(String, default=filename)
+ generated_at = Column(DateTime, default=lambda: datetime.datetime.utcnow())
expiration = Column(DateTime, default=lambda: datetime.datetime.utcnow() + datetime.timedelta(days=30))
- @hybrid_property
- def extension(self):
- if self.format == 'jpeg':
- return 'jpg'
- else:
- return self.format
-
- @hybrid_property
- def filename(self):
- return self.uuid + '.' + self.extension
-
# Migrate database to current version, has to be updated after every database change. Currently migration from
# everywhere to current should work. Migration is done by checking if relevant columns are existing, and than adding
@@ -705,6 +704,9 @@ def get_new_session_instance():
new_engine = create_engine(u'sqlite:///{0}'.format(cli.settingspath), echo=False)
new_session = scoped_session(sessionmaker())
new_session.configure(bind=new_engine)
+
+ atexit.register(lambda: new_session.remove() if new_session else True)
+
return new_session