1
0
mirror of https://github.com/janeczku/calibre-web.git synced 2025-01-10 04:19:00 +02:00

Fix: detect the correct encoding for txt-reader

This commit is contained in:
Ozzieisaacs 2020-12-09 11:04:29 +01:00
parent dcab8af8ab
commit d957b2d20f
2 changed files with 31 additions and 11 deletions

View File

@ -20,6 +20,7 @@ from __future__ import division, print_function, unicode_literals
import os import os
import json import json
import shutil import shutil
import chardet
from flask import Response, stream_with_context from flask import Response, stream_with_context
from sqlalchemy import create_engine from sqlalchemy import create_engine
@ -30,16 +31,25 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.exc import OperationalError, InvalidRequestError from sqlalchemy.exc import OperationalError, InvalidRequestError
try: try:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from pydrive.auth import RefreshError
from apiclient import errors from apiclient import errors
from httplib2 import ServerNotFoundError from httplib2 import ServerNotFoundError
gdrive_support = True
importError = None importError = None
except ImportError as err: gdrive_support = True
importError = err except ImportError as e:
importError = e
gdrive_support = False gdrive_support = False
try:
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from pydrive2.auth import RefreshError
except ImportError as err:
try:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from pydrive.auth import RefreshError
except ImportError as err:
importError = err
gdrive_support = False
from . import logger, cli, config from . import logger, cli, config
from .constants import CONFIG_DIR as _CONFIG_DIR from .constants import CONFIG_DIR as _CONFIG_DIR
@ -545,21 +555,24 @@ def partial(total_byte_len, part_size_limit):
return s return s
# downloads files in chunks from gdrive # downloads files in chunks from gdrive
def do_gdrive_download(df, headers): def do_gdrive_download(df, headers, convert_encoding=False):
total_size = int(df.metadata.get('fileSize')) total_size = int(df.metadata.get('fileSize'))
download_url = df.metadata.get('downloadUrl') download_url = df.metadata.get('downloadUrl')
s = partial(total_size, 1024 * 1024) # I'm downloading BIG files, so 100M chunk size is fine for me s = partial(total_size, 1024 * 1024) # I'm downloading BIG files, so 100M chunk size is fine for me
def stream(): def stream(convert_encoding):
for byte in s: for byte in s:
headers = {"Range": 'bytes=%s-%s' % (byte[0], byte[1])} headers = {"Range": 'bytes=%s-%s' % (byte[0], byte[1])}
resp, content = df.auth.Get_Http_Object().request(download_url, headers=headers) resp, content = df.auth.Get_Http_Object().request(download_url, headers=headers)
if resp.status == 206: if resp.status == 206:
if convert_encoding:
result = chardet.detect(content)
content = content.decode(result['encoding']).encode('utf-8')
yield content yield content
else: else:
log.warning('An error occurred: %s', resp) log.warning('An error occurred: %s', resp)
return return
return Response(stream_with_context(stream()), headers=headers) return Response(stream_with_context(stream(convert_encoding)), headers=headers)
_SETTINGS_YAML_TEMPLATE = """ _SETTINGS_YAML_TEMPLATE = """

View File

@ -29,13 +29,14 @@ import mimetypes
import traceback import traceback
import binascii import binascii
import re import re
import chardet # dependency of requests
from babel.dates import format_date from babel.dates import format_date
from babel import Locale as LC from babel import Locale as LC
from babel.core import UnknownLocaleError from babel.core import UnknownLocaleError
from flask import Blueprint, jsonify from flask import Blueprint, jsonify
from flask import render_template, request, redirect, send_from_directory, make_response, g, flash, abort, url_for from flask import render_template, request, redirect, send_from_directory, make_response, g, flash, abort, url_for
from flask import session as flask_session from flask import session as flask_session, send_file
from flask_babel import gettext as _ from flask_babel import gettext as _
from flask_login import login_user, logout_user, login_required, current_user, confirm_login from flask_login import login_user, logout_user, login_required, current_user, confirm_login
from sqlalchemy.exc import IntegrityError, InvalidRequestError, OperationalError from sqlalchemy.exc import IntegrityError, InvalidRequestError, OperationalError
@ -1495,8 +1496,14 @@ def serve_book(book_id, book_format, anyname):
headers = Headers() headers = Headers()
headers["Content-Type"] = mimetypes.types_map.get('.' + book_format, "application/octet-stream") headers["Content-Type"] = mimetypes.types_map.get('.' + book_format, "application/octet-stream")
df = getFileFromEbooksFolder(book.path, data.name + "." + book_format) df = getFileFromEbooksFolder(book.path, data.name + "." + book_format)
return do_gdrive_download(df, headers) return do_gdrive_download(df, headers, (book_format.upper() == 'TXT'))
else: else:
if book_format.upper() == 'TXT':
rawdata = open(os.path.join(config.config_calibre_dir, book.path, data.name + "." + book_format),
"rb").read()
result = chardet.detect(rawdata)
return make_response(
rawdata.decode(result['encoding']).encode('utf-8'))
return send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + book_format) return send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + book_format)