1
0
mirror of https://github.com/LibreTranslate/LibreTranslate.git synced 2025-01-23 17:52:54 +02:00

1201 lines
41 KiB
Python
Raw Normal View History

2021-10-24 19:14:09 +02:00
import io
2024-02-05 15:03:27 -05:00
import math
import os
import re
2023-07-09 12:29:11 +02:00
import tempfile
2021-10-24 18:27:37 +02:00
import uuid
2023-07-09 12:29:11 +02:00
from datetime import datetime
from functools import wraps
from html import unescape
2022-12-26 16:10:43 -05:00
from timeit import default_timer
2021-10-24 18:27:37 +02:00
import argostranslatefiles
from argostranslatefiles import get_supported_formats
2024-09-30 11:59:00 -04:00
from flask import Blueprint, Flask, Response, abort, jsonify, render_template, request, send_file, session, url_for, make_response
2023-07-09 12:29:11 +02:00
from flask_babel import Babel
from flask_session import Session
2020-12-20 15:05:22 -05:00
from flask_swagger import swagger
from flask_swagger_ui import get_swaggerui_blueprint
2021-10-24 18:27:37 +02:00
from translatehtml import translate_html
2022-12-26 16:10:43 -05:00
from werkzeug.exceptions import HTTPException
2023-03-09 14:05:09 -05:00
from werkzeug.http import http_date
2023-07-09 12:29:11 +02:00
from werkzeug.utils import secure_filename
2023-07-09 12:29:11 +02:00
from libretranslate import flood, remove_translated_files, scheduler, secret, security, storage
from libretranslate.language import detect_languages, improve_translation_formatting
2023-07-09 12:29:11 +02:00
from libretranslate.locales import (
_,
_lazy,
get_alternate_locale_links,
get_available_locale_codes,
get_available_locales,
gettext_escaped,
gettext_html,
lazy_swag,
)
2022-06-21 14:57:32 -04:00
from .api_keys import Database, RemoteDatabase
2021-10-09 11:44:00 +02:00
from .suggestions import Database as SuggestionsDatabase
2021-10-26 15:32:06 -04:00
def get_version():
    """Return the application version stored in the ``VERSION`` file.

    The file is opened relative to the current working directory.

    Returns:
        str: The file contents with surrounding whitespace removed, or
        ``"?"`` when the file cannot be read or decoded.
    """
    try:
        with open("VERSION", encoding="utf-8") as f:
            return f.read().strip()
    except (OSError, UnicodeError):
        # Best effort: a missing or unreadable VERSION file must not break
        # callers, but unrelated errors (e.g. KeyboardInterrupt) propagate.
        return "?"
2021-10-25 11:06:39 +02:00
def get_upload_dir():
    """Return the directory used for file-translation uploads.

    The directory is ``libretranslate-files-translate`` inside the system
    temporary directory and is created if it does not exist yet.

    Returns:
        str: Path of the upload directory.
    """
    upload_dir = os.path.join(tempfile.gettempdir(), "libretranslate-files-translate")
    # exist_ok avoids the check-then-create race when several workers call
    # this at the same time.
    os.makedirs(upload_dir, exist_ok=True)
    return upload_dir
def get_req_api_key():
    """Return the ``api_key`` value sent with the current request, if any.

    JSON requests are read from the decoded JSON body; all other requests
    are read from ``request.values``.
    """
    if not request.is_json:
        return request.values.get("api_key")
    payload = get_json_dict(request)
    return payload.get("api_key")
2023-03-09 13:59:25 -05:00
def get_req_secret():
    """Return the ``secret`` value sent with the current request, if any.

    JSON requests are read from the decoded JSON body; all other requests
    are read from ``request.values``.
    """
    if not request.is_json:
        return request.values.get("secret")
    payload = get_json_dict(request)
    return payload.get("secret")
def get_json_dict(request):
    """Return the JSON body of ``request``, which must decode to a dict.

    Aborts with HTTP 400 ("Invalid JSON format") when the decoded body is
    anything other than a dict (for example a list or a bare string).
    """
    body = request.get_json()
    if isinstance(body, dict):
        return body
    abort(400, description=_("Invalid JSON format"))
    return body
2021-05-18 09:11:02 +05:30
2020-12-21 11:16:49 -05:00
def get_remote_address():
    """Return the client address for the current request.

    When one or more ``X-Forwarded-For`` headers are present, the first
    comma-separated entry of the first header is used. Otherwise the
    connection's remote address is used, falling back to ``127.0.0.1``
    when that is unavailable.
    """
    forwarded = request.headers.getlist("X-Forwarded-For")
    if not forwarded:
        return request.remote_addr or "127.0.0.1"
    return forwarded[0].split(",")[0]
2020-12-20 15:05:22 -05:00
2021-06-03 10:36:25 -04:00
2023-11-16 15:43:47 -05:00
def get_req_limits(default_limit, api_keys_db, db_multiplier=1, multiplier=1):
    """Compute the request limit that applies to the current request.

    Args:
        default_limit: Limit used when no API key specific limit applies.
        api_keys_db: API key database, or a falsy value when keys are not
            in use. Its ``lookup`` result is indexed at position 0 for the
            per-key request limit.
        db_multiplier: Factor applied to the per-key limit only.
        multiplier: Factor applied to the final limit (default or per-key).

    Returns:
        int: The resulting limit, truncated to an integer.
    """
    limit = default_limit
    # Only inspect the request for a key when a key database is configured.
    key = get_req_api_key() if api_keys_db else None
    if key:
        key_limits = api_keys_db.lookup(key)
        if key_limits is not None:
            limit = key_limits[0] * db_multiplier
    return int(limit * multiplier)
2021-06-03 10:36:25 -04:00
2024-01-21 12:48:42 -05:00
def get_char_limit(default_limit, api_keys_db):
    """Return the character limit that applies to the current request.

    The per-key value (position 1 of the ``lookup`` result) overrides
    ``default_limit`` only when a key database is configured, the request
    carries a known API key, and that key has a non-``None`` value there.
    """
    if not api_keys_db:
        return default_limit
    key = get_req_api_key()
    if not key:
        return default_limit
    key_limits = api_keys_db.lookup(key)
    if key_limits is None or key_limits[1] is None:
        return default_limit
    return key_limits[1]
2023-11-16 15:43:47 -05:00
def get_routes_limits(args, api_keys_db):
    """Build the list of rate-limit providers for the request limiter.

    Args:
        args: Parsed application settings; reads ``req_limit``,
            ``hourly_req_limit``, ``hourly_req_limit_decay`` and
            ``daily_req_limit``.
        api_keys_db: API key database forwarded to ``get_req_limits``
            (may be ``None``).

    Returns:
        list: Zero-argument callables, each returning a limit string such as
        ``"30 per minute"``. The per-minute provider is always present; the
        hourly providers (one per window of 1..decay+1 hours) and the daily
        provider are added only when their limit is greater than zero.
    """
    default_req_limit = args.req_limit
    if default_req_limit == -1:
        # TODO: better way?
        default_req_limit = 9999999999999

    def minute_limits():
        return "%s per minute" % get_req_limits(default_req_limit, api_keys_db)

    def hourly_limits(n):
        # Factory so each provider captures its own window size ``n``.
        def func():
            decay = 0.75 ** (n - 1)
            # Convert to int BEFORE scaling by n: environment values are
            # strings, and "60" * 2 would yield "6060" rather than 120.
            db_multiplier = int(os.environ.get("LT_HOURLY_REQ_LIMIT_MULTIPLIER", 60)) * n
            return "{} per {} hour".format(
                get_req_limits(args.hourly_req_limit * n, api_keys_db, db_multiplier, decay),
                n,
            )

        return func

    def daily_limits():
        return "%s per day" % get_req_limits(
            args.daily_req_limit,
            api_keys_db,
            int(os.environ.get("LT_DAILY_REQ_LIMIT_MULTIPLIER", 1440)),
        )

    res = [minute_limits]

    if args.hourly_req_limit > 0:
        for n in range(1, args.hourly_req_limit_decay + 2):
            res.append(hourly_limits(n))

    if args.daily_req_limit > 0:
        res.append(daily_limits)

    return res
2024-06-03 13:20:48 -04:00
def filter_unique(seq, extra):
    """Return the items of ``seq`` in order with unwanted entries removed.

    Args:
        seq: Iterable of hashable items (strings at the call sites).
        extra: A value to exclude from the result.

    Returns:
        list: First occurrences from ``seq``, skipping duplicates, empty
        strings and anything equal to ``extra``.
    """
    # Pre-seeding the seen-set excludes ``extra`` and "" outright.
    seen = {extra, ""}
    unique = []
    for item in seq:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
2021-05-17 11:41:15 -04:00
def create_app(args):
from libretranslate.init import boot
2021-05-18 09:11:02 +05:30
boot(args.load_only, args.update_models, args.force_update_models)
2021-01-19 17:51:10 +01:00
from libretranslate.language import load_languages
2021-05-18 09:11:02 +05:30
swagger_url = args.url_prefix + "/docs" # Swagger UI (w/o trailing '/')
api_url = args.url_prefix + "/spec"
2022-12-31 14:44:25 -07:00
2022-12-30 21:23:50 -07:00
bp = Blueprint('Main app', __name__)
2020-12-20 15:05:22 -05:00
2023-03-09 16:09:04 -05:00
storage.setup(args.shared_storage)
if not args.disable_files_translation:
remove_translated_files.setup(get_upload_dir())
languages = load_languages()
2022-12-09 16:36:12 -05:00
language_pairs = {}
for lang in languages:
2022-12-10 00:03:21 -05:00
language_pairs[lang.code] = sorted([l.to_lang.code for l in lang.translations_from])
# Map user-defined frontend languages to Argos language objects.
if args.frontend_language_source == "auto":
2021-05-18 09:11:02 +05:30
frontend_argos_language_source = type(
2023-01-04 12:15:18 -05:00
"obj", (object,), {"code": "auto", "name": _("Auto Detect")}
2021-05-18 09:11:02 +05:30
)
2021-01-15 12:01:16 -05:00
else:
2021-05-18 09:11:02 +05:30
frontend_argos_language_source = next(
iter([l for l in languages if l.code == args.frontend_language_source]),
None,
)
if frontend_argos_language_source is None:
frontend_argos_language_source = languages[0]
2021-01-19 17:51:10 +01:00
2023-07-09 12:29:11 +02:00
language_target_fallback = languages[1] if len(languages) >= 2 else languages[0]
if args.frontend_language_target == "locale":
def resolve_language_locale():
loc = get_locale()
language_target = next(
iter([l for l in languages if l.code == loc]), None
)
if language_target is None:
language_target = language_target_fallback
return language_target
frontend_argos_language_target = resolve_language_locale
else:
language_target = next(
iter([l for l in languages if l.code == args.frontend_language_target]), None
)
if language_target is None:
language_target = language_target_fallback
frontend_argos_language_target = lambda: language_target
2021-01-19 17:51:10 +01:00
frontend_argos_supported_files_format = []
for file_format in get_supported_formats():
for ff in file_format.supported_file_extensions:
frontend_argos_supported_files_format.append(ff)
2021-05-17 11:41:15 -04:00
api_keys_db = None
2023-11-10 14:56:52 -05:00
if args.req_limit > 0 or args.api_keys or args.daily_req_limit > 0 or args.hourly_req_limit > 0:
2022-06-21 14:57:32 -04:00
api_keys_db = None
if args.api_keys:
2023-07-09 12:29:11 +02:00
api_keys_db = RemoteDatabase(args.api_keys_remote) if args.api_keys_remote else Database(args.api_keys_db_path)
2021-05-17 11:41:15 -04:00
2020-12-20 15:05:22 -05:00
from flask_limiter import Limiter
2021-05-18 09:11:02 +05:30
2024-02-05 15:03:27 -05:00
def limits_cost():
req_cost = getattr(request, 'req_cost', 1)
if args.req_time_cost > 0:
return max(req_cost, int(math.ceil(getattr(request, 'duration', 0) / args.req_time_cost)))
else:
return req_cost
2020-12-20 15:05:22 -05:00
limiter = Limiter(
key_func=get_remote_address,
2021-05-18 09:11:02 +05:30
default_limits=get_routes_limits(
2023-11-16 15:43:47 -05:00
args, api_keys_db
2021-05-18 09:11:02 +05:30
),
2022-10-14 13:27:34 -04:00
storage_uri=args.req_limit_storage,
2023-11-15 14:34:20 -05:00
default_limits_deduct_when=lambda req: True, # Force cost to be called after the request
default_limits_cost=limits_cost,
2024-07-16 15:33:51 -04:00
strategy="moving-window",
2020-12-20 15:05:22 -05:00
)
2021-02-26 09:58:29 -05:00
else:
2021-05-18 09:11:02 +05:30
from .no_limiter import Limiter
limiter = Limiter()
2023-03-09 23:07:12 -05:00
if not "gunicorn" in os.environ.get("SERVER_SOFTWARE", ""):
# Gunicorn starts the scheduler in the master process
scheduler.setup(args)
flood.setup(args)
secret.setup(args)
2020-12-20 15:05:22 -05:00
2022-12-26 16:10:43 -05:00
measure_request = None
gauge_request = None
if args.metrics:
if os.environ.get("PROMETHEUS_MULTIPROC_DIR") is None:
default_mp_dir = os.path.abspath(os.path.join("db", "prometheus"))
if not os.path.isdir(default_mp_dir):
os.mkdir(default_mp_dir)
os.environ["PROMETHEUS_MULTIPROC_DIR"] = default_mp_dir
2023-07-09 12:29:11 +02:00
from prometheus_client import CONTENT_TYPE_LATEST, CollectorRegistry, Gauge, Summary, generate_latest, multiprocess
2022-12-26 16:10:43 -05:00
2022-12-30 21:23:50 -07:00
@bp.route("/metrics")
2022-12-26 20:00:55 -05:00
@limiter.exempt
2022-12-26 16:10:43 -05:00
def prometheus_metrics():
if args.metrics_auth_token:
authorization = request.headers.get('Authorization')
if authorization != "Bearer " + args.metrics_auth_token:
2023-01-05 13:12:35 -05:00
abort(401, description=_("Unauthorized"))
2022-12-26 16:10:43 -05:00
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)
return Response(generate_latest(registry), mimetype=CONTENT_TYPE_LATEST)
measure_request = Summary('libretranslate_http_request_duration_seconds', 'Time spent on request', ['endpoint', 'status', 'request_ip', 'api_key'])
2022-12-26 16:10:43 -05:00
measure_request.labels('/translate', 200, '127.0.0.1', '')
gauge_request = Gauge('libretranslate_http_requests_in_flight', 'Active requests', ['endpoint', 'request_ip', 'api_key'], multiprocess_mode='livesum')
2022-12-26 16:10:43 -05:00
gauge_request.labels('/translate', '127.0.0.1', '')
2021-05-17 11:41:15 -04:00
def access_check(f):
@wraps(f)
def func(*a, **kw):
ip = get_remote_address()
if flood.is_banned(ip):
2023-01-05 13:12:35 -05:00
abort(403, description=_("Too many request limits violations"))
2021-05-18 09:11:02 +05:30
if args.api_keys:
ak = get_req_api_key()
2023-03-09 13:59:25 -05:00
if ak and api_keys_db.lookup(ak) is None:
abort(
403,
2023-01-05 13:12:35 -05:00
description=_("Invalid API key"),
)
2023-03-09 13:59:25 -05:00
else:
need_key = False
key_missing = api_keys_db.lookup(ak) is None
2023-03-09 13:59:25 -05:00
if (args.require_api_key_origin
and key_missing
and not re.match(args.require_api_key_origin, request.headers.get("Origin", ""))
):
need_key = True
2024-09-30 11:59:00 -04:00
req_secret = get_req_secret()
2023-03-09 13:59:25 -05:00
if (args.require_api_key_secret
and key_missing
2024-09-30 11:59:00 -04:00
and not secret.secret_match(req_secret)
2023-03-09 13:59:25 -05:00
):
need_key = True
2024-09-30 11:59:00 -04:00
if secret.secret_bogus_match(req_secret):
abort(make_response(jsonify({
'translatedText': secret.get_emoji(),
'alternatives': [],
'detectedLanguage': { 'confidence': 100, 'language': 'en' }
}), 200))
2023-03-09 16:09:04 -05:00
2023-03-09 13:59:25 -05:00
if need_key:
2023-01-05 13:12:35 -05:00
description = _("Please contact the server operator to get an API key")
if args.get_api_key_link:
2023-01-05 13:12:35 -05:00
description = _("Visit %(url)s to get an API key", url=args.get_api_key_link)
2021-05-18 09:11:02 +05:30
abort(
2023-03-09 22:00:27 -05:00
400,
description=description,
2021-05-18 09:11:02 +05:30
)
2021-05-17 11:41:15 -04:00
return f(*a, **kw)
2022-12-26 16:10:43 -05:00
if args.metrics:
@wraps(func)
def measure_func(*a, **kw):
start_t = default_timer()
status = 200
ip = get_remote_address()
ak = get_req_api_key() or ''
g = gauge_request.labels(request.path, ip, ak)
try:
g.inc()
return func(*a, **kw)
except HTTPException as e:
status = e.code
raise e
finally:
2024-02-05 15:03:27 -05:00
request.duration = max(default_timer() - start_t, 0)
measure_request.labels(request.path, status, ip, ak).observe(request.duration)
2022-12-26 16:10:43 -05:00
g.dec()
return measure_func
else:
2024-02-05 15:03:27 -05:00
@wraps(func)
def time_func(*a, **kw):
start_t = default_timer()
try:
return func(*a, **kw)
finally:
request.duration = max(default_timer() - start_t, 0)
return time_func
2022-12-30 21:23:50 -07:00
@bp.errorhandler(400)
2020-12-20 15:05:22 -05:00
def invalid_api(e):
return jsonify({"error": str(e.description)}), 400
2022-12-30 21:23:50 -07:00
@bp.errorhandler(500)
2020-12-20 15:05:22 -05:00
def server_error(e):
return jsonify({"error": str(e.description)}), 500
2022-12-30 21:23:50 -07:00
@bp.errorhandler(429)
2020-12-20 15:05:22 -05:00
def slow_down_error(e):
2021-05-16 11:50:22 -04:00
flood.report(get_remote_address())
2023-01-05 13:12:35 -05:00
return jsonify({"error": _("Slowdown:") + " " + str(e.description)}), 429
2020-12-20 15:05:22 -05:00
2022-12-30 21:23:50 -07:00
@bp.errorhandler(403)
2021-05-16 11:50:22 -04:00
def denied(e):
return jsonify({"error": str(e.description)}), 403
2022-12-30 21:23:50 -07:00
@bp.route("/")
@limiter.exempt
2020-12-20 15:05:22 -05:00
def index():
2022-02-07 11:02:32 +01:00
if args.disable_web_ui:
abort(404)
2023-01-06 11:50:51 -05:00
langcode = request.args.get('lang')
if langcode and langcode in get_available_locale_codes(not args.debug):
session.update(preferred_lang=langcode)
2024-09-30 12:42:02 -04:00
resp = make_response(render_template(
2021-05-18 09:11:02 +05:30
"index.html",
gaId=args.ga_id,
frontendTimeout=args.frontend_timeout,
api_keys=args.api_keys,
2022-06-21 15:17:42 -04:00
get_api_key_link=args.get_api_key_link,
2021-05-18 09:11:02 +05:30
web_version=os.environ.get("LT_WEB") is not None,
2022-12-31 14:44:25 -07:00
version=get_version(),
swagger_url=swagger_url,
2023-01-06 11:50:51 -05:00
available_locales=[{'code': l['code'], 'name': _lazy(l['name'])} for l in get_available_locales(not args.debug)],
2023-01-05 14:36:50 -05:00
current_locale=get_locale(),
alternate_locales=get_alternate_locale_links()
2024-09-30 12:42:02 -04:00
))
if args.require_api_key_secret:
resp.set_cookie('r', '1')
return resp
2020-12-20 15:05:22 -05:00
2023-01-06 11:50:51 -05:00
@bp.route("/js/app.js")
2023-01-04 12:15:18 -05:00
@limiter.exempt
def appjs():
if args.disable_web_ui:
2024-09-30 11:59:00 -04:00
abort(404)
api_secret = ""
bogus_api_secret = ""
if args.require_api_key_secret:
bogus_api_secret = secret.get_bogus_secret_b64()
2023-01-04 12:15:18 -05:00
2024-09-30 12:42:02 -04:00
if 'User-Agent' in request.headers and request.cookies.get('r'):
2024-09-30 11:59:00 -04:00
api_secret = secret.get_current_secret_js()
else:
api_secret = secret.get_bogus_secret_js()
response = Response(render_template("app.js.template",
2023-01-06 11:50:51 -05:00
url_prefix=args.url_prefix,
2023-03-09 13:59:25 -05:00
get_api_key_link=args.get_api_key_link,
2024-09-30 11:59:00 -04:00
api_secret=api_secret,
bogus_api_secret=bogus_api_secret), content_type='application/javascript; charset=utf-8')
2023-03-09 13:59:25 -05:00
if args.require_api_key_secret:
2023-03-09 14:05:09 -05:00
response.headers['Last-Modified'] = http_date(datetime.now())
2023-03-09 13:59:25 -05:00
response.headers['Cache-Control'] = 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0, max-age=0'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = '-1'
2023-03-09 13:59:25 -05:00
return response
2023-01-04 12:15:18 -05:00
2022-12-30 21:23:50 -07:00
@bp.get("/languages")
@limiter.exempt
2020-12-20 15:05:22 -05:00
def langs():
"""
Retrieve list of supported languages
---
tags:
- translate
responses:
200:
description: List of languages
2021-01-10 08:38:11 +00:00
schema:
id: languages
type: array
items:
type: object
properties:
code:
type: string
description: Language code
name:
type: string
description: Human-readable language name (in English)
targets:
type: array
items:
type: string
description: Supported target language codes
2020-12-20 15:05:22 -05:00
"""
2023-01-05 13:12:35 -05:00
return jsonify([{"code": l.code, "name": _lazy(l.name), "targets": language_pairs.get(l.code, [])} for l in languages])
2020-12-20 15:05:22 -05:00
# Add CORS headers to every response
2022-12-30 21:23:50 -07:00
@bp.after_request
2020-12-20 15:05:22 -05:00
def after_request(response):
2021-05-18 09:11:02 +05:30
response.headers.add("Access-Control-Allow-Origin", "*")
response.headers.add(
"Access-Control-Allow-Headers", "Authorization, Content-Type"
)
response.headers.add("Access-Control-Expose-Headers", "Authorization")
response.headers.add("Access-Control-Allow-Methods", "GET, POST")
response.headers.add("Access-Control-Allow-Credentials", "true")
response.headers.add("Access-Control-Max-Age", 60 * 60 * 24 * 20)
2020-12-20 15:05:22 -05:00
return response
2022-12-30 21:23:50 -07:00
@bp.post("/translate")
2021-05-17 11:41:15 -04:00
@access_check
2020-12-20 15:05:22 -05:00
def translate():
"""
Translate text from a language to another
---
tags:
- translate
parameters:
- in: formData
name: q
schema:
2021-01-19 17:51:10 +01:00
oneOf:
- type: string
example: Hello world!
- type: array
example: ['Hello world!']
2020-12-20 15:05:22 -05:00
required: true
2021-01-19 17:51:10 +01:00
description: Text(s) to translate
2020-12-20 15:05:22 -05:00
- in: formData
name: source
schema:
type: string
example: en
required: true
2021-01-19 17:51:10 +01:00
description: Source language code
2020-12-20 15:05:22 -05:00
- in: formData
name: target
schema:
type: string
example: es
required: true
description: Target language code
2021-09-11 15:08:57 +02:00
- in: formData
name: format
schema:
type: string
enum: [text, html]
default: text
example: text
2021-09-11 15:08:57 +02:00
required: false
description: >
Format of source text:
* `text` - Plain text
* `html` - HTML markup
- in: formData
name: alternatives
schema:
type: integer
default: 0
example: 3
required: false
description: Preferred number of alternative translations
- in: formData
name: api_key
schema:
type: string
example: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
required: false
description: API key
2020-12-20 15:05:22 -05:00
responses:
200:
description: Translated text
2021-01-10 08:38:11 +00:00
schema:
id: translate
type: object
properties:
translatedText:
2021-01-19 17:51:10 +01:00
oneOf:
- type: string
- type: array
description: Translated text(s)
2020-12-20 15:05:22 -05:00
400:
description: Invalid request
2021-01-10 08:38:11 +00:00
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
2020-12-20 15:05:22 -05:00
500:
description: Translation error
2021-01-10 08:38:11 +00:00
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
2020-12-20 15:05:22 -05:00
429:
description: Slow down
2021-01-10 08:38:11 +00:00
schema:
id: error-slow-down
type: object
properties:
error:
type: string
description: Reason for slow down
2021-05-16 11:50:22 -04:00
403:
description: Banned
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
2020-12-20 15:05:22 -05:00
"""
if request.is_json:
json = get_json_dict(request)
2021-05-18 09:11:02 +05:30
q = json.get("q")
source_lang = json.get("source")
target_lang = json.get("target")
2021-09-11 15:08:57 +02:00
text_format = json.get("format")
num_alternatives = int(json.get("alternatives", 0))
2020-12-20 15:05:22 -05:00
else:
q = request.values.get("q")
source_lang = request.values.get("source")
target_lang = request.values.get("target")
2021-09-11 15:08:57 +02:00
text_format = request.values.get("format")
num_alternatives = request.values.get("alternatives", 0)
2020-12-20 15:05:22 -05:00
if not q:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='q'))
2020-12-20 15:05:22 -05:00
if not source_lang:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='source'))
2020-12-20 15:05:22 -05:00
if not target_lang:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='target'))
try:
2024-06-03 12:55:35 -04:00
num_alternatives = max(0, int(num_alternatives))
except ValueError:
2024-06-03 12:55:35 -04:00
abort(400, description=_("Invalid request: %(name)s parameter is not a number", name='alternatives'))
if args.alternatives_limit != -1 and num_alternatives > args.alternatives_limit:
2024-06-03 12:55:35 -04:00
abort(400, description=_("Invalid request: %(name)s parameter must be <= %(value)s", name='alternatives', value=args.alternatives_limit))
2020-12-20 15:05:22 -05:00
if not request.is_json:
# Normalize line endings to UNIX style (LF) only so we can consistently
# enforce character limits.
# https://www.rfc-editor.org/rfc/rfc2046#section-4.1.1
q = "\n".join(q.splitlines())
2024-01-21 12:48:42 -05:00
char_limit = get_char_limit(args.char_limit, api_keys_db)
2021-01-19 17:51:10 +01:00
batch = isinstance(q, list)
if batch and args.batch_limit != -1:
2021-05-18 09:11:02 +05:30
batch_size = len(q)
if args.batch_limit < batch_size:
abort(
400,
2023-01-05 13:12:35 -05:00
description=_("Invalid request: request (%(size)s) exceeds text limit (%(limit)s)", size=batch_size, limit=args.batch_limit),
2021-05-18 09:11:02 +05:30
)
2021-01-19 18:53:53 +01:00
2023-11-15 14:47:44 -05:00
src_texts = q if batch else [q]
2023-11-15 14:34:20 -05:00
2024-01-21 12:48:42 -05:00
if char_limit != -1:
2023-11-15 14:47:44 -05:00
for text in src_texts:
2024-01-21 12:48:42 -05:00
if len(text) > char_limit:
2023-11-15 14:47:44 -05:00
abort(
400,
2024-01-21 12:48:42 -05:00
description=_("Invalid request: request (%(size)s) exceeds text limit (%(limit)s)", size=len(text), limit=char_limit),
2023-11-15 14:47:44 -05:00
)
2021-01-19 17:51:10 +01:00
2023-11-15 14:47:44 -05:00
if batch:
request.req_cost = max(1, len(q))
2020-12-20 15:05:22 -05:00
2021-05-18 09:11:02 +05:30
if source_lang == "auto":
2023-11-15 14:47:44 -05:00
candidate_langs = detect_languages(src_texts)
2023-11-08 21:49:56 -05:00
detected_src_lang = candidate_langs[0]
else:
2023-11-08 21:49:56 -05:00
detected_src_lang = {"confidence": 100.0, "language": source_lang}
2021-01-19 17:51:10 +01:00
2023-11-08 21:49:56 -05:00
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
2021-10-24 18:27:37 +02:00
2023-11-08 21:49:56 -05:00
if src_lang is None:
abort(400, description=_("%(lang)s is not supported", lang=source_lang))
2020-12-20 15:05:22 -05:00
tgt_lang = next(iter([l for l in languages if l.code == target_lang]), None)
2021-01-19 17:51:10 +01:00
2020-12-20 15:05:22 -05:00
if tgt_lang is None:
2023-01-05 13:12:35 -05:00
abort(400, description=_("%(lang)s is not supported",lang=target_lang))
2020-12-20 15:05:22 -05:00
2021-09-11 15:08:57 +02:00
if not text_format:
text_format = "text"
if text_format not in ["text", "html"]:
2023-01-05 13:12:35 -05:00
abort(400, description=_("%(format)s format is not supported", format=text_format))
2021-09-11 15:08:57 +02:00
2020-12-20 15:05:22 -05:00
try:
2021-05-18 09:11:02 +05:30
if batch:
batch_results = []
batch_alternatives = []
2023-11-08 21:58:59 -05:00
for text in q:
2023-11-08 21:49:56 -05:00
translator = src_lang.get_translation(tgt_lang)
2022-12-10 00:03:21 -05:00
if translator is None:
2023-11-08 21:49:56 -05:00
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
2022-12-10 00:03:21 -05:00
2021-10-24 18:27:37 +02:00
if text_format == "html":
translated_text = unescape(str(translate_html(translator, text)))
alternatives = [] # Not supported for html yet
2021-10-24 18:27:37 +02:00
else:
hypotheses = translator.hypotheses(text, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
2024-06-03 13:20:48 -04:00
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
batch_results.append(translated_text)
batch_alternatives.append(alternatives)
result = {"translatedText": batch_results}
2021-09-11 15:08:57 +02:00
if source_lang == "auto":
result["detectedLanguage"] = [detected_src_lang] * len(q)
if num_alternatives > 0:
result["alternatives"] = batch_alternatives
return jsonify(result)
2021-05-18 09:11:02 +05:30
else:
2023-11-08 21:49:56 -05:00
translator = src_lang.get_translation(tgt_lang)
2022-12-10 00:03:21 -05:00
if translator is None:
2023-11-08 21:49:56 -05:00
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
2021-09-11 15:08:57 +02:00
if text_format == "html":
translated_text = unescape(str(translate_html(translator, q)))
alternatives = [] # Not supported for html yet
2021-09-11 15:08:57 +02:00
else:
hypotheses = translator.hypotheses(q, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
2024-06-03 13:20:48 -04:00
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
result = {"translatedText": translated_text}
if source_lang == "auto":
result["detectedLanguage"] = detected_src_lang
if num_alternatives > 0:
result["alternatives"] = alternatives
return jsonify(result)
2020-12-20 15:05:22 -05:00
except Exception as e:
2023-10-30 13:09:39 -04:00
raise e
2023-01-05 13:12:35 -05:00
abort(500, description=_("Cannot translate text: %(text)s", text=str(e)))
2020-12-20 15:05:22 -05:00
2022-12-30 21:23:50 -07:00
@bp.post("/translate_file")
2021-10-24 18:27:37 +02:00
@access_check
def translate_file():
"""
Translate file from a language to another
---
tags:
- translate
consumes:
- multipart/form-data
parameters:
- in: formData
name: file
type: file
required: true
2021-10-26 16:04:50 -04:00
description: File to translate
2021-10-24 18:27:37 +02:00
- in: formData
name: source
schema:
type: string
example: en
required: true
description: Source language code
- in: formData
name: target
schema:
type: string
example: es
required: true
description: Target language code
- in: formData
name: api_key
schema:
type: string
example: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
required: false
description: API key
responses:
200:
description: Translated file
schema:
id: translate-file
2021-10-24 18:27:37 +02:00
type: object
properties:
translatedFileUrl:
type: string
description: Translated file url
2021-10-24 18:27:37 +02:00
400:
description: Invalid request
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
500:
description: Translation error
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
429:
description: Slow down
schema:
id: error-slow-down
type: object
properties:
error:
type: string
description: Reason for slow down
403:
description: Banned
schema:
id: error-response
type: object
properties:
error:
type: string
description: Error message
"""
2021-10-25 17:09:23 +02:00
if args.disable_files_translation:
2023-01-05 13:12:35 -05:00
abort(403, description=_("Files translation are disabled on this server."))
2021-10-25 17:09:23 +02:00
2021-10-24 19:14:09 +02:00
source_lang = request.form.get("source")
target_lang = request.form.get("target")
2021-10-24 18:27:37 +02:00
file = request.files['file']
2024-01-21 12:48:42 -05:00
char_limit = get_char_limit(args.char_limit, api_keys_db)
2021-10-24 18:27:37 +02:00
if not file:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='file'))
2021-10-24 18:27:37 +02:00
if not source_lang:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='source'))
2021-10-24 18:27:37 +02:00
if not target_lang:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: missing %(name)s parameter", name='target'))
2021-10-24 18:27:37 +02:00
if file.filename == '':
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: empty file"))
2021-10-24 18:27:37 +02:00
if os.path.splitext(file.filename)[1] not in frontend_argos_supported_files_format:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid request: file format not supported"))
2021-10-24 18:27:37 +02:00
2023-11-08 21:55:55 -05:00
src_lang = next(iter([l for l in languages if l.code == source_lang]), None)
2021-10-24 18:27:37 +02:00
if src_lang is None and source_lang != "auto":
2023-11-08 21:55:55 -05:00
abort(400, description=_("%(lang)s is not supported", lang=source_lang))
2021-10-24 18:27:37 +02:00
tgt_lang = next(iter([l for l in languages if l.code == target_lang]), None)
if tgt_lang is None:
2023-01-05 13:12:35 -05:00
abort(400, description=_("%(lang)s is not supported", lang=target_lang))
2021-10-24 18:27:37 +02:00
try:
filename = str(uuid.uuid4()) + '.' + secure_filename(file.filename)
2021-10-25 11:06:39 +02:00
filepath = os.path.join(get_upload_dir(), filename)
2021-10-24 18:27:37 +02:00
file.save(filepath)
2023-11-15 14:34:20 -05:00
# Not an exact science: take the number of bytes and divide by
# the character limit. Assuming a plain text file, this will
# set the cost of the request to N = bytes / char_limit, which is
# roughly equivalent to a batch process of N batches assuming
# each batch uses all available limits
2024-01-21 12:48:42 -05:00
if char_limit > 0:
request.req_cost = max(1, int(os.path.getsize(filepath) / char_limit))
2023-11-15 14:34:20 -05:00
if source_lang == "auto":
src_texts = argostranslatefiles.get_texts(filepath)
candidate_langs = detect_languages(src_texts)
detected_src_lang = candidate_langs[0]
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
if src_lang is None:
2024-10-28 16:12:59 -04:00
abort(400, description=_("%(lang)s is not supported", lang=detected_src_lang["language"]))
2023-11-08 21:55:55 -05:00
translated_file_path = argostranslatefiles.translate_file(src_lang.get_translation(tgt_lang), filepath)
2021-10-24 18:38:35 +02:00
translated_filename = os.path.basename(translated_file_path)
2021-10-25 10:56:17 +02:00
2021-10-24 18:27:37 +02:00
return jsonify(
{
"translatedFileUrl": url_for('Main app.download_file', filename=translated_filename, _external=True)
2021-10-24 18:27:37 +02:00
}
)
except Exception as e:
abort(500, description=e)
2022-12-30 21:23:50 -07:00
@bp.get("/download_file/<string:filename>")
2021-10-24 18:44:35 +02:00
def download_file(filename: str):
2021-10-24 18:38:35 +02:00
"""
Download a translated file
"""
2021-10-25 17:09:23 +02:00
if args.disable_files_translation:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Files translation are disabled on this server."))
2021-10-25 11:06:39 +02:00
filepath = os.path.join(get_upload_dir(), filename)
2021-10-26 15:41:14 -04:00
try:
checked_filepath = security.path_traversal_check(filepath, get_upload_dir())
if os.path.isfile(checked_filepath):
filepath = checked_filepath
except security.SuspiciousFileOperationError:
2023-01-05 13:12:35 -05:00
abort(400, description=_("Invalid filename"))
2021-10-24 19:14:09 +02:00
return_data = io.BytesIO()
with open(filepath, 'rb') as fo:
return_data.write(fo.read())
return_data.seek(0)
download_filename = filename.split('.')
download_filename.pop(0)
download_filename = '.'.join(download_filename)
2021-10-24 18:44:35 +02:00
2022-04-07 15:56:57 +02:00
return send_file(return_data, as_attachment=True, download_name=download_filename)
2021-10-24 18:38:35 +02:00
2022-12-30 21:23:50 -07:00
@bp.post("/detect")
@access_check
def detect():
    """
    Detect the language of a single text
    ---
    tags:
      - translate
    parameters:
      - in: formData
        name: q
        schema:
          type: string
          example: What language is this?
        required: true
        description: Text to detect
      - in: formData
        name: api_key
        schema:
          type: string
          example: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
        required: false
        description: API key
    responses:
      200:
        description: Detections
        schema:
          id: detections
          type: array
          items:
            type: object
            properties:
              confidence:
                type: number
                format: integer
                minimum: 0
                maximum: 100
                description: Confidence value
                example: 100
              language:
                type: string
                description: Language code
                example: en
      400:
        description: Invalid request
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
      500:
        description: Detection error
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
      429:
        description: Slow down
        schema:
          id: error-slow-down
          type: object
          properties:
            error:
              type: string
              description: Reason for slow down
      403:
        description: Banned
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
    """
    # NOTE: everything after the "---" line in the docstring is YAML that
    # flask_swagger turns into the published API spec; edit it as data.

    # The text may arrive as a JSON body or as form/query values; both
    # containers expose .get(), so pick one and read "q" from it.
    payload = get_json_dict(request) if request.is_json else request.values
    q = payload.get("q")

    # Missing or empty text is rejected before detection runs.
    if not q:
        abort(400, description=_("Invalid request: missing %(name)s parameter", name='q'))

    return jsonify(detect_languages(q))
2021-02-10 10:51:17 -05:00
2022-12-30 21:23:50 -07:00
@bp.route("/frontend/settings")
@limiter.exempt
def frontend_settings():
    """
    Retrieve frontend specific settings
    ---
    tags:
      - frontend
    responses:
      200:
        description: frontend settings
        schema:
          id: frontend-settings
          type: object
          properties:
            charLimit:
              type: integer
              description: Character input limit for this language (-1 indicates no limit)
            frontendTimeout:
              type: integer
              description: Frontend translation timeout
            apiKeys:
              type: boolean
              description: Whether the API key database is enabled.
            keyRequired:
              type: boolean
              description: Whether an API key is required.
            suggestions:
              type: boolean
              description: Whether submitting suggestions is enabled.
            filesTranslation:
              type: boolean
              description: Whether files translation is enabled.
            supportedFilesFormat:
              type: array
              items:
                type: string
              description: Supported files format
            language:
              type: object
              properties:
                source:
                  type: object
                  properties:
                    code:
                      type: string
                      description: Language code
                    name:
                      type: string
                      description: Human-readable language name (in English)
                target:
                  type: object
                  properties:
                    code:
                      type: string
                      description: Language code
                    name:
                      type: string
                      description: Human-readable language name (in English)
    """
    # NOTE: the YAML after "---" in the docstring is parsed by flask_swagger
    # into the API spec, so it must list every key returned below.
    target_lang = frontend_argos_language_target()

    return jsonify(
        {
            "charLimit": args.char_limit,
            "frontendTimeout": args.frontend_timeout,
            "apiKeys": args.api_keys,
            # A key is only reported as required when the key database is
            # enabled AND the require_api_key_origin option is set.
            "keyRequired": bool(args.api_keys and args.require_api_key_origin),
            "suggestions": args.suggestions,
            "filesTranslation": not args.disable_files_translation,
            # No formats are advertised while files translation is disabled.
            "supportedFilesFormat": [] if args.disable_files_translation else frontend_argos_supported_files_format,
            "language": {
                "source": {
                    "code": frontend_argos_language_source.code,
                    "name": _lazy(frontend_argos_language_source.name),
                },
                "target": {
                    "code": target_lang.code,
                    "name": _lazy(target_lang.name),
                },
            },
        }
    )
2020-12-20 15:05:22 -05:00
2022-12-30 21:23:50 -07:00
@bp.post("/suggest")
def suggest():
    """
    Submit a suggestion to improve a translation
    ---
    tags:
      - feedback
    parameters:
      - in: formData
        name: q
        schema:
          type: string
          example: Hello world!
        required: true
        description: Original text
      - in: formData
        name: s
        schema:
          type: string
          example: ¡Hola mundo!
        required: true
        description: Suggested translation
      - in: formData
        name: source
        schema:
          type: string
          example: en
        required: true
        description: Language of original text
      - in: formData
        name: target
        schema:
          type: string
          example: es
        required: true
        description: Language of suggested translation
    responses:
      200:
        description: Success
        schema:
          id: suggest-response
          type: object
          properties:
            success:
              type: boolean
              description: Whether submission was successful
      400:
        description: Invalid request
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
      403:
        description: Not authorized
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
    """
    # NOTE: the YAML after "---" in the docstring is parsed by flask_swagger
    # into the API spec; the 400 entry documents the aborts issued below.
    if not args.suggestions:
        abort(403, description=_("Suggestions are disabled on this server."))

    # Parameters may arrive as a JSON body or as form/query values; both
    # containers expose .get().
    payload = get_json_dict(request) if request.is_json else request.values
    q = payload.get("q")
    s = payload.get("s")
    source_lang = payload.get("source")
    target_lang = payload.get("target")

    # All four parameters are mandatory. They are checked in this fixed order
    # so the error names the first one that is missing or empty.
    required = (("q", q), ("s", s), ("source", source_lang), ("target", target_lang))
    for param_name, param_value in required:
        if not param_value:
            abort(400, description=_("Invalid request: missing %(name)s parameter", name=param_name))

    SuggestionsDatabase().add(q, s, source_lang, target_lang)
    return jsonify({"success": True})
2022-12-30 21:23:50 -07:00
app = Flask(__name__)
2023-01-06 11:50:51 -05:00
app.config["SESSION_TYPE"] = "filesystem"
app.config["SESSION_FILE_DIR"] = os.path.join("db", "sessions")
2023-05-29 16:47:32 -04:00
app.config["JSON_AS_ASCII"] = False
2023-01-06 11:50:51 -05:00
Session(app)
2022-12-30 21:23:50 -07:00
if args.debug:
app.config["TEMPLATES_AUTO_RELOAD"] = True
if args.url_prefix:
app.register_blueprint(bp, url_prefix=args.url_prefix)
else:
app.register_blueprint(bp)
2023-01-01 13:18:00 -05:00
limiter.init_app(app)
2022-12-30 21:23:50 -07:00
2021-10-09 10:04:16 -04:00
swag = swagger(app)
2022-12-31 14:44:25 -07:00
swag["info"]["version"] = get_version()
2021-10-09 10:04:16 -04:00
swag["info"]["title"] = "LibreTranslate"
@app.route(api_url)
@limiter.exempt
def spec():
    """Serve the Swagger spec built for this app as a JSON response."""
    # The spec dict is passed through lazy_swag() on every request before
    # being serialized.
    document = lazy_swag(swag)
    return jsonify(document)
2021-10-09 10:04:16 -04:00
2023-01-04 17:54:07 -05:00
app.config["BABEL_TRANSLATION_DIRECTORIES"] = 'locales'
2023-01-04 12:15:18 -05:00
def get_locale():
    """
    Pick the locale for the current request.

    Order of precedence: a valid X-Override-Accept-Language header, then the
    session's 'preferred_lang', then the best Accept-Language match among the
    available locale codes.
    """
    header_lang = request.headers.get('X-Override-Accept-Language')
    if header_lang:
        # The override is honoured only when it names an available locale.
        if header_lang in get_available_locale_codes():
            return header_lang

    # Computed up front because it is the fallback when the session holds no
    # preference.
    negotiated = request.accept_languages.best_match(get_available_locale_codes())
    return session.get('preferred_lang', negotiated)
2023-01-04 12:15:18 -05:00
2023-07-09 12:29:11 +02:00
Babel(app, locale_selector=get_locale)
2023-01-04 17:54:07 -05:00
app.jinja_env.globals.update(_e=gettext_escaped, _h=gettext_html)
2023-01-04 12:15:18 -05:00
2020-12-20 15:05:22 -05:00
# Call factory function to create our blueprint
swaggerui_blueprint = get_swaggerui_blueprint(swagger_url, api_url)
2022-12-30 21:23:50 -07:00
if args.url_prefix:
app.register_blueprint(swaggerui_blueprint, url_prefix=swagger_url)
2022-12-30 21:23:50 -07:00
else:
app.register_blueprint(swaggerui_blueprint)
2020-12-20 15:05:22 -05:00
2020-12-20 18:17:06 -05:00
return app