You've already forked Mailu
mirror of
https://github.com/Mailu/Mailu.git
synced 2025-08-10 22:31:47 +02:00
Tika v1
This commit is contained in:
@@ -75,6 +75,8 @@ DEFAULT_CONFIG = {
|
||||
'API': False,
|
||||
'WEB_API': '/api',
|
||||
'API_TOKEN': None,
|
||||
'FULL_TEXT_SEARCH': 'en',
|
||||
'FULL_TEXT_SEARCH_ATTACHMENTS': False,
|
||||
'LOG_LEVEL': 'INFO',
|
||||
'SESSION_KEY_BITS': 128,
|
||||
'SESSION_TIMEOUT': 3600,
|
||||
|
@@ -78,6 +78,7 @@ ENV \
|
||||
\
|
||||
ADMIN_ADDRESS="127.0.0.1" \
|
||||
FRONT_ADDRESS="127.0.0.1" \
|
||||
FTS_ATTACHMENTS_ADDRESS="127.0.0.1" \
|
||||
SMTP_ADDRESS="127.0.0.1" \
|
||||
IMAP_ADDRESS="127.0.0.1" \
|
||||
REDIS_ADDRESS="127.0.0.1" \
|
||||
|
@@ -81,6 +81,7 @@ ENV \
|
||||
PATH="/app/venv/bin:${PATH}" \
|
||||
ADMIN_ADDRESS="admin" \
|
||||
FRONT_ADDRESS="front" \
|
||||
FTS_ATTACHMENTS_ADDRESS="tika" \
|
||||
SMTP_ADDRESS="smtp" \
|
||||
IMAP_ADDRESS="imap" \
|
||||
OLETOOLS_ADDRESS="oletools" \
|
||||
|
@@ -7,8 +7,8 @@ ARG VERSION
|
||||
LABEL version=$VERSION
|
||||
|
||||
RUN set -euxo pipefail \
|
||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/main 'dovecot<2.4' dovecot-lmtpd dovecot-pigeonhole-plugin dovecot-pop3d dovecot-submissiond poppler-utils \
|
||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing dovecot-fts-flatcurve catdoc \
|
||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/main 'dovecot<2.4' dovecot-lmtpd dovecot-pigeonhole-plugin dovecot-pop3d dovecot-submissiond \
|
||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing dovecot-fts-flatcurve \
|
||||
; apk add --no-cache rspamd-client \
|
||||
; mkdir /var/lib/dovecot
|
||||
|
||||
|
@@ -63,11 +63,14 @@ plugin {
|
||||
fts_autoindex = yes
|
||||
fts_enforced = yes
|
||||
fts_autoindex_exclude = \Trash
|
||||
fts_autoindex_exclude1 = \Junk
|
||||
fts_filters = normalizer-icu stopwords
|
||||
fts_filters_en = lowercase english-possessive stopwords
|
||||
fts_filters_fr = lowercase contractions stopwords
|
||||
|
||||
fts_decoder = decode2text
|
||||
fts_header_excludes = Received DKIM-* ARC-* X-* x-* Comments Delivered-To Return-Path Authentication-Results Message-ID References In-Reply-To Thread-* Accept-Language Content-* MIME-Version
|
||||
{% if FULL_TEXT_SEARCH_ATTACHMENTS %}
|
||||
fts_tika = http://{{ FTS_ATTACHMENTS_ADDRESS }}:9998/tika/
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% if COMPRESSION in [ 'gz', 'bz2', 'lz4', 'zstd' ] %}
|
||||
@@ -79,16 +82,6 @@ plugin {
|
||||
{% endif %}
|
||||
}
|
||||
|
||||
{% if FULL_TEXT_SEARCH %}
|
||||
service decode2text {
|
||||
executable = script /usr/libexec/dovecot/decode2text.sh
|
||||
user = nobody
|
||||
unix_listener decode2text {
|
||||
mode = 0666
|
||||
}
|
||||
}
|
||||
{% endif %}
|
||||
|
||||
###############
|
||||
# Authentication
|
||||
###############
|
||||
|
@@ -98,8 +98,16 @@ services:
|
||||
volumes:
|
||||
- "{{ root }}/mail:/mail"
|
||||
- "{{ root }}/overrides/dovecot:/overrides:ro"
|
||||
networks:
|
||||
- default
|
||||
{% if tika_enabled %}
|
||||
- fts_attachments
|
||||
{% endif %}
|
||||
depends_on:
|
||||
- front
|
||||
{% if tika_enabled %}
|
||||
- fts_attachments
|
||||
{% endif %}
|
||||
{% if resolver_enabled %}
|
||||
- resolver
|
||||
dns:
|
||||
@@ -140,6 +148,21 @@ services:
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% if tika_enabled %}
|
||||
fts_attachments:
|
||||
image: apache/tika:2.9.0.0-full
|
||||
hostname: tika
|
||||
restart: always
|
||||
networks:
|
||||
- fts_attachments
|
||||
depends_on:
|
||||
{% if resolver_enabled %}
|
||||
- resolver
|
||||
dns:
|
||||
- {{ dns }}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
antispam:
|
||||
image: ${DOCKER_ORG:-ghcr.io/mailu}/${DOCKER_PREFIX:-}rspamd:${MAILU_VERSION:-{{ version }}}
|
||||
hostname: antispam
|
||||
@@ -257,3 +280,8 @@ networks:
|
||||
driver: bridge
|
||||
internal: true
|
||||
{% endif %}
|
||||
{% if tika_enabled %}
|
||||
fts_attachments:
|
||||
driver: bridge
|
||||
internal: true
|
||||
{% endif %}
|
||||
|
@@ -113,7 +113,7 @@ COMPRESSION_LEVEL={{ compression_level }}
|
||||
# IMAP full-text search is enabled by default.
|
||||
# Set the following variable to off in order to disable the feature
|
||||
# or to a comma-separated list of language codes to support
|
||||
# FULL_TEXT_SEARCH=off
|
||||
FULL_TEXT_SEARCH=en
|
||||
|
||||
###################################
|
||||
# Web settings
|
||||
@@ -188,3 +188,5 @@ DEFAULT_SPAM_THRESHOLD=80
|
||||
# This is a mandatory setting for using the RESTful API.
|
||||
API_TOKEN={{ api_token }}
|
||||
|
||||
# Whether Tika should be enabled (scan/OCR email attachments)
|
||||
FULL_TEXT_SEARCH_ATTACHMENTS={{ tika_enabled }}
|
||||
|
@@ -64,6 +64,15 @@ the security implications caused by such an increase of attack surface.<p>
|
||||
<i>Oletools scans documents in email attachments for malicious macros. It has a much lower memory footprint than a full-fledged anti-virus.</i>
|
||||
</div>
|
||||
|
||||
<div class="form-check form-check-inline">
|
||||
<label class="form-check-label">
|
||||
<input class="form-check-input" type="checkbox" name="tika_enabled" value="true">
|
||||
Enable Tika
|
||||
</label>
|
||||
|
||||
<i>Tika scans documents in email attachments, processes them (OCR, keyword extraction) and then indexes them so that they can be searched efficiently. This requires significant resources (RAM, CPU and storage).</i>
|
||||
</div>
|
||||
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
|
||||
|
||||
<script type="text/javascript" src="{{ url_for('static', filename='render.js') }}"></script>
|
||||
|
@@ -1,5 +1,5 @@
|
||||
- Switch from fts-xapian to fts-flatcurve. This should address the problem with indexes getting too big and will be the default in dovecot 2.4
|
||||
- Enable full-text search of email attachments
|
||||
- Enable full-text search of email attachments if configured (via Tika: you'll need to re-run setup)
|
||||
|
||||
If you would like languages other than English to be supported, please ensure you update your FULL_TEXT_SEARCH configuration variable.
|
||||
|
||||
|
Reference in New Issue
Block a user