You've already forked Mailu
mirror of
https://github.com/Mailu/Mailu.git
synced 2025-08-10 22:31:47 +02:00
Tika v1
This commit is contained in:
@@ -75,6 +75,8 @@ DEFAULT_CONFIG = {
|
|||||||
'API': False,
|
'API': False,
|
||||||
'WEB_API': '/api',
|
'WEB_API': '/api',
|
||||||
'API_TOKEN': None,
|
'API_TOKEN': None,
|
||||||
|
'FULL_TEXT_SEARCH': 'en',
|
||||||
|
'FULL_TEXT_SEARCH_ATTACHMENTS': False,
|
||||||
'LOG_LEVEL': 'INFO',
|
'LOG_LEVEL': 'INFO',
|
||||||
'SESSION_KEY_BITS': 128,
|
'SESSION_KEY_BITS': 128,
|
||||||
'SESSION_TIMEOUT': 3600,
|
'SESSION_TIMEOUT': 3600,
|
||||||
|
@@ -78,6 +78,7 @@ ENV \
|
|||||||
\
|
\
|
||||||
ADMIN_ADDRESS="127.0.0.1" \
|
ADMIN_ADDRESS="127.0.0.1" \
|
||||||
FRONT_ADDRESS="127.0.0.1" \
|
FRONT_ADDRESS="127.0.0.1" \
|
||||||
|
FTS_ATTACHMENTS_ADDRESS="127.0.0.1" \
|
||||||
SMTP_ADDRESS="127.0.0.1" \
|
SMTP_ADDRESS="127.0.0.1" \
|
||||||
IMAP_ADDRESS="127.0.0.1" \
|
IMAP_ADDRESS="127.0.0.1" \
|
||||||
REDIS_ADDRESS="127.0.0.1" \
|
REDIS_ADDRESS="127.0.0.1" \
|
||||||
|
@@ -81,6 +81,7 @@ ENV \
|
|||||||
PATH="/app/venv/bin:${PATH}" \
|
PATH="/app/venv/bin:${PATH}" \
|
||||||
ADMIN_ADDRESS="admin" \
|
ADMIN_ADDRESS="admin" \
|
||||||
FRONT_ADDRESS="front" \
|
FRONT_ADDRESS="front" \
|
||||||
|
FTS_ATTACHMENTS_ADDRESS="tika" \
|
||||||
SMTP_ADDRESS="smtp" \
|
SMTP_ADDRESS="smtp" \
|
||||||
IMAP_ADDRESS="imap" \
|
IMAP_ADDRESS="imap" \
|
||||||
OLETOOLS_ADDRESS="oletools" \
|
OLETOOLS_ADDRESS="oletools" \
|
||||||
|
@@ -7,8 +7,8 @@ ARG VERSION
|
|||||||
LABEL version=$VERSION
|
LABEL version=$VERSION
|
||||||
|
|
||||||
RUN set -euxo pipefail \
|
RUN set -euxo pipefail \
|
||||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/main 'dovecot<2.4' dovecot-lmtpd dovecot-pigeonhole-plugin dovecot-pop3d dovecot-submissiond poppler-utils \
|
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/main 'dovecot<2.4' dovecot-lmtpd dovecot-pigeonhole-plugin dovecot-pop3d dovecot-submissiond \
|
||||||
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing dovecot-fts-flatcurve catdoc \
|
; apk add --no-cache --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing dovecot-fts-flatcurve \
|
||||||
; apk add --no-cache rspamd-client \
|
; apk add --no-cache rspamd-client \
|
||||||
; mkdir /var/lib/dovecot
|
; mkdir /var/lib/dovecot
|
||||||
|
|
||||||
|
@@ -63,11 +63,14 @@ plugin {
|
|||||||
fts_autoindex = yes
|
fts_autoindex = yes
|
||||||
fts_enforced = yes
|
fts_enforced = yes
|
||||||
fts_autoindex_exclude = \Trash
|
fts_autoindex_exclude = \Trash
|
||||||
|
# Additional autoindex exclusions are numbered starting at 2
# (fts_autoindex_exclude, fts_autoindex_exclude2, ...); a "1" suffix is not read by Dovecot.
fts_autoindex_exclude2 = \Junk
|
||||||
fts_filters = normalizer-icu stopwords
|
fts_filters = normalizer-icu stopwords
|
||||||
fts_filters_en = lowercase english-possessive stopwords
|
fts_filters_en = lowercase english-possessive stopwords
|
||||||
fts_filters_fr = lowercase contractions stopwords
|
fts_filters_fr = lowercase contractions stopwords
|
||||||
|
fts_header_excludes = Received DKIM-* ARC-* X-* x-* Comments Delivered-To Return-Path Authentication-Results Message-ID References In-Reply-To Thread-* Accept-Language Content-* MIME-Version
|
||||||
fts_decoder = decode2text
|
{% if FULL_TEXT_SEARCH_ATTACHMENTS %}
|
||||||
|
fts_tika = http://{{ FTS_ATTACHMENTS_ADDRESS }}:9998/tika/
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if COMPRESSION in [ 'gz', 'bz2', 'lz4', 'zstd' ] %}
|
{% if COMPRESSION in [ 'gz', 'bz2', 'lz4', 'zstd' ] %}
|
||||||
@@ -79,16 +82,6 @@ plugin {
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
}
|
}
|
||||||
|
|
||||||
{% if FULL_TEXT_SEARCH %}
|
|
||||||
service decode2text {
|
|
||||||
executable = script /usr/libexec/dovecot/decode2text.sh
|
|
||||||
user = nobody
|
|
||||||
unix_listener decode2text {
|
|
||||||
mode = 0666
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
###############
|
###############
|
||||||
# Authentication
|
# Authentication
|
||||||
###############
|
###############
|
||||||
|
@@ -98,8 +98,16 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- "{{ root }}/mail:/mail"
|
- "{{ root }}/mail:/mail"
|
||||||
- "{{ root }}/overrides/dovecot:/overrides:ro"
|
- "{{ root }}/overrides/dovecot:/overrides:ro"
|
||||||
|
networks:
|
||||||
|
- default
|
||||||
|
{% if tika_enabled %}
|
||||||
|
- fts_attachments
|
||||||
|
{% endif %}
|
||||||
depends_on:
|
depends_on:
|
||||||
- front
|
- front
|
||||||
|
{% if tika_enabled %}
|
||||||
|
- fts_attachments
|
||||||
|
{% endif %}
|
||||||
{% if resolver_enabled %}
|
{% if resolver_enabled %}
|
||||||
- resolver
|
- resolver
|
||||||
dns:
|
dns:
|
||||||
@@ -140,6 +148,21 @@ services:
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tika_enabled %}
|
||||||
|
fts_attachments:
|
||||||
|
image: apache/tika:2.9.0.0-full
|
||||||
|
hostname: tika
|
||||||
|
restart: always
|
||||||
|
networks:
|
||||||
|
- fts_attachments
|
||||||
|
depends_on:
|
||||||
|
{% if resolver_enabled %}
|
||||||
|
- resolver
|
||||||
|
dns:
|
||||||
|
- {{ dns }}
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
antispam:
|
antispam:
|
||||||
image: ${DOCKER_ORG:-ghcr.io/mailu}/${DOCKER_PREFIX:-}rspamd:${MAILU_VERSION:-{{ version }}}
|
image: ${DOCKER_ORG:-ghcr.io/mailu}/${DOCKER_PREFIX:-}rspamd:${MAILU_VERSION:-{{ version }}}
|
||||||
hostname: antispam
|
hostname: antispam
|
||||||
@@ -257,3 +280,8 @@ networks:
|
|||||||
driver: bridge
|
driver: bridge
|
||||||
internal: true
|
internal: true
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if tika_enabled %}
|
||||||
|
fts_attachments:
|
||||||
|
driver: bridge
|
||||||
|
internal: true
|
||||||
|
{% endif %}
|
||||||
|
@@ -113,7 +113,7 @@ COMPRESSION_LEVEL={{ compression_level }}
|
|||||||
# IMAP full-text search is enabled by default.
|
# IMAP full-text search is enabled by default.
|
||||||
# Set the following variable to off in order to disable the feature
|
# Set the following variable to off in order to disable the feature
|
||||||
# or a comma separated list of language codes to support
|
# or a comma separated list of language codes to support
|
||||||
# FULL_TEXT_SEARCH=off
|
FULL_TEXT_SEARCH=en
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
# Web settings
|
# Web settings
|
||||||
@@ -188,3 +188,5 @@ DEFAULT_SPAM_THRESHOLD=80
|
|||||||
# This is a mandatory setting for using the RESTful API.
|
# This is a mandatory setting for using the RESTful API.
|
||||||
API_TOKEN={{ api_token }}
|
API_TOKEN={{ api_token }}
|
||||||
|
|
||||||
|
# Whether Tika should be enabled (scan/OCR email attachments)
|
||||||
|
FULL_TEXT_SEARCH_ATTACHMENTS={{ tika_enabled }}
|
||||||
|
@@ -64,6 +64,15 @@ the security implications caused by such an increase of attack surface.<p>
|
|||||||
<i>Oletools scans documents in email attachements for malicious macros. It has a much lower memory footprint than a full-fledged anti-virus.</i>
|
<i>Oletools scans documents in email attachements for malicious macros. It has a much lower memory footprint than a full-fledged anti-virus.</i>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="form-check form-check-inline">
|
||||||
|
<label class="form-check-label">
|
||||||
|
<input class="form-check-input" type="checkbox" name="tika_enabled" value="true">
|
||||||
|
Enable Tika
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<i>Tika scans documents in email attachments, processes them (OCR, keyword extraction) and then indexes them so that they can be searched efficiently. This requires significant resources (RAM, CPU and storage).</i>
|
||||||
|
</div>
|
||||||
|
|
||||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{ url_for('static', filename='render.js') }}"></script>
|
<script type="text/javascript" src="{{ url_for('static', filename='render.js') }}"></script>
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
- Switch from fts-xapian to fts-flatcurve. This should address the problem with indexes getting too big and will be the default in dovecot 2.4
|
- Switch from fts-xapian to fts-flatcurve. This should address the problem with indexes getting too big and will be the default in dovecot 2.4
|
||||||
- Enable full-text search of email attachments
|
- Enable full-text search of email attachments if configured (via Tika: you'll need to re-run setup)
|
||||||
|
|
||||||
If you would like more than english to be supported, please ensure you update your FULL_TEXT_SEARCH configuration variable.
|
If you would like more than english to be supported, please ensure you update your FULL_TEXT_SEARCH configuration variable.
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user