Mirror of https://github.com/vimagick/dockerfiles.git (synced 2025-05-19 22:23:22 +02:00)

commit bdaea151ce (parent afe70aaf74): update airflow
@@ -9,9 +9,6 @@ ENV AIRFLOW_EXTRAS=async,all_dbs,celery,crypto,devel_hadoop,jdbc,ldap,password,r
 ENV AIRFLOW_HOME=/opt/airflow
 ENV AIRFLOW_CONFIG=/opt/airflow/airflow.cfg
 
-ARG FERNET_KEY=4XHGZH0dZ40iOv6z5cyfrXVg5qg3s_d06A7BFfbSsqA=
-ENV FERNET_KEY=${FERNET_KEY}
-
 RUN set -xe \
     && apk add --no-cache \
         build-base \
@@ -23,7 +20,7 @@ RUN set -xe \
         mariadb-dev \
        postgresql-dev \
        python3-dev \
-    && pip install cython numpy \
+    && pip install cython gunicorn numpy psycopg2-binary \
     && pip install apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION} \
     && pip install "websocket-client<0.55.0,>=0.35" \
     && apk del \
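With the `FERNET_KEY` build argument and environment variable dropped from the Dockerfile, the key is no longer baked into the image at build time and has to live in `airflow.cfg` instead. A minimal build sketch, assuming the build context is the `airflow` directory; the image tag is illustrative, not part of this commit:

```bash
# Build the image locally; the tag and context path below are placeholders.
docker build -t vimagick/airflow ./airflow

# Generate a Fernet key to put into airflow.cfg (same one-liner the README shows).
python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'
```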
@@ -22,9 +22,6 @@ airflow
 ## Quick Start
 
 ```bash
-$ python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'
-4XHGZH0dZ40iOv6z5cyfrXVg5qg3s_d06A7BFfbSsqA=
-
 $ docker stack deploy -c docker-stack.yaml airflow
 $ docker service update --replicas-max-per-node=1 airflow_worker
 $ docker service update --replicas 3 airflow_worker
@@ -33,5 +30,11 @@ $ curl http://localhost:8080/
 $ curl http://localhost:5555/
 ```
 
-> :warning: This docker image was built with a static `FERNET_KEY` environment variable.
-> You should set another value to it in `docker-stack.yaml`.
+> :warning: You need to prepare nfs server with `airflow.cfg`.
+
+```
+$ python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'
+CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8=
+```
+
+> :warning: You should set another value to `fernet_key` in `airflow.cfg` to improve security.
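The reworked Quick Start expects `airflow.cfg` to be served from an NFS share, with your own `fernet_key` written into it before the stack is deployed. A small sketch of that substitution; the NFS path below is a placeholder, not something defined by this commit:

```bash
# Write a freshly generated key into the fernet_key line of the shared config.
KEY=$(python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())')
sed -i "s|^fernet_key = .*|fernet_key = ${KEY}|" /srv/nfs/airflow/airflow.cfg   # placeholder path
```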
@@ -1,43 +1,12 @@
-# -*- coding: utf-8 -*-
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-# This is the template for Airflow's default configuration. When Airflow is
-# imported, it looks for a configuration file at $AIRFLOW_HOME/airflow.cfg. If
-# it doesn't exist, Airflow uses this template to generate it by replacing
-# variables in curly braces with their global values from configuration.py.
-
-# Users should not modify this file; they should customize the generated
-# airflow.cfg instead.
-
-
-# ----------------------- TEMPLATE BEGINS HERE -----------------------
-
 [core]
 # The folder where your airflow pipelines live, most likely a
 # subfolder in a code repository
 # This path must be absolute
-dags_folder = {AIRFLOW_HOME}/dags
+dags_folder = /opt/airflow/dags
 
 # The folder where airflow should store its log files
 # This path must be absolute
-base_log_folder = {AIRFLOW_HOME}/logs
+base_log_folder = /opt/airflow/logs
 
 # Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search.
 # Users must supply an Airflow connection id that provides access to the storage
@@ -61,20 +30,16 @@ logging_config_class =
 # Log format
 # Colour the logs when the controlling terminal is a TTY.
 colored_console_log = True
-colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
+colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
 colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
 
-log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
+log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
 simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
 
-# Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter
-# task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{try_number}}
-task_log_prefix_template =
-
 # Log filename format
-log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log
-log_processor_filename_template = {{{{ filename }}}}.log
-dag_processor_manager_log_location = {AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log
+log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
+log_processor_filename_template = {{ filename }}.log
+dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log
 
 # Hostname by providing a path to a callable, which will resolve the hostname
 # The format is "package:function". For example,
@@ -94,7 +59,7 @@ executor = CeleryExecutor
 # The SqlAlchemy connection string to the metadata database.
 # SqlAlchemy supports many different database engine, more information
 # their website
-sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
+sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postges:5432/airflow
 
 # The encoding for the databases
 sql_engine_encoding = utf-8
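Since the image now bundles `psycopg2-binary`, the metadata connection string can be smoke-tested with a short SQLAlchemy round trip. A sketch under the assumption that the Postgres service is reachable under the hostname your `sql_alchemy_conn` actually points at:

```bash
# Round-trip a trivial query through SQLAlchemy; adjust the hostname and
# credentials to match the sql_alchemy_conn value in your airflow.cfg.
python -c "
from sqlalchemy import create_engine, text
engine = create_engine('postgresql+psycopg2://airflow:airflow@postgres:5432/airflow')
with engine.connect() as conn:
    print(conn.execute(text('SELECT 1')).scalar())
"
```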
@@ -122,10 +87,9 @@ sql_alchemy_max_overflow = 10
 # a lower config value will allow the system to recover faster.
 sql_alchemy_pool_recycle = 1800
 
-# Check connection at the start of each connection pool checkout.
-# Typically, this is a simple statement like "SELECT 1".
-# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
-sql_alchemy_pool_pre_ping = True
+# How many seconds to retry re-establishing a DB connection after
+# disconnects. Setting this to 0 disables retries.
+sql_alchemy_reconnect_timeout = 300
 
 # The schema to use for the metadata database
 # SqlAlchemy supports databases with the concept of multiple schemas.
@@ -151,20 +115,17 @@ max_active_runs_per_dag = 16
 load_examples = False
 
 # Where your Airflow plugins are stored
-plugins_folder = {AIRFLOW_HOME}/plugins
+plugins_folder = /opt/airflow/plugins
 
 # Secret key to save connection passwords in the db
-fernet_key = {FERNET_KEY}
+fernet_key = CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8=
 
 # Whether to disable pickling dags
-donot_pickle = True
+donot_pickle = False
 
-# How long before timing out a python file import
+# How long before timing out a python file import while filling the DagBag
 dagbag_import_timeout = 30
 
-# How long before timing out a DagFileProcessor, which processes a dag file
-dag_file_processor_timeout = 50
-
 # The class to use for running task instances in a subprocess
 task_runner = StandardTaskRunner
 
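`fernet_key` is now a literal value in the shipped config instead of the `{FERNET_KEY}` placeholder, so whatever key you substitute must be a valid 32-byte url-safe base64 Fernet key. A quick sanity check, shown here with the key from this commit:

```bash
# Fernet() raises ValueError for a malformed key; a successful round trip
# confirms the value in airflow.cfg is usable.
python -c "
from cryptography.fernet import Fernet
f = Fernet(b'CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8=')
print(f.decrypt(f.encrypt(b'airflow')).decode())
"
```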
@@ -195,8 +156,8 @@ enable_xcom_pickling = True
 # it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
 killed_task_cleanup_time = 60
 
-# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow dags backfill -c` or
-# `airflow dags trigger -c`, the key-value pairs will override the existing ones in params.
+# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
+# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
 dag_run_conf_overrides_params = False
 
 # Worker initialisation check to validate Metadata Database connection
@@ -205,9 +166,6 @@ worker_precheck = False
 # When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
 dag_discovery_safe_mode = True
 
-# The number of retries each task is going to have by default. Can be overridden at dag or task level.
-default_task_retries = 0
-
 
 [cli]
 # In what way should the cli access the API. The LocalClient will use the
@@ -247,9 +205,6 @@ default_gpus = 0
 [hive]
 # Default mapreduce queue for HiveOperator tasks
 default_hive_mapred_queue =
-# Template for mapred_job_name in HiveOperator, supports the following named parameters:
-# hostname, dag_id, task_id, execution_date
-mapred_job_name_template = Airflow HiveOperator task for {{hostname}}.{{dag_id}}.{{task_id}}.{{execution_date}}
 
 [webserver]
 # The base url of your website as airflow cannot guess what domain or
@@ -283,8 +238,7 @@ worker_refresh_batch_size = 1
 worker_refresh_interval = 30
 
 # Secret key used to run your flask app
-# It should be as random as possible
-secret_key = {SECRET_KEY}
+secret_key = temporary_key
 
 # Number of workers to run the Gunicorn web server
 workers = 4
@@ -298,8 +252,25 @@ access_logfile = -
 error_logfile = -
 
 # Expose the configuration file in the web server
+# This is only applicable for the flask-admin based web UI (non FAB-based).
+# In the FAB-based web UI with RBAC feature,
+# access to configuration is controlled by role permissions.
 expose_config = False
 
+# Set to true to turn on authentication:
+# https://airflow.apache.org/security.html#web-authentication
+authenticate = False
+
+# Filter the list of dags by owner name (requires authentication to be enabled)
+filter_by_owner = False
+
+# Filtering mode. Choices include user (default) and ldapgroup.
+# Ldap group filtering requires using the ldap backend
+#
+# Note that the ldap server needs the "memberOf" overlay to be set up
+# in order to user the ldapgroup mode.
+owner_mode = user
+
 # Default DAG view. Valid values are:
 # tree, graph, duration, gantt, landing_times
 dag_default_view = tree
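The added webserver block keeps `authenticate = False`. If you do enable it with the password backend (`auth_backend = airflow.contrib.auth.backends.password_auth`, which the `password` extra installed by the Dockerfile supports), Airflow 1.10 expects the first user to be created from a Python shell. A sketch along the lines of the upstream 1.10 docs; the username, email, and password are placeholders, and the API is specific to 1.10.x:

```bash
# Create an initial web UI user for the password_auth backend (Airflow 1.10.x API).
python <<'EOF'
from airflow import models, settings
from airflow.contrib.auth.backends.password_auth import PasswordUser

user = PasswordUser(models.User())
user.username = 'admin'            # placeholder
user.email = 'admin@example.com'   # placeholder
user.password = 'change-me'        # placeholder

session = settings.Session()
session.add(user)
session.commit()
session.close()
EOF
```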
@@ -323,6 +294,9 @@ hide_paused_dags_by_default = False
 # Consistent page size across all listing views in the UI
 page_size = 100
 
+# Use FAB-based webserver with RBAC feature
+rbac = False
+
 # Define the color of navigation bar
 navbar_color = #007A87
 
@@ -362,10 +336,6 @@ smtp_ssl = False
 smtp_port = 25
 smtp_mail_from = airflow@example.com
 
-[sentry]
-# Sentry (https://docs.sentry.io) integration
-sentry_dsn =
-
 
 [celery]
 # This section only applies if you are using the CeleryExecutor in
@@ -407,7 +377,7 @@ broker_url = redis://redis:6379/1
 # This status is used by the scheduler to update the state of the task
 # The use of a database is highly recommended
 # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
-result_backend = db+postgresql://airflow:airflow@postgres/airflow
+result_backend = db+postgresql://airflow:airflow@postges/airflow
 
 # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
 # it `airflow flower`. This defines the IP that Celery Flower runs on
@@ -487,12 +457,9 @@ job_heartbeat_sec = 5
 # how often the scheduler should run (in seconds).
 scheduler_heartbeat_sec = 5
 
-# The number of times to try to schedule each DAG file
-# -1 indicates unlimited number
-num_runs = -1
-
-# The number of seconds to wait between consecutive DAG file processing
-processor_poll_interval = 1
+# after how much time should the scheduler terminate in seconds
+# -1 indicates to run continuously (see also num_runs)
+run_duration = -1
 
 # after how much time (seconds) a new DAGs should be picked up from the filesystem
 min_file_process_interval = 0
@@ -508,7 +475,7 @@ print_stats_interval = 30
 # This is used by the health check in the "/health" endpoint
 scheduler_health_check_threshold = 30
 
-child_process_log_directory = {AIRFLOW_HOME}/logs/scheduler
+child_process_log_directory = /opt/airflow/logs/scheduler
 
 # Local task jobs periodically heartbeat to the DB. If the job has
 # not heartbeat in this many seconds, the scheduler will mark the
@@ -541,11 +508,6 @@ statsd_host = localhost
 statsd_port = 8125
 statsd_prefix = airflow
 
-# If you want to avoid send all the available metrics to StatsD,
-# you can configure an allow list of prefixes to send only the metrics that
-# start with the elements of the list (e.g: scheduler,executor,dagrun)
-statsd_allow_list =
-
 # The scheduler can run multiple threads in parallel to schedule dags.
 # This defines how many threads will run.
 max_threads = 2
@@ -574,6 +536,48 @@ search_scope = LEVEL
 # broken schema, or do not return a schema.
 ignore_malformed_schema = False
 
+[mesos]
+# Mesos master address which MesosExecutor will connect to.
+master = localhost:5050
+
+# The framework name which Airflow scheduler will register itself as on mesos
+framework_name = Airflow
+
+# Number of cpu cores required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_cpu = 1
+
+# Memory in MB required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_memory = 256
+
+# Enable framework checkpointing for mesos
+# See http://mesos.apache.org/documentation/latest/slave-recovery/
+checkpoint = False
+
+# Failover timeout in milliseconds.
+# When checkpointing is enabled and this option is set, Mesos waits
+# until the configured timeout for
+# the MesosExecutor framework to re-register after a failover. Mesos
+# shuts down running tasks if the
+# MesosExecutor framework fails to re-register within this timeframe.
+# failover_timeout = 604800
+
+# Enable framework authentication for mesos
+# See http://mesos.apache.org/documentation/latest/configuration/
+authenticate = False
+
+# Mesos credentials, if authentication is enabled
+# default_principal = admin
+# default_secret = admin
+
+# Optional Docker Image to run on slave before running the command
+# This image should be accessible from mesos slave i.e mesos slave
+# should be able to pull this docker image before executing the command.
+# docker_image_slave = puckel/docker-airflow
+
 [kerberos]
 ccache = /tmp/airflow_krb5_ccache
 # gets augmented with fqdn
@@ -594,7 +598,7 @@ hide_sensitive_variable_fields = True
 # Elasticsearch host
 host =
 # Format of the log_id, which is used to query for a given tasks logs
-log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
+log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
 # Used to mark the end of a log stream for a task
 end_of_log_mark = end_of_log
 # Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
@@ -674,7 +678,7 @@ git_password =
 git_sync_root = /git
 git_sync_dest = repo
 # Mount point of the volume if git-sync is being used.
-# i.e. {AIRFLOW_HOME}/dags
+# i.e. /opt/airflow/dags
 git_dags_folder_mount_point =
 
 # To get Git-sync SSH authentication set up follow this format
@@ -765,7 +769,9 @@ tolerations =
 # List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
 # See:
 # https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
-kube_client_request_args =
+# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
+# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
+kube_client_request_args = {"_request_timeout" : [60,60] }
 
 # Worker pods security context options
 # See:
@@ -779,17 +785,15 @@ run_as_user =
 # that allows for the key to be read, e.g. 65533
 fs_group =
 
-# Annotations configuration as a single line formatted JSON object.
-# See the naming convention in:
-# https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
-worker_annotations =
-
-
 [kubernetes_node_selectors]
 # The Key-value pairs to be given to worker pods.
 # The worker pods will be scheduled to the nodes of the specified key-value pairs.
 # Should be supplied in the format: key = value
 
+[kubernetes_annotations]
+# The Key-value annotations pairs to be given to worker pods.
+# Should be supplied in the format: key = value
+
 [kubernetes_environment_variables]
 # The scheduler sets the following environment variables into your workers. You may define as
 # many environment variables as needed and the kubernetes launcher will set them in the launched workers.
@@ -67,10 +67,6 @@ colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatte
 log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
 simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
 
-# Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter
-# task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{try_number}}
-task_log_prefix_template =
-
 # Log filename format
 log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log
 log_processor_filename_template = {{{{ filename }}}}.log
@@ -122,10 +118,9 @@ sql_alchemy_max_overflow = 10
 # a lower config value will allow the system to recover faster.
 sql_alchemy_pool_recycle = 1800
 
-# Check connection at the start of each connection pool checkout.
-# Typically, this is a simple statement like "SELECT 1".
-# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
-sql_alchemy_pool_pre_ping = True
+# How many seconds to retry re-establishing a DB connection after
+# disconnects. Setting this to 0 disables retries.
+sql_alchemy_reconnect_timeout = 300
 
 # The schema to use for the metadata database
 # SqlAlchemy supports databases with the concept of multiple schemas.
@@ -157,14 +152,11 @@ plugins_folder = {AIRFLOW_HOME}/plugins
 fernet_key = {FERNET_KEY}
 
 # Whether to disable pickling dags
-donot_pickle = True
+donot_pickle = False
 
-# How long before timing out a python file import
+# How long before timing out a python file import while filling the DagBag
 dagbag_import_timeout = 30
 
-# How long before timing out a DagFileProcessor, which processes a dag file
-dag_file_processor_timeout = 50
-
 # The class to use for running task instances in a subprocess
 task_runner = StandardTaskRunner
 
@@ -195,8 +187,8 @@ enable_xcom_pickling = True
 # it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
 killed_task_cleanup_time = 60
 
-# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow dags backfill -c` or
-# `airflow dags trigger -c`, the key-value pairs will override the existing ones in params.
+# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
+# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
 dag_run_conf_overrides_params = False
 
 # Worker initialisation check to validate Metadata Database connection
@@ -205,9 +197,6 @@ worker_precheck = False
 # When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
 dag_discovery_safe_mode = True
 
-# The number of retries each task is going to have by default. Can be overridden at dag or task level.
-default_task_retries = 0
-
 
 [cli]
 # In what way should the cli access the API. The LocalClient will use the
@@ -247,9 +236,6 @@ default_gpus = 0
 [hive]
 # Default mapreduce queue for HiveOperator tasks
 default_hive_mapred_queue =
-# Template for mapred_job_name in HiveOperator, supports the following named parameters:
-# hostname, dag_id, task_id, execution_date
-mapred_job_name_template = Airflow HiveOperator task for {{hostname}}.{{dag_id}}.{{task_id}}.{{execution_date}}
 
 [webserver]
 # The base url of your website as airflow cannot guess what domain or
@@ -283,8 +269,7 @@ worker_refresh_batch_size = 1
 worker_refresh_interval = 30
 
 # Secret key used to run your flask app
-# It should be as random as possible
-secret_key = {SECRET_KEY}
+secret_key = temporary_key
 
 # Number of workers to run the Gunicorn web server
 workers = 4
@@ -298,8 +283,25 @@ access_logfile = -
 error_logfile = -
 
 # Expose the configuration file in the web server
+# This is only applicable for the flask-admin based web UI (non FAB-based).
+# In the FAB-based web UI with RBAC feature,
+# access to configuration is controlled by role permissions.
 expose_config = False
 
+# Set to true to turn on authentication:
+# https://airflow.apache.org/security.html#web-authentication
+authenticate = False
+
+# Filter the list of dags by owner name (requires authentication to be enabled)
+filter_by_owner = False
+
+# Filtering mode. Choices include user (default) and ldapgroup.
+# Ldap group filtering requires using the ldap backend
+#
+# Note that the ldap server needs the "memberOf" overlay to be set up
+# in order to user the ldapgroup mode.
+owner_mode = user
+
 # Default DAG view. Valid values are:
 # tree, graph, duration, gantt, landing_times
 dag_default_view = tree
@@ -323,6 +325,9 @@ hide_paused_dags_by_default = False
 # Consistent page size across all listing views in the UI
 page_size = 100
 
+# Use FAB-based webserver with RBAC feature
+rbac = False
+
 # Define the color of navigation bar
 navbar_color = #007A87
 
@@ -362,10 +367,6 @@ smtp_ssl = False
 smtp_port = 25
 smtp_mail_from = airflow@example.com
 
-[sentry]
-# Sentry (https://docs.sentry.io) integration
-sentry_dsn =
-
 
 [celery]
 # This section only applies if you are using the CeleryExecutor in
@@ -487,12 +488,9 @@ job_heartbeat_sec = 5
 # how often the scheduler should run (in seconds).
 scheduler_heartbeat_sec = 5
 
-# The number of times to try to schedule each DAG file
-# -1 indicates unlimited number
-num_runs = -1
-
-# The number of seconds to wait between consecutive DAG file processing
-processor_poll_interval = 1
+# after how much time should the scheduler terminate in seconds
+# -1 indicates to run continuously (see also num_runs)
+run_duration = -1
 
 # after how much time (seconds) a new DAGs should be picked up from the filesystem
 min_file_process_interval = 0
@@ -541,11 +539,6 @@ statsd_host = localhost
 statsd_port = 8125
 statsd_prefix = airflow
 
-# If you want to avoid send all the available metrics to StatsD,
-# you can configure an allow list of prefixes to send only the metrics that
-# start with the elements of the list (e.g: scheduler,executor,dagrun)
-statsd_allow_list =
-
 # The scheduler can run multiple threads in parallel to schedule dags.
 # This defines how many threads will run.
 max_threads = 2
@@ -574,6 +567,48 @@ search_scope = LEVEL
 # broken schema, or do not return a schema.
 ignore_malformed_schema = False
 
+[mesos]
+# Mesos master address which MesosExecutor will connect to.
+master = localhost:5050
+
+# The framework name which Airflow scheduler will register itself as on mesos
+framework_name = Airflow
+
+# Number of cpu cores required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_cpu = 1
+
+# Memory in MB required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_memory = 256
+
+# Enable framework checkpointing for mesos
+# See http://mesos.apache.org/documentation/latest/slave-recovery/
+checkpoint = False
+
+# Failover timeout in milliseconds.
+# When checkpointing is enabled and this option is set, Mesos waits
+# until the configured timeout for
+# the MesosExecutor framework to re-register after a failover. Mesos
+# shuts down running tasks if the
+# MesosExecutor framework fails to re-register within this timeframe.
+# failover_timeout = 604800
+
+# Enable framework authentication for mesos
+# See http://mesos.apache.org/documentation/latest/configuration/
+authenticate = False
+
+# Mesos credentials, if authentication is enabled
+# default_principal = admin
+# default_secret = admin
+
+# Optional Docker Image to run on slave before running the command
+# This image should be accessible from mesos slave i.e mesos slave
+# should be able to pull this docker image before executing the command.
+# docker_image_slave = puckel/docker-airflow
+
 [kerberos]
 ccache = /tmp/airflow_krb5_ccache
 # gets augmented with fqdn
@@ -765,7 +800,9 @@ tolerations =
 # List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
 # See:
 # https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
-kube_client_request_args =
+# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
+# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
+kube_client_request_args = {{"_request_timeout" : [60,60] }}
 
 # Worker pods security context options
 # See:
@@ -779,17 +816,15 @@ run_as_user =
 # that allows for the key to be read, e.g. 65533
 fs_group =
 
-# Annotations configuration as a single line formatted JSON object.
-# See the naming convention in:
-# https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
-worker_annotations =
-
-
 [kubernetes_node_selectors]
 # The Key-value pairs to be given to worker pods.
 # The worker pods will be scheduled to the nodes of the specified key-value pairs.
 # Should be supplied in the format: key = value
 
+[kubernetes_annotations]
+# The Key-value annotations pairs to be given to worker pods.
+# Should be supplied in the format: key = value
+
 [kubernetes_environment_variables]
 # The scheduler sets the following environment variables into your workers. You may define as
 # many environment variables as needed and the kubernetes launcher will set them in the launched workers.