mirror of https://github.com/vimagick/dockerfiles.git synced 2025-03-23 21:19:24 +02:00

Merge branch 'master' into update-mantis

This commit is contained in:
Kevin He 2019-10-27 07:42:03 +08:00 committed by GitHub
commit 23bb7b9e7d
No known key found for this signature in database
381 changed files with 19425 additions and 2034 deletions

View File

@ -21,7 +21,6 @@ A collection of delicious docker recipes.
- [ ] caddy
- [ ] dsniff
- [ ] ettercap
- [ ] freegeoip
- [ ] freelan
- [ ] gitbook
- [ ] gitolite
@ -32,8 +31,6 @@ A collection of delicious docker recipes.
- [ ] libreswan
- [ ] mitmproxy
- [ ] nagios
- [ ] nfs
- [ ] openldap
- [ ] openswan
- [ ] postfix
- [ ] pritunl
@ -46,8 +43,12 @@ A collection of delicious docker recipes.
## Big Data
- [x] airflow
- [x] ambari
- [x] kafka-arm
- [x] kafka-manager
- [x] prestodb (official)
- [x] prestosql (community)
- [x] superset-arm
- [x] zookeeper-arm
@ -70,11 +71,13 @@ A collection of delicious docker recipes.
## Daemon
- [x] alpine-arm :+1:
- [x] apacheds
- [x] aria2 :+1:
- [x] audiowaveform
- [x] cadvisor
- [x] casperjs :+1:
- [x] collectd
- [x] freegeoip
- [x] freeradius
- [x] frp :cn:
- [x] graphite
@ -91,10 +94,12 @@ A collection of delicious docker recipes.
- [x] motion-arm :+1:
- [x] nginx
- [x] nifi
- [x] ntopng
- [x] nullmailer
- [x] nullmailer-arm
- [x] openhab
- [x] openssh
- [x] ot-frontend-arm
- [x] ot-recorder
- [x] ot-recorder-arm
- [x] piknik
@ -161,7 +166,6 @@ A collection of delicious docker recipes.
- [x] json-server
- [x] mantisbt
- [x] mediagoblin
- [x] netdata
- [x] nginad
- [x] nodebb :+1:
- [x] openrefine
@ -180,6 +184,7 @@ A collection of delicious docker recipes.
## Security
- [x] aircrack-ng-arm
- [x] bro
- [x] clamav
- [x] dsniff
@ -187,6 +192,7 @@ A collection of delicious docker recipes.
- [x] grr
- [x] hydra
- [x] iptables
- [x] kismet
- [x] routersploit
- [x] snort :beetle:
- [x] sslsplit
@ -200,7 +206,9 @@ A collection of delicious docker recipes.
- [x] fteproxy-arm :+1:
- [x] hans
- [x] haproxy-arm
- [x] i2pd :ru:
- [x] kcptun :cn:
- [x] mtproxy
- [x] mysql-proxy
- [x] ngrok :+1:
- [x] obfsproxy
@ -223,6 +231,7 @@ A collection of delicious docker recipes.
## VPN
- [x] n2n :+1:
- [x] ocserv :+1:
- [x] openconnect
- [x] openconnect-arm
@ -234,6 +243,7 @@ A collection of delicious docker recipes.
- [x] strongswan :+1:
- [x] tinc :+1:
- [x] tinc-arm :+1:
- [x] wireguard :beetle:
- [x] xl2tpd
## DNS
@ -248,50 +258,75 @@ A collection of delicious docker recipes.
## 3rd-party
- [x] docker.bintray.io/jfrog/artifactory-oss
- [x] tutum/builder
- [x] browserless/chrome
- [x] certbot
- [x] codercom/code-server
- [x] confluentinc/cp-kafka-mqtt
- [x] confluentinc/cp-kafka-rest
- [x] streamsets/datacollector
- [x] cachethq/docker
- [x] puckel/docker-airflow
- [x] drone/drone
- [x] drupal
- [x] elastalert
- [x] elk
- [x] flogo/flogo-docker
- [x] mher/flower
- [x] ghost
- [x] gitlab/gitlab-ce
- [x] atlassianlabs/gostatsd
- [x] gitea/gitea :cn:
- [x] gliderlabs/logspout
- [x] gliderlabs/registrator
- [ ] glot
- [ ] bash
- [ ] python
- [x] gogs :cn:
- [x] gogs/gogs :cn:
- [x] haproxy
- [x] jmxtrans/jmxtrans
- [x] wurstmeister/kafka
- [x] netdata/netdata
- [x] nextcloud
- [x] sonatype/nexus3
- [x] jazzdd/phpvirtualbox
- [x] jenkins
- [x] sonatype/nexus3
- [x] jupyter/notebook
- [x] kylemanna/openvpn
- [x] metabase/metabase
- [x] metasploitframework/metasploit-framework :skull:
- [x] minio/minio
- [x] mongo
- [x] neo4j
- [x] erichough/nfs-server
- [x] odoo
- [x] osixia/openldap
- [x] campbellsoftwaresolutions/osticket
- [x] owncloud
- [x] phpmyadmin
- [x] pihole/pihole
- [x] portainer/portainer :+1:
- [x] postgres
- [x] postgrest/postgrest
- [x] registry
- [x] rocket.chat
- [x] scrapinghub/splash
- [x] rundeck/rundeck
- [x] wonderfall/searx
- [ ] selenium
- [ ] hub
- [ ] node-firefox
- [x] standalone-firefox
- [x] sentry
- [x] scrapinghub/splash
- [x] amancevice/superset
- [x] v2ray/official :cn:
- [x] centurylink/watchtower
- [x] anapsix/webdis
- [x] wekanteam/wekan
- [x] yourls
- [x] zookeeper
- [x] elkozmon/zoonavigator
## auto-completion

View File

@ -0,0 +1,9 @@
# Dockerfile for aircrack-ng-arm
# Builds a toolbox image with aircrack-ng on top of easypi/alpine-arm.
# NOTE(review): the base image carries no tag, so the default tag is pulled — consider pinning.
FROM easypi/alpine-arm
# Install aircrack-ng plus an interactive shell (bash), coreutils and tmux.
# --no-cache keeps the apk package index out of the image layer.
RUN apk add --no-cache aircrack-ng bash coreutils tmux
# Keep the container alive indefinitely so the tools can be used through an
# interactive `exec` session (presumably coreutils provides a `sleep` that accepts "inf" — confirm).
ENTRYPOINT ["sleep", "inf"]

aircrack-ng-arm/README.md Normal file
View File

@ -0,0 +1,13 @@
$ docker-compose up -d
$ docker-compose exec aircrack bash
>>> airmon-ng
>>> airmon-ng start wlan1
>>> ifconfig
>>> airodump-ng wlan1mon
>>> airmon-ng stop wlan1mon
>>> exit

View File

@ -0,0 +1,7 @@
# Image that provides the aircrack-ng tools.
image: easypi/aircrack-ng-arm
# Use the host's network stack (presumably so host wireless interfaces are visible inside the container — confirm).
net: host
# Allocate a pseudo-TTY for the container.
tty: true
# Restart the container automatically unless it was explicitly stopped.
restart: unless-stopped

airflow/Dockerfile Normal file
View File

@ -0,0 +1,43 @@
# Dockerfile for airflow
FROM python:3.7-alpine

# Version of apache-airflow to install.
# NOTE(review): AIRFLOW_VERSION was referenced by the install step below but was
# not defined anywhere in this file; with it unset, pip receives the requirement
# "apache-airflow[...]==" and the build fails. 1.10.5 is an assumption based on
# the options present in the bundled airflow.cfg — confirm before release.
ENV AIRFLOW_VERSION=1.10.5
# Optional feature sets ("extras") installed together with Airflow.
ENV AIRFLOW_EXTRAS=async,all_dbs,celery,crypto,devel_hadoop,jdbc,ldap,password,redis,s3,samba,slack,ssh,statsd
# Airflow home directory and the configuration file located inside it.
ENV AIRFLOW_HOME=/opt/airflow
ENV AIRFLOW_CONFIG=/opt/airflow/airflow.cfg

# Install the build dependencies, install Airflow, and remove the build-only
# packages again in the same layer so they do not bloat the image.
# - freetds is installed but intentionally not part of the `apk del` list, so it
#   stays in the final image.
# - The Airflow requirement is quoted so the shell never treats the [extras]
#   brackets as a glob pattern.
# - --no-cache-dir keeps pip's download/wheel cache out of the layer.
RUN set -xe \
    && apk add --no-cache \
        build-base \
        cyrus-sasl-dev \
        freetds \
        freetds-dev \
        krb5-dev \
        libffi-dev \
        mariadb-dev \
        postgresql-dev \
        python3-dev \
    && pip install --no-cache-dir cython numpy psycopg2-binary \
    && pip install --no-cache-dir "apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" \
    && pip install --no-cache-dir "websocket-client>=0.35,<0.55.0" \
    && apk del \
        build-base \
        cyrus-sasl-dev \
        freetds-dev \
        krb5-dev \
        libffi-dev \
        mariadb-dev \
        postgresql-dev \
        python3-dev

# The container runs the `airflow` CLI; arguments given to `docker run`
# replace the default `--help`.
ENTRYPOINT ["airflow"]
CMD ["--help"]

View File

@ -1,6 +1,8 @@
## How It Works
@ -20,13 +22,31 @@ airflow
## Quick Start
# On Master
$ docker-compose up -d
$ chmod 777 data/airflow/dags
$ docker-compose exec webserver cp -r /usr/local/lib/python3.6/site-packages/airflow/example_dags dags
$ docker stack deploy -c docker-stack.yaml airflow
$ docker service update --replicas-max-per-node=1 airflow_worker
$ docker service update --replicas 3 airflow_worker
# On Workers
$ docker-compose up -d
$ chmod 777 data/airflow/dags
$ docker-compose exec worker cp -r /usr/local/lib/python3.6/site-packages/airflow/example_dags dags
$ docker stack services airflow
$ docker service ps airflow_webserver
$ docker exec -it airflow_webserver.1.xxxxxx sh
>>> airflow create_user -r Admin -u admin -e admin@borderxlab.com -f Super -l Admin -p secret
>>> airflow list_users
│ Id │ Username │ Email │ First name │ Last name │ Roles │
│ 1 │ admin │ admin@borderxlab.com │ Super │ Admin │ [Admin] │
>>> exit
$ curl http://localhost:8080/
$ curl http://localhost:5555/
> :warning: You need to prepare an NFS server with `airflow.cfg`.
$ python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'
> :warning: You should set a different value for `fernet_key` in `airflow.cfg` to improve security.

airflow/data/airflow.cfg Normal file
View File

@ -0,0 +1,829 @@
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
# This path must be absolute
dags_folder = /opt/airflow/dags
# The folder where airflow should store its log files
# This path must be absolute
base_log_folder = /opt/airflow/logs
# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search.
# Users must supply an Airflow connection id that provides access to the storage
# location. If remote_logging is set to true, see UPDATING.md for additional
# configuration requirements.
remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
# Logging level
logging_level = INFO
fab_logging_level = WARN
# Logging class
# Specify the class that will specify the logging configuration
# This class has to be on the python classpath
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
logging_config_class =
# Log format
# Colour the logs when the controlling terminal is a TTY.
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
# Log filename format
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log
# Hostname by providing a path to a callable, which will resolve the hostname
# The format is "package:function". For example,
# default value "socket:getfqdn" means that result from getfqdn() of "socket" package will be used as hostname
# No argument should be required in the function specified.
# If using IP address as hostname is preferred, use value "airflow.utils.net:get_host_ip_address"
hostname_callable = socket:getfqdn
# Default timezone in case supplied date times are naive
# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
default_timezone = utc
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor
executor = CeleryExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; more information is
# available on their website.
# NOTE(review): the host was spelled "postges"; corrected to "postgres" on the
# assumption that the database service is named "postgres" — confirm against
# the compose/stack file.
sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
# The encoding for the databases
sql_engine_encoding = utf-8
# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool. 0 indicates no limit.
sql_alchemy_pool_size = 5
# The maximum overflow size of the pool.
# When the number of checked-out connections reaches the size set in pool_size,
# additional connections will be returned up to this limit.
# When those additional connections are returned to the pool, they are disconnected and discarded.
# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
# and the total number of "sleeping" connections the pool will allow is pool_size.
# max_overflow can be set to -1 to indicate no overflow limit;
# no limit will be placed on the total number of concurrent connections. Defaults to 10.
sql_alchemy_max_overflow = 10
# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite. If the number of DB connections is ever exceeded,
# a lower config value will allow the system to recover faster.
sql_alchemy_pool_recycle = 1800
# How many seconds to retry re-establishing a DB connection after
# disconnects. Setting this to 0 disables retries.
sql_alchemy_reconnect_timeout = 300
# The schema to use for the metadata database
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 32
# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 16
# Are DAGs paused by default at creation
dags_are_paused_at_creation = True
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 16
# Whether to load the examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = False
# Where your Airflow plugins are stored
plugins_folder = /opt/airflow/plugins
# Secret key to save connection passwords in the db
fernet_key = CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8=
# Whether to disable pickling dags
donot_pickle = False
# How long before timing out a python file import while filling the DagBag
dagbag_import_timeout = 30
# The class to use for running task instances in a subprocess
task_runner = StandardTaskRunner
# If set, tasks without a `run_as_user` argument will be run with this user
# Can be used to de-elevate a sudo user running Airflow when executing tasks
default_impersonation =
# What security module to use (for example kerberos):
security =
# If set to False enables some unsecure features like Charts and Ad Hoc Queries.
# In 2.0 will default to True.
secure_mode = False
# Turn unit test mode on (overwrites many configuration options with test
# values at runtime)
unit_test_mode = False
# Name of handler to read task instance logs.
# Default to use task handler.
task_log_reader = task
# Whether to enable pickling for xcom (note that this is insecure and allows for
# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False).
enable_xcom_pickling = True
# When a task is killed forcefully, this is the amount of time in seconds that
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60
# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
dag_run_conf_overrides_params = False
# Worker initialisation check to validate Metadata Database connection
worker_precheck = False
# When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
dag_discovery_safe_mode = True
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
# webserver
api_client = airflow.api.client.local_client
# If you set web_server_url_prefix, do NOT forget to append it here, ex:
# endpoint_url = http://localhost:8080/myroot
# So api will look like: http://localhost:8080/myroot/api/experimental/...
endpoint_url = http://localhost:8080
# How to authenticate users of the API
auth_backend = airflow.api.auth.backend.default
# what lineage backend to use
backend =
sasl_enabled = False
host =
port = 21000
username =
password =
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080
# The ip specified when starting the web server
web_server_host =
# The port on which to run the web server
web_server_port = 8080
# Paths to the SSL certificate and key for the web server. When both are
# provided SSL will be enabled. This does not change the web server port.
web_server_ssl_cert =
web_server_ssl_key =
# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 120
# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 120
# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1
# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30
# Secret key used to run your flask app
secret_key = temporary_key
# Number of workers to run the Gunicorn web server
workers = 4
# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync
# Log files for the gunicorn webserver. '-' means log to stderr.
access_logfile = -
error_logfile = -
# Expose the configuration file in the web server
# This is only applicable for the flask-admin based web UI (non FAB-based).
# In the FAB-based web UI with RBAC feature,
# access to configuration is controlled by role permissions.
expose_config = False
# Set to true to turn on authentication:
# https://airflow.apache.org/security.html#web-authentication
authenticate = False
# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
# Filtering mode. Choices include user (default) and ldapgroup.
# Ldap group filtering requires using the ldap backend
# Note that the ldap server needs the "memberOf" overlay to be set up
# in order to use the ldapgroup mode.
owner_mode = user
# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree
# Default DAG orientation. Valid values are:
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR
# Puts the webserver in demonstration mode; blurs the names of Operators for
# privacy.
demo_mode = False
# The amount of time (in secs) webserver will wait for initial handshake
# while fetching logs from other worker machine
log_fetch_timeout_sec = 5
# By default, the webserver shows paused DAGs. Flip this to hide paused
# DAGs by default
hide_paused_dags_by_default = False
# Consistent page size across all listing views in the UI
page_size = 100
# Use FAB-based webserver with RBAC feature
rbac = True
# Define the color of navigation bar
navbar_color = #007A87
# Default dagrun to show in UI
default_dag_run_display_number = 25
# Enable werkzeug `ProxyFix` middleware
enable_proxy_fix = False
# Set secure flag on session cookie
cookie_secure = False
# Set samesite policy on session cookie
cookie_samesite =
# Default setting for wrap toggle on DAG code and TI log views.
default_wrap = False
# Send anonymous user activity to your analytics tool
# analytics_tool = # choose from google_analytics, segment, or metarouter
# analytics_id = XXXXXXXXXXX
email_backend = airflow.utils.email.send_email_smtp
# If you want airflow to send emails on retries, failure, and you want to use
# the airflow.utils.email.send_email_smtp function, you have to configure an
# smtp server here
smtp_host = localhost
smtp_starttls = True
smtp_ssl = False
# Uncomment and set the user/pass settings if you want to use SMTP AUTH
# smtp_user = airflow
# smtp_password = airflow
smtp_port = 25
smtp_mail_from = airflow@example.com
# This section only applies if you are using the CeleryExecutor in
# [core] section above
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor
# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
worker_concurrency = 16
# The maximum and minimum concurrency that will be used when starting workers with the
# "airflow worker" command (always keep minimum processes, but grow to maximum if necessary).
# Note the value should be "max_concurrency,min_concurrency"
# Pick these numbers based on resources on worker box and the nature of the task.
# If autoscale option is available, worker_concurrency will be ignored.
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
# worker_autoscale = 16,12
# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the worker's local log files to the airflow main
# web server, which then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open and
# visible from the main web server so that it can connect to the workers.
worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
broker_url = redis://redis:6379/1
# The Celery result_backend. When a job finishes, it needs to update the
# metadata of the job. Therefore it will post a message on a message bus,
# or insert it into a database (depending on the backend)
# This status is used by the scheduler to update the state of the task
# The use of a database is highly recommended
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
# NOTE(review): the host was spelled "postges"; corrected to "postgres" on the
# assumption that the database service is named "postgres" — confirm against
# the compose/stack file.
result_backend = db+postgresql://airflow:airflow@postgres/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the IP that Celery Flower runs on
flower_host =
# The root URL for Flower
# Ex: flower_url_prefix = /flower
flower_url_prefix =
# This defines the port that Celery Flower runs on
flower_port = 5555
# Securing Flower with Basic Authentication
# Accepts user:password pairs separated by a comma
# Example: flower_basic_auth = user1:password1,user2:password2
flower_basic_auth =
# Default queue that tasks get assigned to and that workers listen on.
default_queue = default
# How many processes CeleryExecutor uses to sync task state.
# 0 means to use max(1, number of cores - 1) processes.
sync_parallelism = 0
# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
# Celery Pool implementation.
# Choices include: prefork (default), eventlet, gevent or solo.
# See:
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
pool = prefork
# This section is for specifying options which can be passed to the
# underlying celery broker transport. See:
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options
# The visibility timeout defines the number of seconds to wait for the worker
# to acknowledge the task before the message is redelivered to another worker.
# Make sure to increase the visibility timeout to match the time of the longest
# ETA you're planning to use.
# visibility_timeout is only supported for Redis and SQS celery brokers.
# See:
# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options
#visibility_timeout = 21600
# This section only applies if you are using the DaskExecutor in
# [core] section above
# The IP address and port of the Dask cluster's scheduler.
cluster_address =
# TLS/ SSL settings to access a secured Dask scheduler.
tls_ca =
tls_cert =
tls_key =
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 5
# after how much time should the scheduler terminate in seconds
# -1 indicates to run continuously (see also num_runs)
run_duration = -1
# after how much time (seconds) new DAGs should be picked up from the filesystem
min_file_process_interval = 0
# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
dag_dir_list_interval = 300
# How often should stats be printed to the logs
print_stats_interval = 30
# If the last scheduler heartbeat happened more than scheduler_health_check_threshold ago (in seconds),
# scheduler is considered unhealthy.
# This is used by the health check in the "/health" endpoint
scheduler_health_check_threshold = 30
child_process_log_directory = /opt/airflow/logs/scheduler
# Local task jobs periodically heartbeat to the DB. If the job has
# not heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300
# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
# will not do scheduler catchup if this is False,
# however it can be set on a per DAG basis in the
# DAG definition (catchup)
catchup_by_default = True
# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by one
# or more of the following:
# - reversion to full table scan
# - complexity of query predicate
# - excessive locking
# Additionally, you may hit the maximum allowable query length for your db.
# Set this to 0 for no limit (not advised)
max_tis_per_query = 512
# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow
# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run.
max_threads = 2
authenticate = False
# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL
# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
# The framework name which Airflow scheduler will register itself as on mesos
framework_name = Airflow
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1
# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256
# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False
# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits
# until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos
# shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# failover_timeout = 604800
# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
authenticate = False
# Mesos credentials, if authentication is enabled
# default_principal = admin
# default_secret = admin
# Optional Docker Image to run on slave before running the command
# This image should be accessible from mesos slave i.e mesos slave
# should be able to pull this docker image before executing the command.
# docker_image_slave = puckel/docker-airflow
ccache = /tmp/airflow_krb5_ccache
# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab
api_rev = v3
# UI to hide sensitive variable fields when set to True
hide_sensitive_variable_fields = True
# Elasticsearch host
host =
# Format of the log_id, which is used to query for a given tasks logs
log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code will prefix the https:// automatically, don't include that here.
frontend =
# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False
# Instead of the default log formatter, write the log lines as JSON
json_format = False
# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message
use_ssl = False
verify_certs = True
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =
worker_container_tag =
worker_container_image_pull_policy = IfNotPresent
# If True (default), worker pods will be deleted upon termination
delete_worker_pods = True
# Number of Kubernetes Worker Pod creation calls per scheduler loop
worker_pods_creation_batch_size = 1
# The Kubernetes namespace where airflow workers should be created. Defaults to `default`
namespace = default
# The name of the Kubernetes ConfigMap Containing the Airflow Configuration (this file)
airflow_configmap =
# If the docker image already contains DAGs, set this to `True`, and the worker will search for dags in dags_folder,
# otherwise use git sync or dags volume claim to mount DAGs
dags_in_image = False
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
dags_volume_subpath =
# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
dags_volume_claim =
# For volume mounted logs, the worker will look in this subpath for logs
logs_volume_subpath =
# A shared volume claim for the logs
logs_volume_claim =
# For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
# Useful in local environment, discouraged in production
dags_volume_host =
# A hostPath volume for the logs
# Useful in local environment, discouraged in production
logs_volume_host =
# A list of configMapsRefs to envFrom. If more than one configMap is
# specified, provide a comma separated list: configmap_a,configmap_b
env_from_configmap_ref =
# A list of secretRefs to envFrom. If more than one secret is
# specified, provide a comma separated list: secret_a,secret_b
env_from_secret_ref =
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
git_repo =
git_branch =
git_subpath =
# Use git_user and git_password for user authentication or git_ssh_key_secret_name and git_ssh_key_secret_key
# for SSH authentication
git_user =
git_password =
git_sync_root = /git
git_sync_dest = repo
# Mount point of the volume if git-sync is being used.
# i.e. /opt/airflow/dags
git_dags_folder_mount_point =
# To get Git-sync SSH authentication set up follow this format
# airflow-secrets.yaml:
# ---
# apiVersion: v1
# kind: Secret
# metadata:
# name: airflow-secrets
# data:
# # key needs to be gitSshKey
# gitSshKey: <base64_encoded_data>
# ---
# airflow-configmap.yaml:
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: airflow-configmap
# data:
# known_hosts: |
# github.com ssh-rsa <...>
# airflow.cfg: |
# ...
# git_ssh_key_secret_name = airflow-secrets
# git_ssh_known_hosts_configmap_name = airflow-configmap
git_ssh_key_secret_name =
git_ssh_known_hosts_configmap_name =
# To give the git_sync init container credentials via a secret, create a secret
# with two fields: GIT_SYNC_USERNAME and GIT_SYNC_PASSWORD (example below) and
# add `git_sync_credentials_secret = <secret_name>` to your airflow config under the kubernetes section
# Secret Example:
# apiVersion: v1
# kind: Secret
# metadata:
# name: git-credentials
# data:
# GIT_SYNC_USERNAME: <base64_encoded_git_username>
# GIT_SYNC_PASSWORD: <base64_encoded_git_password>
git_sync_credentials_secret =
# For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync
git_sync_container_repository = k8s.gcr.io/git-sync
git_sync_container_tag = v3.1.1
git_sync_init_container_name = git-sync-clone
git_sync_run_as_user = 65533
# The name of the Kubernetes service account to be associated with airflow workers, if any.
# Service accounts are required for workers that require access to secrets or cluster resources.
# See the Kubernetes RBAC documentation for more:
# https://kubernetes.io/docs/admin/authorization/rbac/
worker_service_account_name =
# Any image pull secrets to be given to worker pods. If more than one secret is
# required, provide a comma separated list: secret_a,secret_b
image_pull_secrets =
# GCP Service Account Keys to be provided to tasks run on Kubernetes Executors
# Should be supplied in the format: key-name-1:key-path-1,key-name-2:key-path-2
gcp_service_account_keys =
# Use the service account kubernetes gives to pods to connect to kubernetes cluster.
# It's intended for clients that expect to be running inside a pod running on kubernetes.
# It will raise an exception if called from a process not running in a kubernetes environment.
in_cluster = True
# When running with in_cluster=False change the default cluster_context or config_file
# options to Kubernetes client. Leave these blank to use the default behaviour, as `kubectl` does.
# cluster_context =
# config_file =
# Affinity configuration as a single line formatted JSON object.
# See the affinity model for top-level key names (e.g. `nodeAffinity`, etc.):
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#affinity-v1-core
affinity =
# A list of toleration objects as a single line formatted JSON array
# See:
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
tolerations =
# **kwargs parameters to pass while calling kubernetes client core_v1_api methods from Kubernetes Executor
# provided as a single line formatted JSON dictionary string.
# List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
# See:
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
kube_client_request_args = {"_request_timeout" : [60,60] }
# Worker pods security context options
# See:
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
# Specifies the uid to run the first process of the worker pods containers as
run_as_user =
# Specifies a gid to associate with all containers in the worker pods
# if using a git_ssh_key_secret_name use an fs_group
# that allows for the key to be read, e.g. 65533
fs_group =
# The Key-value pairs to be given to worker pods.
# The worker pods will be scheduled to the nodes of the specified key-value pairs.
# Should be supplied in the format: key = value
# The key-value annotation pairs to be given to worker pods.
# Should be supplied in the format: key = value
# The scheduler sets the following environment variables into your workers. You may define as
# many environment variables as needed and the kubernetes launcher will set them in the launched workers.
# Environment variables in this section are defined as follows
# <environment_variable_key> = <environment_variable_value>
# For example if you wanted to set an environment variable with value `prod` and key
# `ENVIRONMENT` you would follow the following format:
# ENVIRONMENT = prod
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
# The scheduler mounts the following secrets into your workers as they are launched by the
# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the
# defined secrets and mount them as secret environment variables in the launched workers.
# Secrets in this section are defined as follows
# <environment_variable_mount> = <kubernetes_secret_object>=<kubernetes_secret_key>
# For example if you wanted to mount a kubernetes secret key named `postgres_password` from the
# kubernetes secret object `airflow-secret` as the environment variable `POSTGRES_PASSWORD` into
# your workers you would follow the following format:
# POSTGRES_PASSWORD = airflow-secret=postgres_password
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
# The Key-value pairs to be given to worker pods.
# The worker pods will be given these static labels, as well as some additional dynamic labels
# to identify the task.
# Should be supplied in the format: key = value

View File

View File

View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
### Tutorial Documentation
Documentation that goes along with the Airflow tutorial located
from datetime import timedelta
import airflow
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization
default_args = {
'owner': 'Airflow',
'depends_on_past': False,
'start_date': airflow.utils.dates.days_ago(2),
'email': ['airflow@example.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
# 'queue': 'bash_queue',
# 'pool': 'backfill',
# 'priority_weight': 10,
# 'end_date': datetime(2016, 1, 1),
# 'wait_for_downstream': False,
# 'dag': dag,
# 'sla': timedelta(hours=2),
# 'execution_timeout': timedelta(seconds=300),
# 'on_failure_callback': some_function,
# 'on_success_callback': some_other_function,
# 'on_retry_callback': another_function,
# 'sla_miss_callback': yet_another_function,
# 'trigger_rule': 'all_success'
dag = DAG(
description='A simple tutorial DAG',
# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = BashOperator(
t1.doc_md = """\
#### Task Documentation
You can document your task using the attributes `doc_md` (markdown),
`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
rendered in the UI's Task Instance Details page.
dag.doc_md = __doc__
t2 = BashOperator(
bash_command='sleep 5',
templated_command = """
{% for i in range(5) %}
echo "{{ ds }}"
echo "{{ macros.ds_add(ds, 7)}}"
echo "{{ params.my_param }}"
{% endfor %}
t3 = BashOperator(
params={'my_param': 'Parameter I passed in'},
t1 >> [t2, t3]

View File

@ -0,0 +1,860 @@
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This is the template for Airflow's default configuration. When Airflow is
# imported, it looks for a configuration file at $AIRFLOW_HOME/airflow.cfg. If
# it doesn't exist, Airflow uses this template to generate it by replacing
# variables in curly braces with their global values from configuration.py.
# Users should not modify this file; they should customize the generated
# airflow.cfg instead.
# ----------------------- TEMPLATE BEGINS HERE -----------------------
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
# This path must be absolute
dags_folder = {AIRFLOW_HOME}/dags
# The folder where airflow should store its log files
# This path must be absolute
base_log_folder = {AIRFLOW_HOME}/logs
# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elasticsearch.
# Users must supply an Airflow connection id that provides access to the storage
# location. If remote_logging is set to true, see UPDATING.md for additional
# configuration requirements.
remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
# Logging level
logging_level = INFO
fab_logging_level = WARN
# Logging class
# Specify the class that will specify the logging configuration
# This class has to be on the python classpath
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
logging_config_class =
# Log format
# Colour the logs when the controlling terminal is a TTY.
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
# Log filename format
log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log
log_processor_filename_template = {{{{ filename }}}}.log
dag_processor_manager_log_location = {AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log
# The hostname is resolved by a callable, specified here as a path to that callable.
# The format is "package:function". For example,
# default value "socket:getfqdn" means that result from getfqdn() of "socket" package will be used as hostname
# No argument should be required in the function specified.
# If using IP address as hostname is preferred, use value "airflow.utils.net:get_host_ip_address"
hostname_callable = socket:getfqdn
# Default timezone in case supplied date times are naive
# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
default_timezone = utc
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor
executor = SequentialExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; more information
# is available on their website
sql_alchemy_conn = sqlite:///{AIRFLOW_HOME}/airflow.db
# The encoding for the databases
sql_engine_encoding = utf-8
# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool. 0 indicates no limit.
sql_alchemy_pool_size = 5
# The maximum overflow size of the pool.
# When the number of checked-out connections reaches the size set in pool_size,
# additional connections will be returned up to this limit.
# When those additional connections are returned to the pool, they are disconnected and discarded.
# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
# and the total number of "sleeping" connections the pool will allow is pool_size.
# max_overflow can be set to -1 to indicate no overflow limit;
# no limit will be placed on the total number of concurrent connections. Defaults to 10.
sql_alchemy_max_overflow = 10
# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite. If the number of DB connections is ever exceeded,
# a lower config value will allow the system to recover faster.
sql_alchemy_pool_recycle = 1800
# How many seconds to retry re-establishing a DB connection after
# disconnects. Setting this to 0 disables retries.
sql_alchemy_reconnect_timeout = 300
# The schema to use for the metadata database
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 32
# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 16
# Are DAGs paused by default at creation
dags_are_paused_at_creation = True
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 16
# Whether to load the examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = True
# Where your Airflow plugins are stored
plugins_folder = {AIRFLOW_HOME}/plugins
# Secret key to save connection passwords in the db
fernet_key = {FERNET_KEY}
# Whether to disable pickling dags
donot_pickle = False
# How long before timing out a python file import while filling the DagBag
dagbag_import_timeout = 30
# The class to use for running task instances in a subprocess
task_runner = StandardTaskRunner
# If set, tasks without a `run_as_user` argument will be run with this user
# Can be used to de-elevate a sudo user running Airflow when executing tasks
default_impersonation =
# What security module to use (for example kerberos):
security =
# If set to False, enables some insecure features like Charts and Ad Hoc Queries.
# In 2.0 will default to True.
secure_mode = False
# Turn unit test mode on (overwrites many configuration options with test
# values at runtime)
unit_test_mode = False
# Name of handler to read task instance logs.
# Default to use task handler.
task_log_reader = task
# Whether to enable pickling for xcom (note that this is insecure and allows for
# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False).
enable_xcom_pickling = True
# When a task is killed forcefully, this is the amount of time in seconds that
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60
# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
dag_run_conf_overrides_params = False
# Worker initialisation check to validate Metadata Database connection
worker_precheck = False
# When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
dag_discovery_safe_mode = True
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
# webserver
api_client = airflow.api.client.local_client
# If you set web_server_url_prefix, do NOT forget to append it here, ex:
# endpoint_url = http://localhost:8080/myroot
# So api will look like: http://localhost:8080/myroot/api/experimental/...
endpoint_url = http://localhost:8080
# How to authenticate users of the API
auth_backend = airflow.api.auth.backend.default
# what lineage backend to use
backend =
sasl_enabled = False
host =
port = 21000
username =
password =
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080
# The ip specified when starting the web server
web_server_host =
# The port on which to run the web server
web_server_port = 8080
# Paths to the SSL certificate and key for the web server. When both are
# provided SSL will be enabled. This does not change the web server port.
web_server_ssl_cert =
web_server_ssl_key =
# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 120
# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 120
# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1
# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30
# Secret key used to run your flask app
secret_key = temporary_key
# Number of workers to run the Gunicorn web server
workers = 4
# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync
# Log files for the gunicorn webserver. '-' means log to stderr.
access_logfile = -
error_logfile = -
# Expose the configuration file in the web server
# This is only applicable for the flask-admin based web UI (non FAB-based).
# In the FAB-based web UI with RBAC feature,
# access to configuration is controlled by role permissions.
expose_config = False
# Set to true to turn on authentication:
# https://airflow.apache.org/security.html#web-authentication
authenticate = False
# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
# Filtering mode. Choices include user (default) and ldapgroup.
# Ldap group filtering requires using the ldap backend
# Note that the ldap server needs the "memberOf" overlay to be set up
# in order to use the ldapgroup mode.
owner_mode = user
# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree
# Default DAG orientation. Valid values are:
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR
# Puts the webserver in demonstration mode; blurs the names of Operators for
# privacy.
demo_mode = False
# The amount of time (in secs) the webserver will wait for the initial handshake
# while fetching logs from another worker machine
log_fetch_timeout_sec = 5
# By default, the webserver shows paused DAGs. Flip this to hide paused
# DAGs by default
hide_paused_dags_by_default = False
# Consistent page size across all listing views in the UI
page_size = 100
# Use FAB-based webserver with RBAC feature
rbac = False
# Define the color of navigation bar
navbar_color = #007A87
# Default dagrun to show in UI
default_dag_run_display_number = 25
# Enable werkzeug `ProxyFix` middleware
enable_proxy_fix = False
# Set secure flag on session cookie
cookie_secure = False
# Set samesite policy on session cookie
cookie_samesite =
# Default setting for wrap toggle on DAG code and TI log views.
default_wrap = False
# Send anonymous user activity to your analytics tool
# analytics_tool = # choose from google_analytics, segment, or metarouter
# analytics_id = XXXXXXXXXXX
email_backend = airflow.utils.email.send_email_smtp
# If you want airflow to send emails on retries, failure, and you want to use
# the airflow.utils.email.send_email_smtp function, you have to configure an
# smtp server here
smtp_host = localhost
smtp_starttls = True
smtp_ssl = False
# Uncomment and set the user/pass settings if you want to use SMTP AUTH
# smtp_user = airflow
# smtp_password = airflow
smtp_port = 25
smtp_mail_from = airflow@example.com
# This section only applies if you are using the CeleryExecutor in
# [core] section above
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor
# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
worker_concurrency = 16
# The maximum and minimum concurrency that will be used when starting workers with the
# "airflow worker" command (always keep minimum processes, but grow to maximum if necessary).
# Note the value should be "max_concurrency,min_concurrency"
# Pick these numbers based on resources on worker box and the nature of the task.
# If autoscale option is available, worker_concurrency will be ignored.
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
# worker_autoscale = 16,12
# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the worker's local log files to the airflow main
# web server, which then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open and
# visible from the main web server so that it can connect to the workers.
worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
# The Celery result_backend. When a job finishes, it needs to update the
# metadata of the job. Therefore it will post a message on a message bus,
# or insert it into a database (depending on the backend)
# This status is used by the scheduler to update the state of the task
# The use of a database is highly recommended
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the IP that Celery Flower runs on
flower_host =
# The root URL for Flower
# Ex: flower_url_prefix = /flower
flower_url_prefix =
# This defines the port that Celery Flower runs on
flower_port = 5555
# Securing Flower with Basic Authentication
# Accepts user:password pairs separated by a comma
# Example: flower_basic_auth = user1:password1,user2:password2
flower_basic_auth =
# Default queue that tasks get assigned to and that workers listen on.
default_queue = default
# How many processes CeleryExecutor uses to sync task state.
# 0 means to use max(1, number of cores - 1) processes.
sync_parallelism = 0
# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
# Celery Pool implementation.
# Choices include: prefork (default), eventlet, gevent or solo.
# See:
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
pool = prefork
# This section is for specifying options which can be passed to the
# underlying celery broker transport. See:
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options
# The visibility timeout defines the number of seconds to wait for the worker
# to acknowledge the task before the message is redelivered to another worker.
# Make sure to increase the visibility timeout to match the time of the longest
# ETA you're planning to use.
# visibility_timeout is only supported for Redis and SQS celery brokers.
# See:
# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options
#visibility_timeout = 21600
# This section only applies if you are using the DaskExecutor in
# [core] section above
# The IP address and port of the Dask cluster's scheduler.
cluster_address =
# TLS/ SSL settings to access a secured Dask scheduler.
tls_ca =
tls_cert =
tls_key =
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 5
# after how much time should the scheduler terminate in seconds
# -1 indicates to run continuously (see also num_runs)
run_duration = -1
# After how much time (in seconds) new DAGs should be picked up from the filesystem
min_file_process_interval = 0
# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
dag_dir_list_interval = 300
# How often should stats be printed to the logs
print_stats_interval = 30
# If the last scheduler heartbeat happened more than scheduler_health_check_threshold ago (in seconds),
# scheduler is considered unhealthy.
# This is used by the health check in the "/health" endpoint
scheduler_health_check_threshold = 30
child_process_log_directory = {AIRFLOW_HOME}/logs/scheduler
# Local task jobs periodically heartbeat to the DB. If the job has
# not heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300
# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
# will not do scheduler catchup if this is False,
# however it can be set on a per DAG basis in the
# DAG definition (catchup)
catchup_by_default = True
# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by one
# or more of the following:
# - reversion to full table scan
# - complexity of query predicate
# - excessive locking
# Additionally, you may hit the maximum allowable query length for your db.
# Set this to 0 for no limit (not advised)
max_tis_per_query = 512
# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow
# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run.
max_threads = 2
authenticate = False
# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL
# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
# The framework name which Airflow scheduler will register itself as on mesos
framework_name = Airflow
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1
# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256
# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False
# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits
# until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos
# shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# failover_timeout = 604800
# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
authenticate = False
# Mesos credentials, if authentication is enabled
# default_principal = admin
# default_secret = admin
# Optional Docker Image to run on slave before running the command
# This image should be accessible from the mesos slave, i.e. the mesos slave
# should be able to pull this docker image before executing the command.
# docker_image_slave = puckel/docker-airflow
ccache = /tmp/airflow_krb5_ccache
# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab
api_rev = v3
# UI to hide sensitive variable fields when set to True
hide_sensitive_variable_fields = True
# Elasticsearch host
host =
# Format of the log_id, which is used to query for a given tasks logs
log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code will prefix the https:// automatically, don't include that here.
frontend =
# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False
# Instead of the default log formatter, write the log lines as JSON
json_format = False
# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message
use_ssl = False
verify_certs = True
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =
worker_container_tag =
worker_container_image_pull_policy = IfNotPresent
# If True (default), worker pods will be deleted upon termination
delete_worker_pods = True
# Number of Kubernetes Worker Pod creation calls per scheduler loop
worker_pods_creation_batch_size = 1
# The Kubernetes namespace where airflow workers should be created. Defaults to `default`
namespace = default
# The name of the Kubernetes ConfigMap Containing the Airflow Configuration (this file)
airflow_configmap =
# If the docker image already contains the DAGs, set this to `True` and the worker will search for dags in dags_folder,
# otherwise use git sync or dags volume claim to mount DAGs
dags_in_image = False
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
dags_volume_subpath =
# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
dags_volume_claim =
# For volume mounted logs, the worker will look in this subpath for logs
logs_volume_subpath =
# A shared volume claim for the logs
logs_volume_claim =
# For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
# Useful in local environment, discouraged in production
dags_volume_host =
# A hostPath volume for the logs
# Useful in local environment, discouraged in production
logs_volume_host =
# A list of configMapsRefs to envFrom. If more than one configMap is
# specified, provide a comma separated list: configmap_a,configmap_b
env_from_configmap_ref =
# A list of secretRefs to envFrom. If more than one secret is
# specified, provide a comma separated list: secret_a,secret_b
env_from_secret_ref =
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
git_repo =
git_branch =
git_subpath =
# Use git_user and git_password for user authentication or git_ssh_key_secret_name and git_ssh_key_secret_key
# for SSH authentication
git_user =
git_password =
git_sync_root = /git
git_sync_dest = repo
# Mount point of the volume if git-sync is being used.
# i.e. {AIRFLOW_HOME}/dags
git_dags_folder_mount_point =
# To get Git-sync SSH authentication set up follow this format
# airflow-secrets.yaml:
# ---
# apiVersion: v1
# kind: Secret
# metadata:
# name: airflow-secrets
# data:
# # key needs to be gitSshKey
# gitSshKey: <base64_encoded_data>
# ---
# airflow-configmap.yaml:
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: airflow-configmap
# data:
# known_hosts: |
# github.com ssh-rsa <...>
# airflow.cfg: |
# ...
# git_ssh_key_secret_name = airflow-secrets
# git_ssh_known_hosts_configmap_name = airflow-configmap
git_ssh_key_secret_name =
git_ssh_known_hosts_configmap_name =
# To give the git_sync init container credentials via a secret, create a secret
# with two fields: GIT_SYNC_USERNAME and GIT_SYNC_PASSWORD (example below) and
# add `git_sync_credentials_secret = <secret_name>` to your airflow config under the kubernetes section
# Secret Example:
# apiVersion: v1
# kind: Secret
# metadata:
# name: git-credentials
# data:
# GIT_SYNC_USERNAME: <base64_encoded_git_username>
# GIT_SYNC_PASSWORD: <base64_encoded_git_password>
git_sync_credentials_secret =
# For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync
git_sync_container_repository = k8s.gcr.io/git-sync
git_sync_container_tag = v3.1.1
git_sync_init_container_name = git-sync-clone
git_sync_run_as_user = 65533
# The name of the Kubernetes service account to be associated with airflow workers, if any.
# Service accounts are required for workers that require access to secrets or cluster resources.
# See the Kubernetes RBAC documentation for more:
# https://kubernetes.io/docs/admin/authorization/rbac/
worker_service_account_name =
# Any image pull secrets to be given to worker pods, If more than one secret is
# required, provide a comma separated list: secret_a,secret_b
image_pull_secrets =
# GCP Service Account Keys to be provided to tasks run on Kubernetes Executors
# Should be supplied in the format: key-name-1:key-path-1,key-name-2:key-path-2
gcp_service_account_keys =
# Use the service account kubernetes gives to pods to connect to kubernetes cluster.
# It's intended for clients that expect to be running inside a pod running on kubernetes.
# It will raise an exception if called from a process not running in a kubernetes environment.
in_cluster = True
# When running with in_cluster=False change the default cluster_context or config_file
# options passed to the Kubernetes client. Leave these blank to use the default behaviour, as `kubectl` does.
# cluster_context =
# config_file =
# Affinity configuration as a single line formatted JSON object.
# See the affinity model for top-level key names (e.g. `nodeAffinity`, etc.):
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#affinity-v1-core
affinity =
# A list of toleration objects as a single line formatted JSON array
# See:
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
tolerations =
# **kwargs parameters to pass while calling a kubernetes client core_v1_api methods from Kubernetes Executor
# provided as a single line formatted JSON dictionary string.
# List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
# See:
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
kube_client_request_args = {{"_request_timeout" : [60,60] }}
# Worker pods security context options
# See:
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
# Specifies the uid to run the first process of the worker pods containers as
run_as_user =
# Specifies a gid to associate with all containers in the worker pods
# if using a git_ssh_key_secret_name use an fs_group
# that allows for the key to be read, e.g. 65533
fs_group =
# The Key-value pairs to be given to worker pods.
# The worker pods will be scheduled to the nodes of the specified key-value pairs.
# Should be supplied in the format: key = value
# The Key-value annotations pairs to be given to worker pods.
# Should be supplied in the format: key = value
# The scheduler sets the following environment variables into your workers. You may define as
# many environment variables as needed and the kubernetes launcher will set them in the launched workers.
# Environment variables in this section are defined as follows
# <environment_variable_key> = <environment_variable_value>
# For example, if you wanted to set an environment variable with value `prod` and key
# `ENVIRONMENT` you would use the following format:
# ENVIRONMENT = prod
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
# The scheduler mounts the following secrets into your workers as they are launched by the
# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the
# defined secrets and mount them as secret environment variables in the launched workers.
# Secrets in this section are defined as follows
# <environment_variable_mount> = <kubernetes_secret_object>=<kubernetes_secret_key>
# For example if you wanted to mount a kubernetes secret key named `postgres_password` from the
# kubernetes secret object `airflow-secret` as the environment variable `POSTGRES_PASSWORD` into
# your workers you would follow the following format:
# POSTGRES_PASSWORD = airflow-secret=postgres_password
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
# The Key-value pairs to be given to worker pods.
# The worker pods will be given these static labels, as well as some additional dynamic labels
# to identify the task.
# Should be supplied in the format: key = value

View File

View File

View File

@ -24,7 +24,7 @@ services:
restart: always
image: puckel/docker-airflow:1.9.0-4
image: puckel/docker-airflow:1.10.4
command: webserver
hostname: master
@ -52,7 +52,7 @@ services:
restart: always
image: puckel/docker-airflow:1.9.0-4
image: puckel/docker-airflow:1.10.4
command: scheduler
- ./data/airflow/dags:/usr/local/airflow/dags
@ -66,7 +66,7 @@ services:
restart: always
image: puckel/docker-airflow:1.9.0-4
image: puckel/docker-airflow:1.10.4
command: flower
- "5555:5555"

View File

@ -3,7 +3,7 @@ version: '2.1'
image: puckel/docker-airflow:1.9.0-4
image: puckel/docker-airflow:1.10.4
command: worker
hostname: worker1

airflow/docker-stack.yaml Normal file
View File

@ -0,0 +1,110 @@
version: "3.7"
image: redis:alpine
command: --save 900 1
- "6379:6379"
- /data/redis:/data
replicas: 1
- node.role == manager
condition: on-failure
image: postgres:alpine
- "5432:5432"
- /data/postgres:/var/lib/postgresql/data
- POSTGRES_DB=airflow
replicas: 1
- node.role == manager
condition: on-failure
image: vimagick/airflow
command: webserver
- "8080:8080"
- airflow_data:/opt/airflow
replicas: 1
- node.role == manager
condition: on-failure
- postgres
- redis
image: vimagick/airflow
command: scheduler
- airflow_data:/opt/airflow
replicas: 1
- node.role == manager
condition: on-failure
- webserver
image: vimagick/airflow
command: flower
- "5555:5555"
- airflow_data:/opt/airflow
replicas: 1
- node.role == manager
condition: on-failure
- webserver
image: vimagick/airflow
command: worker
- airflow_data:/opt/airflow
replicas: 0
- node.role == worker
condition: on-failure
- webserver
driver: local
type: nfs
o: "addr=,nolock,soft,rw"
device: ":/export/airflow"

airflow/systemd/airflow Normal file
View File

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This file is the environment file for Airflow. Put this file in /etc/sysconfig/airflow per default
# configuration of the systemd unit files.

View File

@ -0,0 +1,16 @@
Description=Airflow celery flower
After=network.target postgresql.service redis-server.service
Wants=postgresql.service redis-server.service
ExecStart=/home/airflow/.virtualenvs/airflow/bin/airflow flower --port=5555

View File

@ -0,0 +1,16 @@
Description=Airflow scheduler daemon
After=network.target postgresql.service redis-server.service
Wants=postgresql.service redis-server.service
ExecStart=/home/airflow/.virtualenvs/airflow/bin/airflow scheduler

View File

@ -0,0 +1,18 @@
Description=Airflow webserver daemon
After=network.target postgresql.service redis-server.service
Wants=postgresql.service redis-server.service
ExecStart=/home/airflow/.virtualenvs/airflow/bin/airflow webserver --pid /run/airflow/webserver.pid

View File

@ -0,0 +1,16 @@
Description=Airflow celery worker daemon
After=network.target postgresql.service redis-server.service
Wants=postgresql.service redis-server.service
ExecStart=/home/airflow/.virtualenvs/airflow/bin/airflow worker

ambari/Dockerfile Normal file
View File

@ -0,0 +1,29 @@
# Dockerfile for ambari
#
# Builds an Ambari server image on Ubuntu 18.04: installs OpenJDK 8 and the
# PostgreSQL JDBC driver, adds the Ambari apt repository, installs
# ambari-server and registers the JDBC driver with it.
FROM ubuntu:18.04
# MAINTAINER is deprecated; the same information is recorded as a label.
LABEL maintainer="EasyPi Software Foundation"
# NOTE(review): ${AMBARI_VERSION} is expanded below but is not set in the lines
# visible in this excerpt - confirm it is defined before this instruction.
ENV AMBARI_SOURCE=http://public-repo-1.hortonworks.com/ambari/ubuntu18/2.x/updates/${AMBARI_VERSION}/ambari.list
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# curl runs with --fail (-f) so that an HTTP error aborts the build instead of
# saving the server's error page as an apt sources file.
# The apt lists are removed in the same layer that created them.
RUN set -xe \
    && apt-get update \
    && apt-get install -y curl gnupg libpostgresql-jdbc-java openjdk-8-jdk openssh-client \
    && curl -fsSL ${AMBARI_SOURCE} > /etc/apt/sources.list.d/ambari.list \
    && apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD \
    && apt-get update \
    && apt-get install -y ambari-server \
    && ambari-server setup --jdbc-db=postgres --jdbc-driver=/usr/share/java/postgresql.jar \
    && rm -rf /var/lib/apt/lists/*
# Declared after the RUN step above so that files written to the configuration
# directory during the build are kept.
VOLUME /etc/ambari-server/conf
# Shell form is required here because of the $(cat ...) substitution.
# `tail --pid` follows the server log and exits once the process named in the
# pid file is gone. The `sleep 5` presumably gives `ambari-server start` time
# to write that pid file - TODO confirm.
CMD set -ex \
    && ambari-server start \
    && sleep 5 \
    && tail --pid=$(cat /var/run/ambari-server/ambari-server.pid) -f /var/log/ambari-server/ambari-server.log

ambari/README.md Normal file
View File

@ -0,0 +1,41 @@
The [Apache Ambari][1] project is aimed at making Hadoop management simpler by
developing software for provisioning, managing, and monitoring Apache Hadoop
clusters. [Ambari][2] provides an intuitive, easy-to-use Hadoop management web UI
backed by its RESTful APIs.
Make sure that ambari-server is reachable by ambari-agents via DNS.
## Up and Running
$ docker-compose run --rm server bash
>>> ambari-server setup
Customize user account for ambari-server daemon [y/n] (n)?
Do you want to change Oracle JDK [y/n] (n)?
Enter advanced database configuration [y/n] (n)? y
Enter choice (4):
Hostname (postgres):
Port (5432):
Database name (ambari):
Postgres schema (public):
Username (ambari):
Enter Database Password (ambari):
Proceed with configuring remote database connection properties [y/n] (y)?
>>> PGUSER=ambari PGPASSWORD=ambari psql -h postgres -d ambari -f /var/lib/ambari-server/resources/Ambari-DDL-Postgres-CREATE.sql
$ docker-compose up -d
Starting ambari_postgres_1 ... done
Starting ambari_server_1 ... done
$ curl http://localhost:8080/
[1]: https://ambari.apache.org/
[2]: https://docs.cloudera.com/HDPDocuments/Ambari/Ambari-

View File

@ -0,0 +1,118 @@
# Copyright 2011 The Apache Software Foundation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
http.x-xss-protection=1; mode=block
jdk1.8.desc=Oracle JDK 1.8 + Java Cryptography Extension (JCE) Policy Files 8
views.http.x-xss-protection=1; mode=block

View File

@ -0,0 +1,10 @@
com.sun.security.jgss.krb5.initiate {
com.sun.security.auth.module.Krb5LoginModule required

View File

@ -0,0 +1,116 @@
# Copyright 2011 The Apache Software Foundation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define some default values that can be overridden by system properties
# Root logger option
# Direct log messages to a log file
log4j.appender.file.layout.ConversionPattern=%d{ISO8601} %5p [%t] %c{1}:%L - %m%n
# Log config changes
log4j.appender.configchange.layout.ConversionPattern=%d{ISO8601} %5p - %m%n
# Log alert state changes
log4j.appender.alerts.layout.ConversionPattern=%d{ISO8601} %m%n
# Log database check process
log4j.logger.org.apache.ambari.server.checks.DatabaseConsistencyChecker=INFO, dbcheck
log4j.appender.dbcheck.layout.ConversionPattern=%d{ISO8601} %5p - %m%n
log4j.logger.org.apache.ambari.server.checks.DatabaseConsistencyCheckHelper=INFO, dbcheckhelper
log4j.appender.dbcheckhelper.layout.ConversionPattern=%d{ISO8601} %5p - %m%n
# Log stack merger
log4j.appender.stackmerger.layout.ConversionPattern=%d{ISO8601} %5p - %m%n
# EclipseLink -> slf4j bridge
# Jersey
# Jetty
# Audit logging

View File

@ -0,0 +1,65 @@
# Copyright 2011 The Apache Software Foundation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#################### Metrics Source Configs #####################
#Metric sources : jvm,database
#### JVM Source Configs ###
#### Database Source Configs ###
# Note : To enable Database metrics source completely, add the following property to ambari.properties as well
# server.persistence.properties.eclipselink.profiler=org.apache.ambari.server.metrics.system.impl.AmbariPerformanceMonitor
# Ambari performance monitor granularity : NONE / NORMAL / HEAVY / ALL
# Collection interval in milliseconds
# Include entities to be tracked.
# Include some metrics which have the keyword even if they are not part of requested Entities.
# Examples
# Query Types
# ReadAllQuery,ReadObjectQuery,UpdateObjectQuery,ReportQuery,InsertObjectQuery,ValueReadQuery,DataModifyQuery,DoesExistQuery
# Query stages
# ObjectBuilding,QueryPreparation,SqlPrepare,SqlGeneration,StatementExecute,RowFetch,ConnectCalls,UnitOfWorkCommit,ClientSessionReleases,ConnectionManagement,CacheHits
# Sequences
# host_role_command_id_seq,alert_history_id_seq
############## General Metrics Service Configs #################

View File

@ -0,0 +1 @@

ambari/data/ssh/config Normal file
View File

@ -0,0 +1,11 @@
Host ambari-agent1
User root
Host ambari-agent2
User root
Host ambari-agent3
User root

ambari/docker-compose.yml Normal file
View File

@ -0,0 +1,31 @@
version: "3.7"
image: vimagick/ambari
hostname: ambari-server
- "8080:8080"
- "8440:8440"
- "8441:8441"
- ./data/ambari:/etc/ambari-server/conf
- ./data/log:/var/log/ambari-server
- ambari-agent1:
- ambari-agent2:
- ambari-agent3:
- postgres
restart: unless-stopped
image: postgres:alpine
- ./data/postgres:/var/lib/postgresql/data
- POSTGRES_DB=ambari
restart: unless-stopped

apacheds/Dockerfile Normal file
View File

@ -0,0 +1,40 @@
# Dockerfile for apacheds
#
# Builds an ApacheDS (LDAP server) image on Debian buster with a headless
# OpenJDK 11 runtime, the LDAP command line utilities and gosu.
FROM debian:buster
# MAINTAINER is deprecated; the same information is recorded as a label.
LABEL maintainer="EasyPi Software Foundation"
# NOTE(review): ${GOSU_VERSION}, ${APACHEDS_VERSION} and ${APACHEDS_FILE} are
# expanded below but are not set in the lines visible in this excerpt; likewise
# no visible step downloads ${APACHEDS_FILE} from ${APACHEDS_URL} before
# `dpkg -i` runs. Confirm both against the complete Dockerfile.
ENV GOSU_URL=https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64
ENV APACHEDS_URL=http://mirrors.advancedhosters.com/apache/directory/apacheds/dist/${APACHEDS_VERSION}/${APACHEDS_FILE}
ENV APACHEDS_DATA=/var/lib/apacheds
# - apt-get is used instead of apt: apt's command line interface is not
#   guaranteed stable and it warns when used from scripts.
# - curl runs with --fail (-f) so that an HTTP error aborts the build instead of
#   saving the error page as the gosu executable.
# - `gosu nobody true` is a smoke test that the downloaded binary runs.
# - The sed call removes the "-${APACHEDS_VERSION}" suffix on the
#   INSTANCES_DIRECTORY line of the launcher script (presumably so that the
#   instances directory resolves to ${APACHEDS_DATA} - TODO confirm).
# - The package file and the apt lists are removed in the same layer.
RUN set -xe \
    && apt-get update \
    && apt-get install -y \
        apt-utils \
        curl \
        ldap-utils \
        openjdk-11-jre-headless \
        procps \
    && curl -fsSL ${GOSU_URL} > /usr/local/bin/gosu \
    && chmod +x /usr/local/bin/gosu \
    && gosu nobody true \
    && dpkg -i ${APACHEDS_FILE} \
    && sed -i "/INSTANCES_DIRECTORY/s/-${APACHEDS_VERSION}//" /opt/apacheds-${APACHEDS_VERSION}/bin/apacheds \
    && rm -rf ${APACHEDS_FILE} /var/lib/apt/lists/*
# Ports published by the accompanying compose file (10389 and 10636).
EXPOSE 10389 10636
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
# Resolved through PATH; the script is copied to /usr/local/bin above.
ENTRYPOINT ["docker-entrypoint.sh"]

apacheds/README.md Normal file
View File

@ -0,0 +1,13 @@
image: vimagick/apacheds
- "10389:10389"
- "10636:10636"
- ./data:/var/lib/apacheds
restart: unless-stopped

View File

@ -0,0 +1,8 @@
image: vimagick/apacheds
- "10389:10389"
- "10636:10636"
- ./data:/var/lib/apacheds
restart: unless-stopped

apacheds/docker-entrypoint.sh Executable file
View File

@ -0,0 +1,12 @@
set -e
if ! [ -d ${APACHEDS_INSTANCE_DIR} ]; then
gosu ${APACHEDS_USER} /opt/apacheds-${APACHEDS_VERSION}/bin/apacheds start ${APACHEDS_INSTANCE}
tail -n 0 -f ${APACHEDS_INSTANCE_DIR}/log/apacheds.log

apm-server/README.md Normal file
View File

@ -0,0 +1,15 @@
The [APM Server][1] receives data from [APM agents][2] and transforms them into
Elasticsearch documents. It does this by exposing an HTTP server endpoint to
which agents stream the APM data they collect. After the APM Server has
validated and processed events from the APM agents, the server transforms the
data into Elasticsearch documents and stores them in corresponding
Elasticsearch indices.
The APM Server works in conjunction with APM agents, Elasticsearch, and Kibana.
Please view the APM Overview for details on how these components work together.
[1]: https://www.elastic.co/guide/en/apm/server/current/index.html
[2]: https://www.elastic.co/guide/en/apm/agent/index.html

View File

@ -0,0 +1,841 @@
################### APM Server Configuration #########################
############################# APM Server ######################################
# Defines the host and port the server is listening on. use "unix:/path/to.sock" to listen on a unix domain socket.
host: ""
# Maximum permitted size in bytes of a request's header accepted by the server to be processed.
#max_header_size: 1048576
# Maximum permitted duration for reading an entire request.
#read_timeout: 30s
# Maximum permitted duration for writing a response.
#write_timeout: 30s
# Maximum duration in seconds before releasing resources when shutting down the server.
#shutdown_timeout: 5s
# Maximum allowed size in bytes of a single event
#max_event_size: 307200
# Maximum number of new connections to accept simultaneously (0 means unlimited)
# max_connections: 0
# Authorization token to be checked. If a token is set here the agents must
# send their token in the following format: Authorization: Bearer <secret-token>.
# It is recommended to use an authorization token in combination with SSL enabled,
# and save the token in the beats keystore.
#ssl.enabled: false
#ssl.certificate : "path/to/cert"
#ssl.key : "path/to/private_key"
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ""
# To enable real user monitoring (RUM) support set this to true.
#enabled: false
# Defines the maximum amount of events allowed to be sent to the APM Server RUM
# endpoint per ip per second. Defaults to 300.
#limit: 300
# An LRU cache is used to keep a rate limit per IP for the most recently seen IPs.
# This setting defines the number of unique IPs that can be tracked in the cache.
# Sites with many concurrent clients should consider increasing this limit. Defaults to 1000.
#lru_size: 1000
#-- General RUM settings
# Comma separated list of permitted origins for real user monitoring.
# User-agents will send an origin header that will be validated against this list.
# An origin is made of a protocol scheme, host and port, without the url path.
# Allowed origins in this setting can have * to match anything (eg.: http://*.example.com)
# If an item in the list is a single '*', everything will be allowed
#allow_origins : ['*']
# Regexp to be matched against a stacktrace frame's `file_name` and `abs_path` attributes.
# If the regexp matches, the stacktrace frame is considered to be a library frame.
#library_pattern: "node_modules|bower_components|~"
# Regexp to be matched against a stacktrace frame's `file_name`.
# If the regexp matches, the stacktrace frame is not used for calculating error groups.
# The default pattern excludes stacktrace frames that have a filename starting with '/webpack'
#exclude_from_grouping: "^/webpack"
# If a source map has previously been uploaded, source mapping is automatically applied
# to all error and transaction documents sent to the RUM endpoint.
# Source maps are always fetched from Elasticsearch, by default using the output.elasticsearch configuration.
# A different instance must be configured when using any other output.
# This setting only affects sourcemap reads - the output determines where sourcemaps are written.
# Array of hosts to connect to.
# Scheme and port can be left out and will be set to the default (http and 9200)
# In case you specify an additional path, the scheme is required: http://localhost:9200/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:9200
# hosts: ["localhost:9200"]
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# The `cache.expiration` determines how long a source map should be cached before fetching it again from Elasticsearch.
# Note that values configured without a time unit will be interpreted as seconds.
#expiration: 5m
# Source maps are stored in a separate index.
# If the default index pattern for source maps at 'outputs.elasticsearch.indices'
# is changed, a matching index pattern needs to be specified here.
#index_pattern: "apm-*-sourcemap*"
# If set to true, APM Server augments data received by the agent with the original IP of the backend server,
# or the IP and User Agent of the real user (RUM requests). It defaults to true.
#capture_personal_data: true
# golang expvar support - https://golang.org/pkg/expvar/
# Set to true to Expose expvar
#enabled: false
# Url to expose expvar
#url: "/debug/vars"
# Instrumentation support for the server's HTTP endpoints and event publisher.
# Set to true to enable instrumentation of the APM server itself.
#enabled: false
# Environment in which the APM Server is running on (eg: staging, production, etc.)
#environment: ""
# Remote host to report instrumentation results to.
# - http://remote-apm-server:8200
# Remote apm-servers' secret_token
# Metrics endpoint
# Set to false to disable the metrics endpoint
#enabled: true
# A pipeline is a definition of processors applied to documents when writing them to Elasticsearch.
# Using pipelines involves two steps:
# (1) registering a pipeline
# (2) applying a pipeline during data ingestion (see `output.elasticsearch.pipelines`)
# You can manually register pipelines, or use this configuration option to ensure
# pipelines are loaded and registered at the configured Elasticsearch instances.
# Automatic pipeline registration requires
# * `output.elasticsearch` to be enabled and configured.
# * having the required Elasticsearch Processor Plugins installed.
# APM Server default pipelines require you to have the `Ingest User Agent Plugin` installed.
# Find the default pipeline configuration at `ingest/pipeline/definition.json`.
# Registers pipeline definitions in Elasticsearch on APM Server startup. Defaults to false.
#enabled: false
# Overwrites existing pipeline definitions in Elasticsearch. Defaults to true.
#overwrite: true
#================================ General ======================================
# Internal queue configuration for buffering events to be published.
# Queue type by name (default 'mem')
# The memory queue will present all available events (up to the outputs
# bulk_max_size) to the output, the moment the output is ready to serve
# another batch of events.
# Max number of events the queue can buffer.
#events: 4096
# Hints the minimum number of events stored in the queue,
# before providing a batch of events to the outputs.
# A value of 0 (the default) ensures events are immediately available
# to be sent to the outputs.
#flush.min_events: 2048
# Maximum duration after which events are available to the outputs,
# if the number of events stored in the queue is < `flush.min_events`.
#flush.timeout: 1s
# Sets the maximum number of CPUs that can be executing simultaneously. The
# default is the number of logical CPUs available in the system.
#============================== Template =====================================
# A template is used to set the mapping in Elasticsearch
# By default template loading is enabled and the template is loaded.
# These settings can be adjusted to load your own template or overwrite existing ones.
# Set to false to disable template loading.
#setup.template.enabled: true
# Template name. By default the template name is "apm-%{[beat.version]}"
# The template name and pattern has to be set in case the elasticsearch index pattern is modified.
#setup.template.name: "apm-%{[beat.version]}"
# Template pattern. By default the template pattern is "apm-%{[beat.version]}-*" to apply to the default index settings.
# The first part is the version of the beat and then -* is used to match all daily indices.
# The template name and pattern has to be set in case the elasticsearch index pattern is modified.
#setup.template.pattern: "apm-%{[beat.version]}-*"
# Path to fields.yml file to generate the template
#setup.template.fields: "${path.config}/fields.yml"
# Overwrite existing template
#setup.template.overwrite: false
# Elasticsearch template settings
# A dictionary of settings to place into the settings.index dictionary
# of the Elasticsearch template. For more details, please check
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
#number_of_shards: 1
#codec: best_compression
#number_of_routing_shards: 30
#mapping.total_fields.limit: 2000
# A dictionary of settings for the _source field. For more details, please check
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html
#enabled: false
#============================== Deprecated: Dashboards =====================================
# Deprecated: Loading dashboards from the APM Server into Kibana is deprecated from 6.4 on.
# We suggest to use the Kibana UI to load APM Server dashboards and index pattern instead.
# These settings control loading the sample dashboards to the Kibana index. Loading
# the dashboards is disabled by default and can be enabled either by setting the
# options here, or by using the `-setup` CLI flag or the `setup` command.
#setup.dashboards.enabled: false
# The directory from where to read the dashboards. The default is the `kibana`
# folder in the home path.
#setup.dashboards.directory: ${path.home}/kibana
# The URL from where to download the dashboards archive. It is used instead of
# the directory if it has a value.
# The file archive (zip file) from where to read the dashboards. It is used instead
# of the directory when it has a value.
# The name of the Kibana index to use for setting the configuration. Default is ".kibana"
#setup.dashboards.kibana_index: .kibana
# The Elasticsearch index name. This overwrites the index name defined in the
# dashboards and index pattern. Example: testbeat-*
# The dashboards.index needs to be changed in case the elasticsearch index pattern is modified.
# Always use the Kibana API for loading the dashboards instead of autodetecting
# how to install the dashboards by first querying Elasticsearch.
#setup.dashboards.always_kibana: false
# If true and Kibana is not reachable at the time when dashboards are loaded,
# it will retry to reconnect to Kibana instead of exiting with an error.
#setup.dashboards.retry.enabled: false
# Duration interval between Kibana connection retries.
#setup.dashboards.retry.interval: 1s
# Maximum number of retries before exiting with an error, 0 for unlimited retrying.
#setup.dashboards.retry.maximum: 0
#============================== Deprecated: Kibana =====================================
# Deprecated: Starting with APM Server version 6.4, loading dashboards and index pattern
# from the APM Server into Kibana is deprecated.
# We suggest to use the Kibana UI to load APM Server dashboards and index pattern instead.
# Setting up a Kibana endpoint is not necessary when loading the index pattern and dashboards via the UI.
# Kibana Host
# Scheme and port can be left out and will be set to the default (http and 5601)
# In case you specify an additional path, the scheme is required: http://localhost:5601/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:5601
#host: "localhost:5601"
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# Optional HTTP Path
#path: ""
# Use SSL settings for HTTPS. Default is false.
#ssl.enabled: false
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# SSL configuration. It is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
#============================= Elastic Cloud ==================================
# These settings simplify using APM Server with the Elastic Cloud (https://cloud.elastic.co/).
# The cloud.id setting overwrites the `output.elasticsearch.hosts` and
# [deprecated] `setup.kibana.host` options.
# You can find the `cloud.id` in the Elastic Cloud web UI.
# The cloud.auth setting overwrites the `output.elasticsearch.username` and
# `output.elasticsearch.password` settings. The format is `<user>:<pass>`.
#================================ Outputs =====================================
# Configure what output to use when sending the data collected by the beat.
#-------------------------- Elasticsearch output ------------------------------
hosts: ["elasticsearch:9200"]
# Boolean flag to enable or disable the output module.
#enabled: true
# Set gzip compression level.
#compression_level: 0
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# Dictionary of HTTP parameters to pass within the url with index operations.
#param1: value1
#param2: value2
# Number of workers per Elasticsearch host.
#worker: 1
# By using the configuration below, apm documents are stored to separate indices,
# depending on their `processor.event`:
# - error
# - transaction
# - span
# - sourcemap
# The indices are all prefixed with `apm-%{[beat.version]}`.
# To allow managing indices based on their age, all indices (except for sourcemaps)
# end with the information of the day they got indexed.
# e.g. "apm-6.3.0-transaction-2018.03.20"
# Be aware that you can only specify one Elasticsearch template and one Kibana Index Pattern.
# In case you modify the index patterns you must also update those configurations accordingly,
# as they need to be aligned:
# * `setup.template.name`
# * `setup.template.pattern`
# * `setup.dashboards.index`
#index: "apm-%{[beat.version]}-%{+yyyy.MM.dd}"
- index: "apm-%{[beat.version]}-sourcemap"
processor.event: "sourcemap"
- index: "apm-%{[beat.version]}-error-%{+yyyy.MM.dd}"
processor.event: "error"
- index: "apm-%{[beat.version]}-transaction-%{+yyyy.MM.dd}"
processor.event: "transaction"
- index: "apm-%{[beat.version]}-span-%{+yyyy.MM.dd}"
processor.event: "span"
- index: "apm-%{[beat.version]}-metric-%{+yyyy.MM.dd}"
processor.event: "metric"
- index: "apm-%{[beat.version]}-onboarding-%{+yyyy.MM.dd}"
processor.event: "onboarding"
# A pipeline is a definition of processors applied to documents when writing them to Elasticsearch.
# APM Server comes with a default pipeline definition, located at `ingest/pipeline/definition.json`.
# Pipelines are disabled by default. To make use of them you have to:
# (1) ensure pipelines are registered in Elasticsearch, see `apm-server.register.ingest.pipeline`
# (2) enable the following:
#- pipeline: "apm_user_agent"
# Optional HTTP Path
#path: "/elasticsearch"
# Custom HTTP headers to add to each request
# X-My-Header: Contents of the header
# Proxy server url
#proxy_url: http://proxy:3128
# The number of times a particular Elasticsearch index operation is attempted. If
# the indexing operation doesn't succeed after this many retries, the events are
# dropped. The default is 3.
#max_retries: 3
# The maximum number of events to bulk in a single Elasticsearch bulk API index request.
# The default is 50.
#bulk_max_size: 50
# The number of seconds to wait before trying to reconnect to Elasticsearch
# after a network error. After waiting backoff.init seconds, apm-server
# tries to reconnect. If the attempt fails, the backoff timer is increased
# exponentially up to backoff.max. After a successful connection, the backoff
# timer is reset. The default is 1s.
#backoff.init: 1s
# The maximum number of seconds to wait before attempting to connect to
# Elasticsearch after a network error. The default is 60s.
#backoff.max: 60s
# Configure the HTTP request timeout before failing a request to Elasticsearch.
#timeout: 90
# Use SSL settings for HTTPS. Default is false.
#ssl.enabled: false
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# SSL configuration. It is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#----------------------------- Console output ---------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# Pretty print json event
#pretty: false
#----------------------------- Logstash output ---------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# The Logstash hosts
#hosts: ["localhost:5044"]
# Number of workers per Logstash host.
#worker: 1
# Set gzip compression level.
#compression_level: 3
# Configure escaping html symbols in strings.
#escape_html: true
# Optional maximum time to live for a connection to Logstash, after which the
# connection will be re-established. A value of `0s` (the default) will
# disable this feature.
# Not yet supported for async connections (i.e. with the "pipelining" option set)
#ttl: 30s
# Optional load balance the events between the Logstash hosts. Default is false.
#loadbalance: false
# Number of batches to be sent asynchronously to Logstash while processing
# new batches.
#pipelining: 2
# If enabled only a subset of events in a batch of events is transferred per
# group. The number of events to be sent increases up to `bulk_max_size`
# if no error is encountered.
#slow_start: false
# The number of seconds to wait before trying to reconnect to Logstash
# after a network error. After waiting backoff.init seconds, apm-server
# tries to reconnect. If the attempt fails, the backoff timer is increased
# exponentially up to backoff.max. After a successful connection, the backoff
# timer is reset. The default is 1s.
#backoff.init: 1s
# The maximum number of seconds to wait before attempting to connect to
# Logstash after a network error. The default is 60s.
#backoff.max: 60s
# Optional index name. The default index name is set to apm
# in all lowercase.
#index: 'apm'
# SOCKS5 proxy server URL
#proxy_url: socks5://user:password@socks5-server:2233
# Resolve names locally when using a proxy server. Defaults to false.
#proxy_use_local_resolver: false
# Enable SSL support. SSL is automatically enabled, if any SSL setting is set.
#ssl.enabled: true
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# Optional SSL configuration options. SSL is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#------------------------------- Kafka output ----------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# The list of Kafka broker addresses from where to fetch the cluster metadata.
# The cluster metadata contain the actual Kafka brokers events are published
# to.
#hosts: ["localhost:9092"]
# The Kafka topic used for produced events. The setting can be a format string
# using any event field. To set the topic from document type use `%{[type]}`.
#topic: beats
# The Kafka event key setting. Use format string to create unique event key.
# By default no event key will be generated.
#key: ''
# The Kafka event partitioning strategy. Default hashing strategy is `hash`
# using the `output.kafka.key` setting or randomly distributes events if
# `output.kafka.key` is not configured.
# If enabled, events will only be published to partitions with reachable
# leaders. Default is false.
#reachable_only: false
# Configure alternative event field names used to compute the hash value.
# If empty `output.kafka.key` setting will be used.
# Default value is empty list.
#hash: []
# Authentication details. Password is required if username is set.
#username: ''
#password: ''
# Kafka version libbeat is assumed to run against. Defaults to "1.0.0".
#version: '1.0.0'
# Configure JSON encoding
# Pretty print json event
#pretty: false
# Configure escaping html symbols in strings.
#escape_html: true
# Metadata update configuration. The metadata contains the leader information
# used to decide which broker to use when publishing.
# Max metadata request retry attempts when cluster is in middle of leader
# election. Defaults to 3 retries.
#retry.max: 3
# Waiting time between retries during leader elections. Default is 250ms.
#retry.backoff: 250ms
# Refresh metadata interval. Defaults to every 10 minutes.
#refresh_frequency: 10m
# The number of concurrent load-balanced Kafka output workers.
#worker: 1
# The number of times to retry publishing an event after a publishing failure.
# After the specified number of retries, the events are typically dropped.
# Some Beats, such as Filebeat, ignore the max_retries setting and retry until
# all events are published. Set max_retries to a value less than 0 to retry
# until all events are published. The default is 3.
#max_retries: 3
# The maximum number of events to bulk in a single Kafka request. The default
# is 2048.
#bulk_max_size: 2048
# The number of seconds to wait for responses from the Kafka brokers before
# timing out. The default is 30s.
#timeout: 30s
# The maximum duration a broker will wait for number of required ACKs. The
# default is 10s.
#broker_timeout: 10s
# The number of messages buffered for each Kafka broker. The default is 256.
#channel_buffer_size: 256
# The keep-alive period for an active network connection. If 0s, keep-alives
# are disabled. The default is 0 seconds.
#keep_alive: 0
# Sets the output compression codec. Must be one of none, snappy and gzip. The
# default is gzip.
#compression: gzip
# The maximum permitted size of JSON-encoded messages. Bigger messages will be
# dropped. The default value is 1000000 (bytes). This value should be equal to
# or less than the broker's message.max.bytes.
#max_message_bytes: 1000000
# The ACK reliability level required from broker. 0=no response, 1=wait for
# local commit, -1=wait for all replicas to commit. The default is 1. Note:
# If set to 0, no ACKs are returned by Kafka. Messages might be lost silently
# on error.
#required_acks: 1
# The configurable ClientID used for logging, debugging, and auditing
# purposes. The default is "beats".
#client_id: beats
# Enable SSL support. SSL is automatically enabled, if any SSL setting is set.
#ssl.enabled: true
# Optional SSL configuration options. SSL is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#================================= Paths ======================================
# The home path for the apm-server installation. This is the default base path
# for all other path settings and for miscellaneous files that come with the
# distribution (for example, the sample dashboards).
# If not set by a CLI flag or in the configuration file, the default for the
# home path is the location of the binary.
# The configuration path for the apm-server installation. This is the default
# base path for configuration files, including the main YAML configuration file
# and the Elasticsearch template file. If not set by a CLI flag or in the
# configuration file, the default for the configuration path is the home path.
#path.config: ${path.home}
# The data path for the apm-server installation. This is the default base path
# for all the files in which apm-server needs to store its data. If not set by a
# CLI flag or in the configuration file, the default for the data path is a data
# subdirectory inside the home path.
#path.data: ${path.home}/data
# The logs path for an apm-server installation. This is the default location for
# the Beat's log files. If not set by a CLI flag or in the configuration file,
# the default for the logs path is a logs subdirectory inside the home path.
#path.logs: ${path.home}/logs
#================================ Logging ======================================
# There are three options for the log output: syslog, file, stderr.
# On Windows systems, logs are sent to the file output by default;
# on all other systems they are sent to syslog by default.
# Sets log level. The default log level is info.
# Available log levels are: error, warning, info, debug
#logging.level: info
# Enable debug output for selected components. To enable all selectors use ["*"]
# Other available selectors are "beat", "publish", "service"
# Multiple selectors can be chained.
#logging.selectors: [ ]
# Send all logging output to syslog. The default is false.
#logging.to_syslog: true
# If enabled, apm-server periodically logs its internal metrics that have changed
# in the last period. For each metric that changed, the delta from the value at
# the beginning of the period is logged. Also, the total values for
# all non-zero internal metrics are logged on shutdown. The default is true.
#logging.metrics.enabled: false
# The period after which to log the internal metrics. The default is 30s.
#logging.metrics.period: 30s
# Logging to rotating files. Set logging.to_files to false to disable logging to
# files.
#logging.to_files: true
# Configure the path where the logs are written. The default is the logs directory
# under the home path (the binary location).
#path: /var/log/apm-server
# The name of the files where the logs are written to.
#name: apm-server
# Configure log file size limit. If limit is reached, log file will be
# automatically rotated
#rotateeverybytes: 10485760 # = 10MB
# Number of rotated log files to keep. Oldest files will be deleted first.
#keepfiles: 7
# The permissions mask to apply when rotating log files. The default value is 0600.
# Must be a valid Unix-style file permissions mask expressed in octal notation.
#permissions: 0600
# Set to true to log messages in json format.
#logging.json: false
#================================ HTTP Endpoint ======================================
# Each beat can expose internal metrics through a HTTP endpoint. For security
# reasons the endpoint is disabled by default. This feature is currently experimental.
# Stats can be accessed through http://localhost:5066/stats . For pretty JSON output
# append ?pretty to the URL.
# Defines if the HTTP endpoint is enabled.
#http.enabled: false
# The HTTP endpoint will bind to this hostname or IP address. It is recommended to use only localhost.
#http.host: localhost
# Port on which the HTTP endpoint will bind. Default is 5066.
#http.port: 5066
#============================== Xpack Monitoring ===============================
# APM server can export internal metrics to a central Elasticsearch monitoring
# cluster. This requires xpack monitoring to be enabled in Elasticsearch. The
# reporting is disabled by default.
# Set to true to enable the monitoring reporter.
#xpack.monitoring.enabled: false
# Uncomment to send the metrics to Elasticsearch. Most settings from the
# Elasticsearch output are accepted here as well. Any setting that is not set is
# automatically inherited from the Elasticsearch output configuration, so if you
# have the Elasticsearch output configured, you can simply uncomment the
# following line.
# username: "apm_system"
# password: ""

View File

@ -0,0 +1,9 @@
image: docker.elastic.co/apm/apm-server:6.5.4
- "8200:8200"
- ./data:/usr/share/apm-server
- elasticsearch:
restart: always

artifactory/README.md Normal file
View File

@ -0,0 +1,7 @@
[JFrog Artifactory][1] is the only Universal Repository Manager supporting all major
packaging formats, build tools and CI servers.
[1]: https://www.jfrog.com/confluence/display/RTF/Welcome+to+Artifactory

View File

@ -0,0 +1,35 @@
# https://www.jfrog.com/confluence/display/RTF/Installing+with+Docker
version: "3.7"
image: postgres:12-alpine
- "5432:5432"
- ./data/postgres:/var/lib/postgresql/data
- POSTGRES_DB=artifactory
- POSTGRES_USER=artifactory
restart: always
image: docker.bintray.io/jfrog/artifactory-oss:6.13.1
- "8081:8081"
- ./data/artifactory:/var/opt/jfrog/artifactory
- DB_TYPE=postgresql
- DB_HOST=postgres
- DB_USER=artifactory
- DB_PASSWORD=artifactory
- EXTRA_JAVA_OPTIONS=-Xms512m -Xmx2g -Xss256k -XX:+UseG1GC
- postgres
restart: always

awx/README.md Normal file
View File

@ -0,0 +1,9 @@
[AWX][1] provides a web-based user interface, REST API, and task engine built
on top of Ansible. It is the upstream project for [Tower][2], a commercial
derivative of AWX.
[1]: https://github.com/ansible/awx
[2]: https://www.ansible.com/tower

awx/data/etc/SECRET_KEY Normal file
View File

@ -0,0 +1 @@

View File

@ -0,0 +1,29 @@
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': "awx",
'USER': "awx",
'PASSWORD': "awxpass",
'HOST': "postgres",
'PORT': "5432",
BROKER_URL = 'amqp://guest:guest@rabbitmq:5672/awx'
'default': {'BACKEND': 'asgi_amqp.AMQPChannelLayer',
'ROUTING': 'awx.main.routing.channel_routing',
'CONFIG': {'url': BROKER_URL}}
'default': {
'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
'LOCATION': 'memcached:11211'
'ephemeral': {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',

View File

@ -0,0 +1,12 @@

awx/docker-compose.yml Normal file
View File

@ -0,0 +1,69 @@
# https://github.com/ansible/awx/raw/devel/installer/roles/local_docker/templates/docker-compose.yml.j2
version: '2'
image: ansible/awx_web
container_name: awx_web
hostname: awxweb
user: root
- "8052:8052"
- ./data/etc/SECRET_KEY:/etc/tower/SECRET_KEY
- ./data/etc/environment.sh:/etc/tower/conf.d/environment.sh
- ./data/etc/credentials.py:/etc/tower/conf.d/credentials.py
- ./data/awx:/var/lib/awx/projects
- rabbitmq
- memcached
- postgres
restart: unless-stopped
image: ansible/awx_task
container_name: awx_task
hostname: awx
user: root
- ./data/etc/SECRET_KEY:/etc/tower/SECRET_KEY
- ./data/etc/environment.sh:/etc/tower/conf.d/environment.sh
- ./data/etc/credentials.py:/etc/tower/conf.d/credentials.py
- ./data/awx:/var/lib/awx/projects
- rabbitmq
- memcached
- web
- postgres
restart: unless-stopped
image: ansible/awx_rabbitmq
container_name: awx_rabbitmq
restart: unless-stopped
image: memcached:alpine
container_name: awx_memcached
restart: unless-stopped
image: postgres:12-alpine
container_name: awx_postgres
- ./data/postgres:/var/lib/postgresql/data/pgdata:Z
PGDATA: /var/lib/postgresql/data/pgdata
restart: unless-stopped

browserless/README.md Normal file
View File

@ -0,0 +1,58 @@
[Browserless][1] makes it easy to run your puppeteer scripts in an optimized
way. It takes care of all the binaries and managing of Chrome so you don't have
## docker-compose.yml
image: browserless/chrome
- "3000:3000"
- DEBUG=browserless/chrome
shm_size: 2gb
restart: always
## screenshot.js
'use strict';
const puppeteer = require('puppeteer');
(async() => {
const browser = await puppeteer.connect({browserWSEndpoint: 'ws://localhost:3000'});
const page = await browser.newPage();
await page.goto('https://www.google.com/', {waitUntil: 'networkidle2'});
await page.screenshot({path: 'google.png', fullPage: true});
await browser.close();
## Up and Running
$ docker-compose up -d
$ PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true npm install puppeteer
$ node screenshot.js
$ imgcat google.png
$ http \
url=https://www.youtube.com \
options:='{"fullPage":true}' \
gotoOptions:='{"waitUntil":"networkidle2"}' > youtube.png
$ imgcat youtube.png
[1]: https://docs.browserless.io/

View File

@ -0,0 +1,14 @@
image: browserless/chrome
- "3000:3000"
- DEBUG=browserless/chrome
shm_size: 2gb
restart: always

View File

@ -0,0 +1,25 @@
version: '3.5'
image: browserless/chrome:latest
- "3000:3000"
- DEBUG=browserless/chrome
memory: 1024M
replicas: 56
condition: on-failure
- subnet:

code-server/README.md Normal file
View File

@ -0,0 +1,7 @@
[code-server][1] is VS Code running on a remote server, accessible through the browser.
[1]: https://github.com/cdr/code-server

View File

@ -0,0 +1,9 @@
image: codercom/code-server
command: --allow-http --no-auth --disable-telemetry
- "8443:8443"
- ./data:/home/coder/project
- /home/coder/local/share/code-server
restart: unless-stopped

View File

@ -8,7 +8,7 @@ MAINTAINER kev <noreply@easypi.pro>
ENV DANTE_URL https://www.inet.no/dante/files/dante-$DANTE_VER.tar.gz
ENV DANTE_SHA baa25750633a7f9f37467ee43afdf7a95c80274394eddd7dcd4e1542aa75caad
ENV DANTE_SHA 4c97cff23e5c9b00ca1ec8a95ab22972813921d7fbf60fc453e3e06382fc38a7
ENV DANTE_FILE dante.tar.gz
ENV DANTE_DEPS build-essential curl

View File

@ -2,16 +2,18 @@
# Dockerfile for building docker-compose-arm
FROM resin/rpi-raspbian:jessie
FROM arm32v7/debian:stretch
MAINTAINER EasyPi Software Foundation
RUN set -xe \
&& apt-get update \
&& apt-get install -y build-essential \
curl \
libffi-dev \
libssl-dev \
python-dev \
zlib1g-dev \
&& curl https://bootstrap.pypa.io/get-pip.py | python \

View File

@ -5,43 +5,36 @@ drone
## github
image: drone/drone:1.5-linux-amd64
- "8080:80"
- ./data:/data
- DRONE_SERVER_HOST=drone.easypi.pro
- DRONE_GITHUB_SERVER=https://github.com
restart: always
# Github » Settings » Applications » Developer applications » Register new application
Application name: drone
Homepage URL: http://drone.easypi.info/
Homepage URL: http://drone.easypi.pro/
Application description: Drone is a Continuous Integration platform built on Docker, written in Go
Authorization callback URL: http://drone.easypi.info/authorize
Authorization callback URL: http://drone.easypi.pro/authorize
Client ID: ... (generated by github)
Client Secret: ... (generated by github)
## docker-compose.yml
image: drone/drone
- "8000:8000"
- ./drone:/var/lib/drone
- /var/run/docker.sock:/var/run/docker.sock
- REMOTE_CONFIG=https://github.com?client_id=...&client_secret=...
# - REMOTE_CONFIG=https://git.easypi.info/?open=false
- DEBUG=false
restart: always
image: drone/drone-wall
- "8080:80"
restart: always
> Drone will register gogs webhooks automatically, you don't need to do it manually.
## nginx/sites-enabled/drone
@ -49,7 +42,7 @@ wall:
server {
listen 80;
server_name drone.easypi.info;
server_name drone.easypi.pro;
location / {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $remote_addr;
@ -57,7 +50,7 @@ server {
proxy_set_header Host $http_host;
proxy_set_header Origin "";
proxy_redirect off;
proxy_http_version 1.1;
proxy_buffering off;
@ -65,14 +58,6 @@ server {
chunked_transfer_encoding off;
server {
listen 80;
server_name wall.easypi.info;
location / {
## up and running
@ -81,13 +66,10 @@ server {
# server
$ cd ~/fig/drone/
$ docker-compose up -d
$ docker-compose logs
$ docker-compose logs -f
# client (login with remote driver credential)
$ firefox http://drone.easypi.info/
# dashboard
$ firefox http://wall.easypi.info/
$ firefox http://drone.easypi.pro/
[1]: http://readme.drone.io/usage/overview/
[1]: https://readme.drone.io

View File

@ -1,23 +0,0 @@
# Dockerfile for drone-arm
FROM easypi/alpine-arm
MAINTAINER EasyPi Software Foundation
RUN apk add --no-cache ca-certificates
ADD drone /drone
DATABASE_CONFIG=/var/lib/drone/drone.sqlite \
GODEBUG=netdns=go \
ENTRYPOINT ["/drone"]
CMD ["server"]

View File

@ -1,24 +1,6 @@
image: armdrone/drone
- "8000:80"
- ./data:/var/lib/drone
- /var/run/docker.sock:/var/run/docker.sock
- REMOTE_CONFIG=http://git.easypi.info:3000/?open=false
- PLUGIN_FILTER=armdrone/*
- GIN_MODE=release
- git.easypi.info:
restart: always
## Deploy Key Setup

View File

@ -1,45 +1,38 @@
version: '2'
version: '3.7'
image: quay.io/armswarm/drone:0.8
image: drone/drone:1.6-linux-arm
- "8000:8000"
- "9000:9000"
- "8080:80"
- ./data/drone:/var/lib/drone
- ./data/drone:/data
- DRONE_HOST=https://drone.easypi.pro
- DRONE_DATABASE_DATASOURCE=root:root@tcp(mariadb:3306)/drone?parseTime=true
- DRONE_GOGS_URL=https://gogs.easypi.pro
- DRONE_SECRET=0123456789
- DRONE_DATABASE_DATASOURCE=/data/database.sqlite
- DRONE_GOGS_SERVER=http://gogs:3000
- mariadb
- postgres
- gogs
restart: always
image: drone/agent:linux-arm
image: drone/agent:1.6-linux-arm
- /var/run/docker.sock:/var/run/docker.sock
- DRONE_SERVER=drone-server:9000
- DRONE_SECRET=0123456789
- drone-server
restart: always
image: easypi/mariadb-arm
- "3306:3306"
- drone
restart: always
@ -50,12 +43,3 @@ services:
- ./data/gogs:/data
restart: always
image: easypi/phpmyadmin-arm
- "8080:80"
- PMA_HOST=mariadb
- PMA_PORT=3306
restart: always

Binary file not shown.

View File

@ -1,18 +1,57 @@
image: drone/drone
- "8000:80"
- ./data:/var/lib/drone
- /var/run/docker.sock:/var/run/docker.sock
- REMOTE_CONFIG=http://git.easypi.info:3000/?open=false
- DEBUG=false
restart: always
version: "3.7"
image: drone/drone-wall
- "8080:80"
restart: always
image: drone/drone:1.6-linux-amd64
- "8080:80"
- ./data/drone:/data
- DRONE_DATABASE_DATASOURCE=postgres://drone:drone@postgres:5432/drone?sslmode=disable
- DRONE_GOGS_SERVER=http://gogs:3000
- postgres
- gogs
restart: always
image: drone/agent:1.6-linux-amd64
- /var/run/docker.sock:/var/run/docker.sock
- drone
restart: always
image: postgres:12-alpine
- "5432:5432"
- ./data/postgres:/var/lib/postgresql/data
restart: always
image: gogs/gogs
- "2222:22"
- "3000:3000"
- ./data/gogs:/data
restart: always

View File

@ -1,7 +1,10 @@
[ElastAlert][1] is a simple framework for alerting on anomalies, spikes, or other
patterns of interest from data in Elasticsearch.
## up and running
$ docker-compose up -d
@ -9,5 +12,9 @@ $ docker-compose exec elastalert sh
>>> cd /opt/elastalert/rules
>>> elastalert-test-rule xxx.yaml
>>> exit
$ docker-compose restart
> ElastAlert will also load new rules, stop running missing rules, and restart
> modified rules as the files in this folder change.
[1]: http://elastalert.readthedocs.io/en/latest/

View File

@ -1,13 +1,13 @@
image: bitsensor/elastalert
image: bitsensor/elastalert:2.0.0
- "3030:3030"
- "3333:3333"
- ./data/config.yaml:/opt/elastalert/config.yaml
- ./data/rules:/opt/elastalert/rules
- ES_HOST=elasticsearch
- ES_PORT=9200
- elk_elasticsearch_1:elasticsearch
# extra_hosts:
# - elasticsearch:
restart: always

View File

@ -4,6 +4,7 @@ ELK
- Elasticsearch
- Logstash
- Kibana
- APM Server
## How it works
@ -21,6 +22,8 @@ $ docker-compose up -d
## Delete indices older than 7 days
File: delete-indices.yml
@ -45,6 +48,14 @@ actions:
unit_count: 7
File: ~/.curator/curator.yml
$ pip install elasticsearch-curator
$ curator delete-indices.yml

elk/data/apm-server.yml Normal file
View File

@ -0,0 +1,841 @@
################### APM Server Configuration #########################
############################# APM Server ######################################
# Defines the host and port the server is listening on. Use "unix:/path/to.sock" to listen on a unix domain socket.
host: ""
# Maximum permitted size in bytes of a request's header accepted by the server to be processed.
#max_header_size: 1048576
# Maximum permitted duration for reading an entire request.
#read_timeout: 30s
# Maximum permitted duration for writing a response.
#write_timeout: 30s
# Maximum duration in seconds before releasing resources when shutting down the server.
#shutdown_timeout: 5s
# Maximum allowed size in bytes of a single event
#max_event_size: 307200
# Maximum number of new connections to accept simultaneously (0 means unlimited)
# max_connections: 0
# Authorization token to be checked. If a token is set here the agents must
# send their token in the following format: Authorization: Bearer <secret-token>.
# It is recommended to use an authorization token in combination with SSL enabled,
# and save the token in the beats keystore.
secret_token: "1870d518-6b62-450f-82b7-44fda593963f"
#ssl.enabled: false
#ssl.certificate : "path/to/cert"
#ssl.key : "path/to/private_key"
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ""
# To enable real user monitoring (RUM) support set this to true.
#enabled: false
# Defines the maximum amount of events allowed to be sent to the APM Server RUM
# endpoint per ip per second. Defaults to 300.
#limit: 300
# An LRU cache is used to keep a rate limit per IP for the most recently seen IPs.
# This setting defines the number of unique IPs that can be tracked in the cache.
# Sites with many concurrent clients should consider increasing this limit. Defaults to 1000.
#lru_size: 1000
#-- General RUM settings
# Comma separated list of permitted origins for real user monitoring.
# User-agents will send an origin header that will be validated against this list.
# An origin is made of a protocol scheme, host and port, without the url path.
# Allowed origins in this setting can have * to match anything (eg.: http://*.example.com)
# If an item in the list is a single '*', everything will be allowed
#allow_origins : ['*']
# Regexp to be matched against a stacktrace frame's `file_name` and `abs_path` attributes.
# If the regexp matches, the stacktrace frame is considered to be a library frame.
#library_pattern: "node_modules|bower_components|~"
# Regexp to be matched against a stacktrace frame's `file_name`.
# If the regexp matches, the stacktrace frame is not used for calculating error groups.
# The default pattern excludes stacktrace frames that have a filename starting with '/webpack'
#exclude_from_grouping: "^/webpack"
# If a source map has previously been uploaded, source mapping is automatically applied
# to all error and transaction documents sent to the RUM endpoint.
# Source maps are always fetched from Elasticsearch, by default using the output.elasticsearch configuration.
# A different instance must be configured when using any other output.
# This setting only affects sourcemap reads - the output determines where sourcemaps are written.
# Array of hosts to connect to.
# Scheme and port can be left out and will be set to the default (http and 9200)
# In case you specify an additional path, the scheme is required: http://localhost:9200/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:9200
# hosts: ["localhost:9200"]
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# The `cache.expiration` determines how long a source map should be cached before fetching it again from Elasticsearch.
# Note that values configured without a time unit will be interpreted as seconds.
#expiration: 5m
# Source maps are stored in a separate index.
# If the default index pattern for source maps at 'outputs.elasticsearch.indices'
# is changed, a matching index pattern needs to be specified here.
#index_pattern: "apm-*-sourcemap*"
# If set to true, APM Server augments data received by the agent with the original IP of the backend server,
# or the IP and User Agent of the real user (RUM requests). It defaults to true.
#capture_personal_data: true
# golang expvar support - https://golang.org/pkg/expvar/
# Set to true to Expose expvar
#enabled: false
# Url to expose expvar
#url: "/debug/vars"
# Instrumentation support for the server's HTTP endpoints and event publisher.
# Set to true to enable instrumentation of the APM server itself.
#enabled: false
# Environment in which the APM Server is running on (eg: staging, production, etc.)
#environment: ""
# Remote host to report instrumentation results to.
# - http://remote-apm-server:8200
# Remote apm-servers' secret_token
# Metrics endpoint
# Set to false to disable the metrics endpoint
#enabled: true
# A pipeline is a definition of processors applied to documents when writing them to Elasticsearch.
# Using pipelines involves two steps:
# (1) registering a pipeline
# (2) applying a pipeline during data ingestion (see `output.elasticsearch.pipelines`)
# You can manually register pipelines, or use this configuration option to ensure
# pipelines are loaded and registered at the configured Elasticsearch instances.
# Automatic pipeline registration requires
# * `output.elasticsearch` to be enabled and configured.
# * having the required Elasticsearch Processor Plugins installed.
# APM Server default pipelines require you to have the `Ingest User Agent Plugin` installed.
# Find the default pipeline configuration at `ingest/pipeline/definition.json`.
# Registers pipeline definitions in Elasticsearch on APM Server startup. Defaults to false.
#enabled: false
# Overwrites existing pipeline definitions in Elasticsearch. Defaults to true.
#overwrite: true
#================================ General ======================================
# Internal queue configuration for buffering events to be published.
# Queue type by name (default 'mem')
# The memory queue will present all available events (up to the outputs
# bulk_max_size) to the output, the moment the output is ready to serve
# another batch of events.
# Max number of events the queue can buffer.
#events: 4096
# Hints the minimum number of events stored in the queue,
# before providing a batch of events to the outputs.
# A value of 0 (the default) ensures events are immediately available
# to be sent to the outputs.
#flush.min_events: 2048
# Maximum duration after which events are available to the outputs,
# if the number of events stored in the queue is < min_flush_events.
#flush.timeout: 1s
# Sets the maximum number of CPUs that can be executing simultaneously. The
# default is the number of logical CPUs available in the system.
#============================== Template =====================================
# A template is used to set the mapping in Elasticsearch
# By default template loading is enabled and the template is loaded.
# These settings can be adjusted to load your own template or overwrite existing ones.
# Set to false to disable template loading.
#setup.template.enabled: true
# Template name. By default the template name is "apm-%{[beat.version]}"
# The template name and pattern has to be set in case the elasticsearch index pattern is modified.
#setup.template.name: "apm-%{[beat.version]}"
# Template pattern. By default the template pattern is "apm-%{[beat.version]}-*" to apply to the default index settings.
# The first part is the version of the beat and then -* is used to match all daily indices.
# The template name and pattern has to be set in case the elasticsearch index pattern is modified.
#setup.template.pattern: "apm-%{[beat.version]}-*"
# Path to fields.yml file to generate the template
#setup.template.fields: "${path.config}/fields.yml"
# Overwrite existing template
#setup.template.overwrite: false
# Elasticsearch template settings
# A dictionary of settings to place into the settings.index dictionary
# of the Elasticsearch template. For more details, please check
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
#number_of_shards: 1
#codec: best_compression
#number_of_routing_shards: 30
#mapping.total_fields.limit: 2000
# A dictionary of settings for the _source field. For more details, please check
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html
#enabled: false
#============================== Deprecated: Dashboards =====================================
# Deprecated: Loading dashboards from the APM Server into Kibana is deprecated from 6.4 on.
# We suggest to use the Kibana UI to load APM Server dashboards and index pattern instead.
# These settings control loading the sample dashboards to the Kibana index. Loading
# the dashboards is disabled by default and can be enabled either by setting the
# options here, or by using the `-setup` CLI flag or the `setup` command.
#setup.dashboards.enabled: false
# The directory from where to read the dashboards. The default is the `kibana`
# folder in the home path.
#setup.dashboards.directory: ${path.home}/kibana
# The URL from where to download the dashboards archive. It is used instead of
# the directory if it has a value.
# The file archive (zip file) from where to read the dashboards. It is used instead
# of the directory when it has a value.
# The name of the Kibana index to use for setting the configuration. Default is ".kibana"
#setup.dashboards.kibana_index: .kibana
# The Elasticsearch index name. This overwrites the index name defined in the
# dashboards and index pattern. Example: testbeat-*
# The dashboards.index needs to be changed in case the elasticsearch index pattern is modified.
# Always use the Kibana API for loading the dashboards instead of autodetecting
# how to install the dashboards by first querying Elasticsearch.
#setup.dashboards.always_kibana: false
# If true and Kibana is not reachable at the time when dashboards are loaded,
# it will retry to reconnect to Kibana instead of exiting with an error.
#setup.dashboards.retry.enabled: false
# Duration interval between Kibana connection retries.
#setup.dashboards.retry.interval: 1s
# Maximum number of retries before exiting with an error, 0 for unlimited retrying.
#setup.dashboards.retry.maximum: 0
#============================== Deprecated: Kibana =====================================
# Deprecated: Starting with APM Server version 6.4, loading dashboards and index pattern
# from the APM Server into Kibana is deprecated.
# We suggest to use the Kibana UI to load APM Server dashboards and index pattern instead.
# Setting up a Kibana endpoint is not necessary when loading the index pattern and dashboards via the UI.
# Kibana Host
# Scheme and port can be left out and will be set to the default (http and 5601)
# In case you specify an additional path, the scheme is required: http://localhost:5601/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:5601
#host: "localhost:5601"
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# Optional HTTP Path
#path: ""
# Use SSL settings for HTTPS. Default is false.
#ssl.enabled: false
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# SSL configuration. By default is off.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
#============================= Elastic Cloud ==================================
# These settings simplify using APM Server with the Elastic Cloud (https://cloud.elastic.co/).
# The cloud.id setting overwrites the `output.elasticsearch.hosts` and
# [deprecated] `setup.kibana.host` options.
# You can find the `cloud.id` in the Elastic Cloud web UI.
# The cloud.auth setting overwrites the `output.elasticsearch.username` and
# `output.elasticsearch.password` settings. The format is `<user>:<pass>`.
#================================ Outputs =====================================
# Configure what output to use when sending the data collected by the beat.
#-------------------------- Elasticsearch output ------------------------------
hosts: ["elasticsearch:9200"]
# Boolean flag to enable or disable the output module.
#enabled: true
# Set gzip compression level.
#compression_level: 0
# Optional protocol and basic auth credentials.
#protocol: "https"
#username: "elastic"
#password: "changeme"
# Dictionary of HTTP parameters to pass within the url with index operations.
#param1: value1
#param2: value2
# Number of workers per Elasticsearch host.
#worker: 1
# By using the configuration below, apm documents are stored to separate indices,
# depending on their `processor.event`:
# - error
# - transaction
# - span
# - sourcemap
# The indices are all prefixed with `apm-%{[beat.version]}`.
# To allow managing indices based on their age, all indices (except for sourcemaps)
# end with the information of the day they got indexed.
# e.g. "apm-6.3.0-transaction-2018.03.20"
# Be aware that you can only specify one Elasticsearch template and one Kibana Index Pattern,
# In case you modify the index patterns you must also update those configurations accordingly,
# as they need to be aligned:
# * `setup.template.name`
# * `setup.template.pattern`
# * `setup.dashboards.index`
#index: "apm-%{[beat.version]}-%{+yyyy.MM.dd}"
- index: "apm-%{[beat.version]}-sourcemap"
processor.event: "sourcemap"
- index: "apm-%{[beat.version]}-error-%{+yyyy.MM.dd}"
processor.event: "error"
- index: "apm-%{[beat.version]}-transaction-%{+yyyy.MM.dd}"
processor.event: "transaction"
- index: "apm-%{[beat.version]}-span-%{+yyyy.MM.dd}"
processor.event: "span"
- index: "apm-%{[beat.version]}-metric-%{+yyyy.MM.dd}"
processor.event: "metric"
- index: "apm-%{[beat.version]}-onboarding-%{+yyyy.MM.dd}"
processor.event: "onboarding"
# A pipeline is a definition of processors applied to documents when writing them to Elasticsearch.
# APM Server comes with a default pipeline definition, located at `ingest/pipeline/definition.json`.
# Pipelines are disabled by default. To make use of them you have to:
# (1) ensure pipelines are registered in Elasticsearch, see `apm-server.register.ingest.pipeline`
# (2) enable the following:
#- pipeline: "apm_user_agent"
# Optional HTTP Path
#path: "/elasticsearch"
# Custom HTTP headers to add to each request
# X-My-Header: Contents of the header
# Proxy server url
#proxy_url: http://proxy:3128
# The number of times a particular Elasticsearch index operation is attempted. If
# the indexing operation doesn't succeed after this many retries, the events are
# dropped. The default is 3.
#max_retries: 3
# The maximum number of events to bulk in a single Elasticsearch bulk API index request.
# The default is 50.
#bulk_max_size: 50
# The number of seconds to wait before trying to reconnect to Elasticsearch
# after a network error. After waiting backoff.init seconds, apm-server
# tries to reconnect. If the attempt fails, the backoff timer is increased
# exponentially up to backoff.max. After a successful connection, the backoff
# timer is reset. The default is 1s.
#backoff.init: 1s
# The maximum number of seconds to wait before attempting to connect to
# Elasticsearch after a network error. The default is 60s.
#backoff.max: 60s
# Configure http request timeout before failing a request to Elasticsearch.
#timeout: 90
# Use SSL settings for HTTPS. Default is false.
#ssl.enabled: false
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# SSL configuration. By default is off.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
# It is recommended to use the provided keystore instead of entering the passphrase in plain text.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#----------------------------- Console output ---------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# Pretty print json event
#pretty: false
#----------------------------- Logstash output ---------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# The Logstash hosts
#hosts: ["localhost:5044"]
# Number of workers per Logstash host.
#worker: 1
# Set gzip compression level.
#compression_level: 3
# Configure escaping html symbols in strings.
#escape_html: true
# Optional maximum time to live for a connection to Logstash, after which the
# connection will be re-established. A value of `0s` (the default) will
# disable this feature.
# Not yet supported for async connections (i.e. with the "pipelining" option set)
#ttl: 30s
# Optional load balance the events between the Logstash hosts. Default is false.
#loadbalance: false
# Number of batches to be sent asynchronously to Logstash while processing
# new batches.
#pipelining: 2
# If enabled only a subset of events in a batch of events is transferred per
# group. The number of events to be sent increases up to `bulk_max_size`
# if no error is encountered.
#slow_start: false
# The number of seconds to wait before trying to reconnect to Logstash
# after a network error. After waiting backoff.init seconds, apm-server
# tries to reconnect. If the attempt fails, the backoff timer is increased
# exponentially up to backoff.max. After a successful connection, the backoff
# timer is reset. The default is 1s.
#backoff.init: 1s
# The maximum number of seconds to wait before attempting to connect to
# Logstash after a network error. The default is 60s.
#backoff.max: 60s
# Optional index name. The default index name is set to apm
# in all lowercase.
#index: 'apm'
# SOCKS5 proxy server URL
#proxy_url: socks5://user:password@socks5-server:2233
# Resolve names locally when using a proxy server. Defaults to false.
#proxy_use_local_resolver: false
# Enable SSL support. SSL is automatically enabled, if any SSL setting is set.
#ssl.enabled: true
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# Optional SSL configuration options. SSL is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#------------------------------- Kafka output ----------------------------------
# Boolean flag to enable or disable the output module.
#enabled: false
# The list of Kafka broker addresses from where to fetch the cluster metadata.
# The cluster metadata contain the actual Kafka brokers events are published
# to.
#hosts: ["localhost:9092"]
# The Kafka topic used for produced events. The setting can be a format string
# using any event field. To set the topic from document type use `%{[type]}`.
#topic: beats
# The Kafka event key setting. Use format string to create unique event key.
# By default no event key will be generated.
#key: ''
# The Kafka event partitioning strategy. Default hashing strategy is `hash`
# using the `output.kafka.key` setting or randomly distributes events if
# `output.kafka.key` is not configured.
# If enabled, events will only be published to partitions with reachable
# leaders. Default is false.
#reachable_only: false
# Configure alternative event field names used to compute the hash value.
# If empty `output.kafka.key` setting will be used.
# Default value is empty list.
#hash: []
# Authentication details. Password is required if username is set.
#username: ''
#password: ''
# Kafka version libbeat is assumed to run against. Defaults to "1.0.0".
#version: '1.0.0'
# Configure JSON encoding
# Pretty print json event
#pretty: false
# Configure escaping html symbols in strings.
#escape_html: true
# Metadata update configuration. Metadata do contain leader information
# deciding which broker to use when publishing.
# Max metadata request retry attempts when cluster is in middle of leader
# election. Defaults to 3 retries.
#retry.max: 3
# Waiting time between retries during leader elections. Default is 250ms.
#retry.backoff: 250ms
# Refresh metadata interval. Defaults to every 10 minutes.
#refresh_frequency: 10m
# The number of concurrent load-balanced Kafka output workers.
#worker: 1
# The number of times to retry publishing an event after a publishing failure.
# After the specified number of retries, the events are typically dropped.
# Some Beats, such as Filebeat, ignore the max_retries setting and retry until
# all events are published. Set max_retries to a value less than 0 to retry
# until all events are published. The default is 3.
#max_retries: 3
# The maximum number of events to bulk in a single Kafka request. The default
# is 2048.
#bulk_max_size: 2048
# The number of seconds to wait for responses from the Kafka brokers before
# timing out. The default is 30s.
#timeout: 30s
# The maximum duration a broker will wait for number of required ACKs. The
# default is 10s.
#broker_timeout: 10s
# The number of messages buffered for each Kafka broker. The default is 256.
#channel_buffer_size: 256
# The keep-alive period for an active network connection. If 0s, keep-alives
# are disabled. The default is 0 seconds.
#keep_alive: 0
# Sets the output compression codec. Must be one of none, snappy and gzip. The
# default is gzip.
#compression: gzip
# The maximum permitted size of JSON-encoded messages. Bigger messages will be
# dropped. The default value is 1000000 (bytes). This value should be equal to
# or less than the broker's message.max.bytes.
#max_message_bytes: 1000000
# The ACK reliability level required from broker. 0=no response, 1=wait for
# local commit, -1=wait for all replicas to commit. The default is 1. Note:
# If set to 0, no ACKs are returned by Kafka. Messages might be lost silently
# on error.
#required_acks: 1
# The configurable ClientID used for logging, debugging, and auditing
# purposes. The default is "beats".
#client_id: beats
# Enable SSL support. SSL is automatically enabled, if any SSL setting is set.
#ssl.enabled: true
# Optional SSL configuration options. SSL is off by default.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Configure SSL verification mode. If `none` is configured, all server hosts
# and certificates will be accepted. In this mode, SSL based connections are
# susceptible to man-in-the-middle attacks. Use only for testing. Default is
# `full`.
#ssl.verification_mode: full
# List of supported/valid TLS versions. By default all TLS versions 1.0 up to
# 1.2 are enabled.
#ssl.supported_protocols: [TLSv1.0, TLSv1.1, TLSv1.2]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# Optional passphrase for decrypting the Certificate Key.
#ssl.key_passphrase: ''
# Configure cipher suites to be used for SSL connections
#ssl.cipher_suites: []
# Configure curve types for ECDHE based cipher suites
#ssl.curve_types: []
# Configure what types of renegotiation are supported. Valid options are
# never, once, and freely. Default is never.
#ssl.renegotiation: never
#================================= Paths ======================================
# The home path for the apm-server installation. This is the default base path
# for all other path settings and for miscellaneous files that come with the
# distribution (for example, the sample dashboards).
# If not set by a CLI flag or in the configuration file, the default for the
# home path is the location of the binary.
# The configuration path for the apm-server installation. This is the default
# base path for configuration files, including the main YAML configuration file
# and the Elasticsearch template file. If not set by a CLI flag or in the
# configuration file, the default for the configuration path is the home path.
#path.config: ${path.home}
# The data path for the apm-server installation. This is the default base path
# for all the files in which apm-server needs to store its data. If not set by a
# CLI flag or in the configuration file, the default for the data path is a data
# subdirectory inside the home path.
#path.data: ${path.home}/data
# The logs path for an apm-server installation. This is the default location for
# the Beat's log files. If not set by a CLI flag or in the configuration file,
# the default for the logs path is a logs subdirectory inside the home path.
#path.logs: ${path.home}/logs
#================================ Logging ======================================
# There are three options for the log output: syslog, file, stderr.
# Under Windows systems, the log files are per default sent to the file output,
# under all other systems per default to syslog.
# Sets log level. The default log level is info.
# Available log levels are: error, warning, info, debug
#logging.level: info
# Enable debug output for selected components. To enable all selectors use ["*"]
# Other available selectors are "beat", "publish", "service"
# Multiple selectors can be chained.
#logging.selectors: [ ]
# Send all logging output to syslog. The default is false.
#logging.to_syslog: true
# If enabled, apm-server periodically logs its internal metrics that have changed
# in the last period. For each metric that changed, the delta from the value at
# the beginning of the period is logged. Also, the total values for
# all non-zero internal metrics are logged on shutdown. The default is true.
#logging.metrics.enabled: false
# The period after which to log the internal metrics. The default is 30s.
#logging.metrics.period: 30s
# Logging to rotating files. Set logging.to_files to false to disable logging to
# files.
#logging.to_files: true
# Configure the path where the logs are written. The default is the logs directory
# under the home path (the binary location).
#path: /var/log/apm-server
# The name of the files where the logs are written to.
#name: apm-server
# Configure log file size limit. If limit is reached, log file will be
# automatically rotated
#rotateeverybytes: 10485760 # = 10MB
# Number of rotated log files to keep. Oldest files will be deleted first.
#keepfiles: 7
# The permissions mask to apply when rotating log files. The default value is 0600.
# Must be a valid Unix-style file permissions mask expressed in octal notation.
#permissions: 0600
# Set to true to log messages in json format.
#logging.json: false
#================================ HTTP Endpoint ======================================
# Each beat can expose internal metrics through a HTTP endpoint. For security
# reasons the endpoint is disabled by default. This feature is currently experimental.
# Stats can be accessed through http://localhost:5066/stats . For pretty JSON output
# append ?pretty to the URL.
# Defines if the HTTP endpoint is enabled.
#http.enabled: false
# The HTTP endpoint will bind to this hostname or IP address. It is recommended to use only localhost.
#http.host: localhost
# Port on which the HTTP endpoint will bind. Default is 5066.
#http.port: 5066
#============================== Xpack Monitoring ===============================
# APM server can export internal metrics to a central Elasticsearch monitoring
# cluster. This requires xpack monitoring to be enabled in Elasticsearch. The
# reporting is disabled by default.
# Set to true to enable the monitoring reporter.
#xpack.monitoring.enabled: false
# Uncomment to send the metrics to Elasticsearch. Most settings from the
# Elasticsearch output are accepted here as well. Any setting that is not set is
# automatically inherited from the Elasticsearch output configuration, so if you
# have the Elasticsearch output configured, you can simply uncomment the
# following line.
# username: "apm_system"
# password: ""

View File

@ -1,6 +1,7 @@
http.host: ""
path.config: /usr/share/logstash/pipeline
#xpack.monitoring.enabled: false
xpack.monitoring.elasticsearch.url: http://elasticsearch:9200
#xpack.monitoring.enabled: true
#- http://elasticsearch:9200
#xpack.monitoring.elasticsearch.username: logstash_system
#xpack.monitoring.elasticsearch.password: changeme

View File

@ -1,45 +1,58 @@
image: docker.elastic.co/elasticsearch/elasticsearch:6.1.2
- "9200:9200"
- "9300:9300"
- ./data:/usr/share/elasticsearch/data
- node.name=es1
- cluster.name=elk
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms1g -Xmx1g"
soft: -1
hard: -1
restart: unless-stopped
version: '3.5'
image: docker.elastic.co/logstash/logstash:6.1.2
- "5044:5044"
- "9600:9600"
- "12201:12201/udp"
- elasticsearch
- ./data/logstash.yml:/usr/share/logstash/config/logstash.yml
- ./data/pipeline:/usr/share/logstash/pipeline
- /usr/share/logstash/vendor/bundle
- node.name=ls1
- "LS_JAVA_OPTS=-Xms1g -Xmx1g"
restart: unless-stopped
image: docker.elastic.co/kibana/kibana:6.1.2
- "5601:5601"
- elasticsearch
ELASTICSEARCH_URL: http://elasticsearch:9200
restart: unless-stopped
image: docker.elastic.co/elasticsearch/elasticsearch:7.3.0
- "9200:9200"
- ./data:/usr/share/elasticsearch/data
- node.name=es1
- cluster.name=docker-cluster
- cluster.initial_master_nodes=es1
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms16g -Xmx16g"
soft: -1
hard: -1
restart: unless-stopped
image: docker.elastic.co/logstash/logstash:7.3.0
- "5044:5044"
- "9600:9600"
- "12201:12201/udp"
- elasticsearch
- ./data/logstash.yml:/usr/share/logstash/config/logstash.yml
- ./data/pipeline:/usr/share/logstash/pipeline
- node.name=ls1
- "LS_JAVA_OPTS=-Xms4g -Xmx4g"
restart: unless-stopped
image: docker.elastic.co/kibana/kibana:7.3.0
- "5601:5601"
- elasticsearch
ELASTICSEARCH_URL: http://elasticsearch:9200
restart: unless-stopped
image: docker.elastic.co/apm/apm-server:7.3.0
- "8200:8200"
- elasticsearch
- ./data/apm-server.yml:/usr/share/apm-server/apm-server.yml
restart: unless-stopped

flogo/docker-compose.yml Normal file
View File

@ -0,0 +1,6 @@
image: flogo/flogo-docker
command: eula-accept
- "3303:3303"
restart: always

flower/README.md Normal file
View File

@ -0,0 +1,22 @@
[Flower][1] is a web based tool for monitoring and administrating Celery clusters.
## docker-compose.yml
image: mher/flower
- "5555:5555"
- CELERY_BROKER_URL=redis://redis:6379/0
- FLOWER_BASIC_AUTH=username:password
- redis:x.x.x.x
restart: always
[1]: https://flower.readthedocs.io/en/latest/

flower/docker-compose.yml Normal file
View File

@ -0,0 +1,11 @@
image: mher/flower
- "5555:5555"
- CELERY_BROKER_URL=redis://redis:6379/0
- FLOWER_BASIC_AUTH=username:password
- redis:x.x.x.x
restart: always

freegeoip/Dockerfile Normal file
View File

@ -0,0 +1,22 @@
#
# Dockerfile for freegeoip
#
# Downloads a prebuilt freegeoip release tarball from GitHub, unpacks it into
# /opt/freegeoip and runs the server with its public HTTP API on :8080 and its
# internal (metrics) server on :8888.
#

# Pin the major Alpine release instead of relying on the implicit :latest tag.
FROM alpine:3
LABEL maintainer="EasyPi Software Foundation"

# Release to download. FREEGEOIP_FILE and FREEGEOIP_URL are derived from it, so
# it must be set in an earlier ENV instruction than the ones that reference it.
# NOTE(review): 3.4.1 is assumed to be the intended upstream release -- confirm
# against https://github.com/fiorix/freegeoip/releases before publishing.
ENV FREEGEOIP_VERSION=3.4.1
ENV FREEGEOIP_FILE=freegeoip-${FREEGEOIP_VERSION}-linux-amd64.tar.gz
ENV FREEGEOIP_URL=https://github.com/fiorix/freegeoip/releases/download/v${FREEGEOIP_VERSION}/${FREEGEOIP_FILE}

WORKDIR /opt/freegeoip

# -o pipefail: without it a failed download is masked by the exit status of tar.
# curl -f:     turn HTTP errors (e.g. 404 for a wrong version) into a non-zero
#              exit instead of piping the error page into tar.
# curl and tar are only needed for the download, so they are removed in the
# same layer that installed them.
RUN set -xe -o pipefail \
    && apk add --no-cache curl tar \
    && curl -fsSL "${FREEGEOIP_URL}" | tar xz --strip 1 \
    && apk del curl tar

# 8080: public HTTP API, 8888: internal server (see CMD flags below).
EXPOSE 8080 8888

# Exec form so freegeoip runs as PID 1 and receives signals from `docker stop`;
# CMD holds the default flags and can be overridden at `docker run`.
ENTRYPOINT ["./freegeoip"]
CMD ["-public", "public", "-http", ":8080", "-internal-server", ":8888"]

freegeoip/README.md Normal file
View File

@ -0,0 +1,46 @@
## docker-compose.yml
image: vimagick/freegeoip
- "8080:8080"
- "8888:8888"
restart: always
## up and running
$ docker-compose up -d
$ curl -s http://localhost:8080/json/ | jq .
"ip": "",
"country_code": "US",
"country_name": "United States",
"region_code": "",
"region_name": "",
"city": "",
"zip_code": "",
"time_zone": "",
"latitude": 37.751,
"longitude": -97.822,
"metro_code": 0
$ curl -s http://localhost:8888/metrics
freegeoip_client_connections{proto="http"} 0
freegeoip_client_country_code_total{country_code="unknown"} 7
freegeoip_client_ipproto_version_total{ip="4"} 7
freegeoip_db_events_total{event="loaded"} 1
go_gc_duration_seconds{quantile="0"} 5.9754e-05
go_gc_duration_seconds{quantile="0.25"} 7.0367e-05
go_gc_duration_seconds{quantile="0.5"} 9.6169e-05
go_gc_duration_seconds{quantile="0.75"} 0.000112867
go_gc_duration_seconds{quantile="1"} 0.000260533
go_gc_duration_seconds_sum 0.001055739

View File

@ -0,0 +1,6 @@
image: vimagick/freegeoip
- "8080:8080"
- "8888:8888"
restart: always

View File

@ -0,0 +1,12 @@
Description=IP Geolocation Web Server
ExecStart=/opt/freegeoip/freegeoip -public public -http :8080 -internal-server :8888

View File

@ -5,7 +5,7 @@
FROM alpine
MAINTAINER kev <noreply@easypi.pro>
ENV FRP_URL https://github.com/fatedier/frp/releases/download/v${FRP_VERSION}/frp_${FRP_VERSION}_linux_amd64.tar.gz
WORKDIR /opt/frp

View File

@ -2,10 +2,10 @@
# Dockerfile for frp-arm
FROM alpine
FROM arm32v7/alpine:3
MAINTAINER EasyPi Software Foundation
ENV FRP_URL https://github.com/fatedier/frp/releases/download/v${FRP_VERSION}/frp_${FRP_VERSION}_linux_arm.tar.gz
WORKDIR /opt/frp

View File

@ -1,6 +1,7 @@
image: easypi/frp-arm
command: ./frpc -c frpc.ini
- ./data/frps.ini:/opt/frp/frps.ini
- ./data/frpc.ini:/opt/frp/frpc.ini
net: host
restart: always
restart: unless-stopped

View File

@ -2,7 +2,8 @@
server_addr =
server_port = 7000
protocol = kcp
privilege_token = 12345678
token = 12345678
admin_port = 7400
type = tcp

frp/openwrt/frpc Normal file → Executable file
View File

@ -1,5 +1,5 @@
#!/bin/sh /etc/rc.common
# Copyright (C) 2018 EasyPi Software Foundation
# Copyright (C) 2019 EasyPi Software Foundation

frp/openwrt/frps Normal file → Executable file
View File

@ -1,5 +1,5 @@
#!/bin/sh /etc/rc.common
# Copyright (C) 2018 EasyPi Software Foundation
# Copyright (C) 2019 EasyPi Software Foundation

View File

@ -3,7 +3,10 @@ Description=frp client
ExecStart=/usr/local/bin/frpc -c /etc/frp/frpc.ini
ExecReload=/usr/local/bin/frpc reload -c /etc/frp/frpc.ini

frp/systemd/frpc@.service Normal file
View File

@ -0,0 +1,14 @@
Description=Frp Client Service
ExecStart=/usr/local/bin/frpc -c /etc/frp/%i.ini
ExecReload=/usr/local/bin/frpc reload -c /etc/frp/%i.ini

View File

@ -3,7 +3,9 @@ Description=frp server
ExecStart=/usr/local/bin/frps -c /etc/frp/frps.ini

View File

@ -5,10 +5,10 @@
FROM easypi/alpine-arm
MAINTAINER EasyPi Software Foundation
ENV GHOST_INSTALL /var/lib/ghost
ENV GHOST_CONTENT /var/lib/ghost/content
ENV NODE_ENV production

gitea/README.md Normal file
View File

@ -0,0 +1,7 @@
[Gitea][1] is a community managed lightweight code hosting solution written in
Go. It is published under the MIT license.
[1]: https://gitea.io/

gitea/docker-compose.yml Normal file
View File

@ -0,0 +1,34 @@
version: "3.7"
image: gitea/gitea
- "2222:22"
- "3000:3000"
- ./data/gitea:/data
- USER_UID=1000
- USER_GID=1000
- DB_TYPE=postgres
- DB_HOST=postgres:5432
- DB_NAME=gitea
- DB_USER=gitea
- DB_PASSWD=gitea
- postgres
restart: always
image: postgres:12-alpine
- "5432:5432"
- ./data/postgres:/var/lib/postgresql/data
restart: always

View File

@ -38,4 +38,7 @@ $ tree -FL 3 ./data/git/
└── user2/
Please read [this][2] to learn more about `app.ini`.
[1]: https://gogs.io/
[2]: https://gogs.io/docs/advanced/configuration_cheat_sheet.html

View File

@ -2,21 +2,23 @@
# Dockerfile for gogs-arm
FROM easypi/alpine-arm
FROM arm32v7/alpine:3
MAINTAINER EasyPi Software Foundation
ENV GOGS_CUSTOM /data/gogs
RUN apk add --no-cache bash \
ca-certificates \
curl \
git \
linux-pam \
openssh \
s6 \
socat \
RUN apk add --no-cache \
bash \
ca-certificates \
curl \
git \
linux-pam \
openssh \
s6 \
socat \
RUN set -xe \
&& adduser -H -D -g 'Gogs Git User' -h /data/git -s /bin/bash git \
@ -24,16 +26,16 @@ RUN set -xe \
&& echo "export GOGS_CUSTOM=${GOGS_CUSTOM}" >> /etc/profile
RUN set -xe \
&& curl -L https://github.com/tianon/gosu/releases/download/1.10/gosu-armhf > /usr/sbin/gosu \
&& curl -L https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-armhf > /usr/sbin/gosu \
&& chmod +x /usr/sbin/gosu
RUN set -xe \
&& mkdir /app/ \
&& cd /app/ \
&& curl -LO https://github.com/gogits/gogs/releases/download/v${GOGS_VERSION}/raspi2_armv6.zip \
&& unzip raspi2_armv6.zip \
&& rm raspi2_armv6.zip \
&& ln -s /lib/libc.musl-armhf.so.1 /lib/ld-linux-armhf.so.3
&& curl -LO https://github.com/gogits/gogs/releases/download/v${GOGS_VERSION}/raspi_armv7.zip \
&& unzip raspi_armv7.zip \
&& rm raspi_armv7.zip \
&& ln -s /lib/libc.musl-armv7.so.1 /lib/ld-linux-armhf.so.3
RUN set -xe \
&& cd /app/gogs/ \

View File

@ -0,0 +1,14 @@
address = "graphite:2003"
dial_timeout = '5s'
write_timeout = '30s'
mode = 'tags'
global_prefix = 'stats'
global_suffix = ''
prefix_counter = 'counters'
prefix_timer = 'timers'
prefix_gauge = 'gauges'
prefix_sets = 'sets'

View File

@ -0,0 +1,12 @@
image: atlassianlabs/gostatsd
command: gostatsd --backends=graphite --config-path=/etc/gostatsd/gostatsd.toml
- "8125:8125/udp"
- "8126:8126/tcp"
- "8181:8181/tcp"
- ./data:/etc/gostatsd
- graphite_graphite_1:graphite
restart: unless-stopped

View File

@ -27,6 +27,7 @@ grafana:
- GF_SMTP_FROM_ADDRESS=grafana@example.com
restart: always

View File

@ -14,4 +14,5 @@ grafana:
- GF_SMTP_FROM_ADDRESS=grafana@example.com
restart: always

View File

@ -5,7 +5,7 @@
FROM alpine
MAINTAINER EasyPi Software Foundation
ENV GRAPHITE_CONF_DIR=/opt/graphite/conf
ENV GRAPHITE_STORAGE_DIR=/opt/graphite/storage
ENV PATH=$PATH:/opt/graphite/bin
@ -34,7 +34,8 @@ RUN set -xe \
&& echo "SECRET_KEY = '$(head -c 16 /dev/urandom | base64)'" > graphite/local_settings.py \
&& curl -sSL https://github.com/graphite-project/graphite-web/raw/master/webapp/manage.py > manage.py \
&& chmod +x manage.py \
&& ./manage.py migrate --run-syncdb --noinput \
&& ./manage.py collectstatic --noinput --settings=graphite.settings \
&& ./manage.py migrate --noinput --run-syncdb \
&& apk del build-base \
curl \
git \
@ -48,8 +49,11 @@ COPY supervisord.conf /etc/supervisor/
EXPOSE 2003 \
EXPOSE 2003/udp \
2003 \
2004 \
2023 \
2024 \
7002 \
8080 \

View File

@ -14,7 +14,10 @@ graphite:
image: vimagick/graphite
- "2003:2003"
- "2003:2003/udp"
- "2004:2004"
- "2023:2023"
- "2024:2024"
- "7002:7002"
- "8080:8080"
- "9001:9001"
@ -32,14 +35,21 @@ $ mkdir -p data/storage/log/webapp
$ docker-compose up -d
$ docker-compose exec graphite sh
>>> vi conf/storage-schemas.conf
>>> python webapp/manage.py migrate --run-syncdb --noinput
>>> python webapp/manage.py migrate --noinput --run-syncdb
>>> python webapp/manage.py createsuperuser
>>> python webapp/manage.py changepassword
>>> supervisorctl restart
>>> supervisorctl status
carbon-aggregator RUNNING pid 9, uptime 0:00:13
carbon-cache RUNNING pid 8, uptime 0:00:22
graphite-webapp RUNNING pid 7, uptime 0:00:24
>>> exit
$ tree -F -L 4
├── data/
│   ├── conf/
│   │   ├── aggregation-rules.conf
│   │   ├── carbon.conf
│   │   ├── rewrite-rules.conf
│   │   └── storage-schemas.conf
│   └── storage/
│   ├── carbon-cache-a.pid

View File

@ -0,0 +1,43 @@
# The form of each line in this file should be as follows:
# output_template (frequency) = method input_pattern
# This will capture any received metrics that match 'input_pattern'
# for calculating an aggregate metric. The calculation will occur
# every 'frequency' seconds and the 'method' can specify 'sum' or
# 'avg'. The name of the aggregate metric will be derived from
# 'output_template' filling in any captured fields from 'input_pattern'.
# For example, if your metric naming scheme is:
# <env>.applications.<app>.<server>.<metric>
# You could configure some aggregations like so:
# <env>.applications.<app>.all.requests (60) = sum <env>.applications.<app>.*.requests
# <env>.applications.<app>.all.latency (60) = avg <env>.applications.<app>.*.latency
# As an example, if the following metrics are received:
# prod.applications.apache.www01.requests
# prod.applications.apache.www02.requests
# They would all go into the same aggregation buffer and after 60 seconds the
# aggregate metric 'prod.applications.apache.all.requests' would be calculated
# by summing their values.
# Template components such as <env> will match everything up to the next dot.
# To match multiple metric components including the dots, use <<metric>> in the
# input template:
# <env>.applications.<app>.all.<app_metric> (60) = sum <env>.applications.<app>.*.<<app_metric>>
# It is also possible to use regular expressions. Following the example above
# when using:
# <env>.applications.<app>.<domain>.requests (60) = sum <env>.applications.<app>.<domain>\d{2}.requests
# You will end up with 'prod.applications.apache.www.requests' instead of
# 'prod.applications.apache.all.requests'.
# Note that any time this file is modified, it will be re-read automatically.

View File

@ -505,13 +505,13 @@ PICKLE_RECEIVER_PORT = 2024
# If set true, metrics received will be forwarded to DESTINATIONS in addition to
# the output of the aggregation rules. If set false the carbon-aggregator will
# only ever send the output of aggregation.
# Filenames of the configuration files to use for this instance of aggregator.
# Filenames are relative to CONF_DIR.
# AGGREGATION_RULES = aggregation-rules.conf
# REWRITE_RULES = rewrite-rules.conf
AGGREGATION_RULES = aggregation-rules.conf
REWRITE_RULES = rewrite-rules.conf
# This is a list of carbon daemons we will send any relayed or
# generated metrics to. The default provided would send to a single

View File

@ -0,0 +1,18 @@
# This file defines regular expression patterns that can be used to
# rewrite metric names in a search & replace fashion. It consists of two
# sections, [pre] and [post]. The rules in the pre section are applied to
# metric names as soon as they are received. The post rules are applied
# after aggregation has taken place.
# The general form of each rule is as follows:
# regex-pattern = replacement-text
# For example:
# [post]
# _sum$ =
# _avg$ =
# These rules would strip off a suffix of _sum or _avg from any metric names
# after aggregation.

View File

@ -1,9 +1,11 @@
image: vimagick/graphite
- "2003:2003"
- "2003:2003/udp"
- "2003:2003"
- "2004:2004"
- "2023:2023"
- "2024:2024"
- "7002:7002"
- "8080:8080"
- "9001:9001"

View File

@ -15,6 +15,11 @@ command = carbon-cache.py --debug start
redirect_stderr = true
autorestart = true
command = carbon-aggregator.py --debug start
redirect_stderr = true
autorestart = true
command = gunicorn -b :8080 graphite.wsgi:application
directory = /opt/graphite/webapp

View File

@ -9,7 +9,9 @@ RUN set -xe \
&& apk update \
&& apk add --no-cache ca-certificates \
build-base \
libffi-dev \
linux-headers \
openssl-dev \
python3 \
python3-dev \
&& pip3 install --no-cache-dir homeassistant \

View File

@ -2,20 +2,25 @@
# Dockerfile for hass-arm (Home Assistant)
FROM easypi/alpine-arm
FROM arm32v7/alpine:3
MAINTAINER EasyPi Software Foundation
RUN set -xe \
&& apk update \
&& apk add --no-cache ca-certificates \
build-base \
linux-headers \
python3 \
python3-dev \
&& pip3 install --no-cache-dir homeassistant \
&& wget https://github.com/home-assistant/hassio-cli/releases/download/1.3.1/hassio_armhf -O /usr/local/bin/hassio
&& apk add --no-cache \
ca-certificates \
build-base \
libffi-dev \
linux-headers \
openssl-dev \
python3 \
python3-dev \
&& pip3 install --no-cache-dir homeassistant==${HASS_VERSION} \
&& wget https://github.com/home-assistant/hassio-cli/releases/download/${HASS_CLI_VERSION}/hassio_armhf -O /usr/local/bin/hassio
VOLUME /etc/hass
ENTRYPOINT ["hass", "--config", "/etc/hass"]

View File

@ -2,37 +2,42 @@
# Dockerfile for hass-arm (Home Assistant)
FROM resin/rpi-raspbian:jessie
FROM balenalib/rpi-raspbian:buster
MAINTAINER EasyPi Software Foundation
RUN set -xe \
&& apt-get update \
&& apt-get install -y build-essential \
bluez \
curl \
libbluetooth3 \
libbluetooth-dev \
libboost-python-dev \
libboost-thread-dev \
libglib2.0 \
libglib2.0-dev \
pkg-config \
python-dev \
python3-dev \
&& apt-get install -y \
build-essential \
bluez \
curl \
libbluetooth3 \
libbluetooth-dev \
libboost-python-dev \
libboost-thread-dev \
libglib2.0 \
libglib2.0-dev \
pkg-config \
python-dev \
python3-dev \
&& curl https://bootstrap.pypa.io/get-pip.py | python3 \
&& pip3 install --no-cache-dir gattlib \
homeassistant \
pybluez \
&& setcap 'cap_net_raw,cap_net_admin+eip' /usr/bin/python3.4 \
&& apt-get remove -y curl \
libbluetooth-dev \
libboost-python-dev \
libboost-thread-dev \
libglib2.0-dev \
pkg-config \
&& pip3 install --no-cache-dir \
gattlib \
homeassistant \
pybluez \
&& setcap 'cap_net_raw,cap_net_admin+eip' /usr/bin/python3.7 \
&& apt-get remove -y \
curl \
libbluetooth-dev \
libboost-python-dev \
libboost-thread-dev \
libglib2.0-dev \
pkg-config \
VOLUME /etc/hass
ENTRYPOINT ["hass", "--config", "/etc/hass"]

Some files were not shown because too many files have changed in this diff Show More