# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
apiVersion: v1
kind: ConfigMap
metadata:
  name: airflow-configmap
  namespace: airflow
data:
  airflow.cfg: |
    [core]
    dags_folder = {{CONFIGMAP_DAGS_FOLDER}}
    base_log_folder = /root/airflow/logs
    logging_level = INFO
    executor = KubernetesExecutor
    parallelism = 32
    load_examples = False
    plugins_folder = /root/airflow/plugins
    sql_alchemy_conn = $SQL_ALCHEMY_CONN

    [scheduler]
    dag_dir_list_interval = 300
    child_process_log_directory = /root/airflow/logs/scheduler
    # Task instances listen for an external kill signal (when you clear tasks
    # from the CLI or the UI); this defines the frequency at which they should
    # listen (in seconds).
    job_heartbeat_sec = 5
    max_threads = 2

    # The scheduler constantly tries to trigger new tasks (look at the
    # scheduler section in the docs for more information). This defines
    # how often the scheduler should run (in seconds).
    scheduler_heartbeat_sec = 5

    # After how much time a new DAG should be picked up from the filesystem
    min_file_process_interval = 0

    statsd_on = False
    statsd_host = localhost
    statsd_port = 8125
    statsd_prefix = airflow

    # How many seconds to wait between file-parsing loops to prevent the logs from being spammed.
    min_file_parsing_loop_time = 1

    print_stats_interval = 30
    scheduler_zombie_task_threshold = 300
    max_tis_per_query = 0
    authenticate = False

    # Turn off scheduler catchup by setting this to False.
    # Default behavior is unchanged and Command Line Backfills still work,
    # but the scheduler will not do scheduler catchup if this is False;
    # however, it can be set on a per-DAG basis in the DAG definition (catchup).
    catchup_by_default = True

    [webserver]
    # The base url of your website, as airflow cannot guess what domain or
    # cname you are using. This is used in automated emails that
    # airflow sends to point links to the right web server.
    base_url = http://0.0.0.0:8080
    rbac = True

    # The IP specified when starting the web server
    web_server_host = 0.0.0.0

    # The port on which to run the web server
    web_server_port = 8080

    # Paths to the SSL certificate and key for the web server. When both are
    # provided SSL will be enabled. This does not change the web server port.
    web_server_ssl_cert =
    web_server_ssl_key =

    # Number of seconds the webserver waits before killing a gunicorn master that doesn't respond
    web_server_master_timeout = 120

    # Number of seconds the gunicorn webserver waits before timing out on a worker
    web_server_worker_timeout = 120

    # Number of workers to refresh at a time. When set to 0, worker refresh is
    # disabled. When nonzero, airflow periodically refreshes webserver workers by
    # bringing up new ones and killing old ones.
    worker_refresh_batch_size = 1

    # Number of seconds to wait before refreshing a batch of workers.
    worker_refresh_interval = 30

    # Secret key used to run your flask app
    secret_key = temporary_key

    # Number of workers to run the Gunicorn web server
    workers = 4

    # The worker class gunicorn should use. Choices include
    # sync (default), eventlet, gevent
    worker_class = sync

    # Log files for the gunicorn webserver. '-' means log to stderr.
    access_logfile = -
    error_logfile = -

    # Expose the configuration file in the web server
    expose_config = False

    # Default DAG view. Valid values are:
    # tree, graph, duration, gantt, landing_times
    dag_default_view = tree

    # Default DAG orientation. Valid values are:
    # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
    dag_orientation = LR

    # Puts the webserver in demonstration mode; blurs the names of Operators for
    # privacy.
    demo_mode = False

    # The amount of time (in secs) the webserver will wait for an initial handshake
    # while fetching logs from another worker machine
    log_fetch_timeout_sec = 5

    # By default, the webserver shows paused DAGs. Flip this to hide paused
    # DAGs by default
    hide_paused_dags_by_default = False

    # Consistent page size across all listing views in the UI
    page_size = 100

    [smtp]
    # If you want airflow to send emails on retries or failures, and you want
    # to use the airflow.utils.email.send_email_smtp function, you have to
    # configure an smtp server here
    smtp_host = localhost
    smtp_starttls = True
    smtp_ssl = False
    # Uncomment and set the user/pass settings if you want to use SMTP AUTH
    # smtp_user = airflow
    # smtp_password = airflow
    smtp_port = 25
    smtp_mail_from = airflow@example.com

    [kubernetes]
    airflow_configmap = airflow-configmap
    worker_container_repository = {{AIRFLOW_IMAGE}}
    worker_container_tag = {{AIRFLOW_TAG}}
    worker_container_image_pull_policy = Always
    worker_service_account_name = airflow
    namespace = airflow
    delete_worker_pods = True
    dags_in_image = False
    git_repo = https://github.com/{{CONFIGMAP_GIT_REPO}}.git
    git_branch = {{CONFIGMAP_BRANCH}}
    git_subpath = airflow/contrib/example_dags/
    git_user =
    git_password =
    git_sync_root = /git
    git_sync_path = repo
    git_dags_folder_mount_point = {{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}
    dags_volume_claim = {{CONFIGMAP_DAGS_VOLUME_CLAIM}}
    dags_volume_subpath =
    logs_volume_claim =
    logs_volume_subpath =
    dags_volume_host =
    logs_volume_host =
    in_cluster = True
    gcp_service_account_keys =
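    # How the git-sync settings above are expected to combine (illustrative,
    # assuming the default behaviour of the git-sync image configured below):
    # the init container checks the repository out under
    # <git_sync_root>/<git_sync_path>, i.e. /git/repo here, so the example
    # DAGs would land at roughly /git/repo/airflow/contrib/example_dags/
    # on the shared DAGs volume.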
    # Example affinity and toleration definitions.
    affinity = {"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"weight":1,"preference":{"matchExpressions":[{"key":"lifecycle","operator":"In","values":["Ec2Spot"]}]}}],"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}}
    tolerations = [{ "key": "spotInstance", "operator": "Equal", "value": "true", "effect": "PreferNoSchedule" }, { "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }]
    # affinity = {"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}}
    # tolerations = [{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }]

    # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync
    git_sync_container_repository = gcr.io/google-containers/git-sync-amd64
    git_sync_container_tag = v2.0.5
    git_sync_init_container_name = git-sync-clone

    [kubernetes_node_selectors]
    # The key-value pairs to be given to worker pods.
    # The worker pods will be scheduled to the nodes of the specified key-value pairs.
    # Should be supplied in the format: key = value

    [kubernetes_annotations]
    # The key-value annotation pairs to be given to worker pods.
    # Should be supplied in the format: key = value

    [kubernetes_secrets]
    SQL_ALCHEMY_CONN = airflow-secrets=sql_alchemy_conn

    [hive]
    # Default mapreduce queue for HiveOperator tasks
    default_hive_mapred_queue =

    [celery]
    # This section only applies if you are using the CeleryExecutor in
    # the [core] section above

    # The app name that will be used by celery
    celery_app_name = airflow.executors.celery_executor

    # The concurrency that will be used when starting workers with the
    # "airflow worker" command. This defines the number of task instances that
    # a worker will take, so size up your workers based on the resources on
    # your worker box and the nature of your tasks
    worker_concurrency = 16

    # When you start an airflow worker, airflow starts a tiny web server
    # subprocess to serve the worker's local log files to the airflow main
    # web server, which then builds pages and sends them to users. This defines
    # the port on which the logs are served. It needs to be unused, and open
    # and visible from the main web server to connect into the workers.
    worker_log_server_port = 8793

    # The Celery broker URL. Celery supports RabbitMQ, Redis and, experimentally,
    # a sqlalchemy database. Refer to the Celery documentation for more
    # information.
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
    broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow

    # The Celery result_backend. When a job finishes, it needs to update the
    # metadata of the job. Therefore it will post a message on a message bus,
    # or insert it into a database (depending on the backend).
    # This status is used by the scheduler to update the state of the task.
    # The use of a database is highly recommended.
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
    result_backend = db+mysql://airflow:airflow@localhost:3306/airflow

    # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
    # it: `airflow flower`.
    # This defines the IP that Celery Flower runs on
    flower_host = 0.0.0.0

    # The root URL for Flower
    # Ex: flower_url_prefix = /flower
    flower_url_prefix =

    # This defines the port that Celery Flower runs on
    flower_port = 5555

    # Securing Flower with Basic Authentication
    # Accepts user:password pairs separated by a comma
    # Example: flower_basic_auth = user1:password1,user2:password2
    flower_basic_auth =

    # Default queue that tasks get assigned to and that workers listen on.
    default_queue = default

    # How many processes CeleryExecutor uses to sync task state.
    # 0 means to use max(1, number of cores - 1) processes.
    sync_parallelism = 0

    # Import path for celery configuration options
    celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG

    [celery_broker_transport_options]
    # The visibility timeout defines the number of seconds to wait for the worker
    # to acknowledge the task before the message is redelivered to another worker.
    # Make sure to increase the visibility timeout to match the time of the longest
    # ETA you're planning to use. Especially important when using Redis or SQS.
    visibility_timeout = 21600

    # In case of using SSL
    ssl_active = False
    ssl_key =
    ssl_cert =
    ssl_cacert =

    [dask]
    # This section only applies if you are using the DaskExecutor in
    # the [core] section above

    # The IP address and port of the Dask cluster's scheduler.
    cluster_address = 127.0.0.1:8786
    # TLS/SSL settings to access a secured Dask scheduler.
    tls_ca =
    tls_cert =
    tls_key =

    [ldap]
    # set this to ldaps://<your.ldap.server>:<port>
    uri =
    user_filter = objectClass=*
    user_name_attr = uid
    group_member_attr = memberOf
    superuser_filter =
    data_profiler_filter =
    bind_user = cn=Manager,dc=example,dc=com
    bind_password = insecure
    basedn = dc=example,dc=com
    cacert = /etc/ca/ldap_ca.crt
    search_scope = LEVEL

    [kerberos]
    ccache = /tmp/airflow_krb5_ccache
    # gets augmented with fqdn
    principal = airflow
    reinit_frequency = 3600
    kinit_path = kinit
    keytab = airflow.keytab

    [cli]
    api_client = airflow.api.client.json_client
    endpoint_url = http://0.0.0.0:8080

    [api]
    auth_backend = airflow.api.auth.backend.default

    [github_enterprise]
    api_rev = v3

    [admin]
    # UI to hide sensitive variable fields when set to True
    hide_sensitive_variable_fields = True

    [elasticsearch]
    elasticsearch_host =
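# The sql_alchemy_conn value above references the SQL_ALCHEMY_CONN environment
# variable, and the [kubernetes_secrets] section maps that variable into the
# worker pods from the sql_alchemy_conn key of a Secret named "airflow-secrets".
# A minimal sketch of such a companion Secret, assuming a MySQL metadata
# database (the connection string below is only a placeholder):
#
#   apiVersion: v1
#   kind: Secret
#   metadata:
#     name: airflow-secrets
#     namespace: airflow
#   type: Opaque
#   stringData:
#     sql_alchemy_conn: mysql://airflow:airflow@airflow-db:3306/airflow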