| Dependency | Reason |
|---|---|
Dagrun Running | Task instance's dagrun was not in the 'running' state but in the state 'failed'. |
Execution Date | The execution date is 2022-11-04T13:13:26.848566+00:00 but this is before the task's start date 2024-07-13T00:00:00+00:00. |
Task Instance State | Task is in the 'failed' state which is not a valid state for execution. The task must be cleared in order to be run. |
Dag Not Paused | Task's DAG 'primary-audience-global_v003' is paused. |
def validation(**kwargs):
    """Validate that the job's country and media owner are supported.

    Reads the global audience metadata from the ``audience_meta_conf``
    Airflow Variable and the per-job configuration named by the
    ``audience-config-file`` kwarg, then checks the job's ``COUNTRY`` and
    ``MEDIA_OWNER`` against the supported lists.

    Raises:
        Exception: if ``COUNTRY`` or ``MEDIA_OWNER`` is not in the
            supported lists from the metadata config.
    """
    audience_meta_config = Variable.get("audience_meta_conf", deserialize_json=True)
    aud_countries = audience_meta_config["AUD_COUNTRIES"]
    aud_media_owners = audience_meta_config["AUD_MEDIA_OWNERS"]
    # Lazy %s formatting instead of '+' concatenation: the deserialized
    # values are presumably lists (they are used with 'in' below), and
    # str + list raises TypeError.  TODO confirm the config value types.
    LOG.info('Supported Audience Countries: %s', aud_countries)
    # BUG FIX: the original logged AUD_COUNTRIES under the media-owners
    # label (copy-paste error).
    LOG.info('Supported Audience Media Owners: %s', aud_media_owners)
    conf_file = kwargs['audience-config-file']
    job_config = Variable.get(conf_file, deserialize_json=True)
    country = job_config['COUNTRY']
    media_owner = job_config['MEDIA_OWNER']
    if country not in aud_countries:
        raise Exception('Countries: %s not supported. Add supported country in config' % country)
    if media_owner not in aud_media_owners:
        # BUG FIX: the original format string had no %s placeholder, so the
        # '%' operator raised TypeError instead of the intended message.
        raise Exception('Media Owners: %s not supported. Add supported Media Owners in config' % media_owner)
|
| Attribute | Value |
|---|---|
dag_id | primary-audience-global_v003 |
duration | 0.363979 |
end_date | 2022-11-04 13:13:32.684244+00:00 |
execution_date | 2022-11-04T13:13:26.848566+00:00 |
executor_config | {} |
generate_command | <function TaskInstance.generate_command at 0x7ffa4fe777b8> |
hostname | airflow-worker-1.airflow-worker.bigdata.svc.cluster.local |
is_premature | False |
job_id | 279223 |
key | ('primary-audience-global_v003', 'validation', <Pendulum [2022-11-04T13:13:26.848566+00:00]>, 2) |
log | <Logger airflow.task (INFO)> |
log_filepath | /opt/airflow/logs/primary-audience-global_v003/validation/2022-11-04T13:13:26.848566+00:00.log |
log_url | https://airflow.devel.viooh.net.cn/admin/airflow/log?execution_date=2022-11-04T13%3A13%3A26.848566%2B00%3A00&task_id=validation&dag_id=primary-audience-global_v003 |
logger | <Logger airflow.task (INFO)> |
mark_success_url | https://airflow.devel.viooh.net.cn/success?task_id=validation&dag_id=primary-audience-global_v003&execution_date=2022-11-04T13%3A13%3A26.848566%2B00%3A00&upstream=false&downstream=false |
max_tries | 0 |
metadata | MetaData(bind=None) |
next_try_number | 2 |
operator | PythonOperator |
pid | 4890 |
pool | default_pool |
pool_slots | 1 |
prev_attempted_tries | 1 |
previous_execution_date_success | None |
previous_start_date_success | None |
previous_ti | <TaskInstance: primary-audience-global_v003.validation 2022-11-04 11:32:01.178461+00:00 [failed]> |
previous_ti_success | None |
priority_weight | 2 |
queue | default |
queued_dttm | 2022-11-04 13:13:29.375291+00:00 |
raw | False |
run_as_user | None |
start_date | 2022-11-04 13:13:32.320265+00:00 |
state | failed |
task | <Task(PythonOperator): validation> |
task_id | validation |
test_mode | False |
try_number | 2 |
unixname | airflow |
| Attribute | Value |
|---|---|
dag | <DAG: primary-audience-global_v003> |
dag_id | primary-audience-global_v003 |
depends_on_past | False |
deps | {<TIDep(Previous Dagrun State)>, <TIDep(Not Previously Skipped)>, <TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>} |
do_xcom_push | True |
downstream_list | [<Task(PythonOperator): create_cluster>] |
downstream_task_ids | {'create_cluster'} |
None | |
email_on_failure | True |
email_on_retry | True |
end_date | None |
execution_timeout | None |
executor_config | {} |
extra_links | [] |
global_operator_extra_link_dict | {} |
inlets | [] |
lineage_data | None |
log | <Logger airflow.task.operators (INFO)> |
logger | <Logger airflow.task.operators (INFO)> |
max_retry_delay | None |
on_failure_callback | <function task_fail_slack_alert at 0x7ffa3fcd4a60> |
on_retry_callback | None |
on_success_callback | None |
op_args | [] |
op_kwargs | {'cluster-name': 'primary-audience-global-JCDECAUX_ES-3000', 'audience-config-file': 'primary_audience_global_conf', 'master-instance-types': 'm5.2xlarge,m5.4xlarge', 'core-instance-types': 'm5.2xlarge,m5.4xlarge', 'task-instance-types': 'm5.2xlarge,m5.4xlarge', 'core-instance-capacity': 5, 'task-instance-capacity': 0, 'job-type': 'batch', 'emr-version': 'emr-5.21.0', 'emr-steps': '[{\n "step-name": "PrimaryAudienceGlobal-integrity",\n "py-script": "jobs/integrity_job.py",\n "config-file": "configs/primary-audience-global/1.0.0/job.json",\n "config-json": [\n {"spark.app.audience.country":"ES"},\n {"spark.app.audience.media.owner":"JCDECAUX_ES"},\n {"spark.app.audience.version":"3000"},\n {"spark.app.audience.process.dynamic":"True"},\n {"spark.app.audience.process.static":"False"},\n {"spark.app.audience.bucket.name":"s3a://viooh-datashare-jcd-es"},\n {"spark.app.audience.digital.file":"jcd-es-data/r12/dynamic"},\n {"spark.app.audience.static.file":""},\n {"spark.app.audience.demographic.file":"jcd-es-data/r12/demographic"},\n {"spark.app.audience.check.demo":"True"}\n ],\n "artifact": "AudiencesPipeline"\n },\n {\n "step-name": "PrimaryAudienceGlobal-load",\n "py-script": "jobs/etl_job.py",\n "config-file": "configs/primary-audience-global/1.0.0/job.json",\n "config-json": [\n {"spark.app.audience.country":"ES"},\n {"spark.app.audience.media.owner":"JCDECAUX_ES"},\n {"spark.app.audience.version":"3000"},\n {"spark.app.audience.process.dynamic":"True"},\n {"spark.app.audience.process.static":"False"},\n {"spark.app.audience.category.group.name":"ES"},\n {"spark.app.audience.category.group.id":"001"}\n ],\n "artifact": "AudiencesPipeline"\n},{\n "step-name": "RegenerateRoute",\n "config-file": "configs/primary-audience-global-cassandra/1.0.0/job.json",\n "config-json": [\n {"spark.app.primary.audience.base.path": "s3a://global-audiences-output/ES/JCDECAUX_ES/3000/raw/%date%/dynamic/"},\n {"spark.app.primary.audience.date": "20201214"},\n 
{"spark.app.primary.audience.cassandra.keyspace": "es_audience"},\n {"spark.app.primary.audience.version": "3000"}\n ],\n "artifact": "RegenerateRoute",\n "packages": "com.datastax.spark:spark-cassandra-connector_2.11:2.3.0",\n "py-script": "com/viooh/routegen/primary_audience_loader_cassandra.py"\n },{\n "step-name": "SaveToDataBase",\n "config-file": "configs/primary-audience-global-masterdb-uat/1.0.0/job.json",\n "config-json": [\n {"spark.route.version": "3000"},\n {"spark.location.outputFoldermdb":"s3a://global-audiences-output/ES/JCDECAUX_ES/3000/"},\n {"spark.external.db.database":"Espain_uat"},\n {"spark.date.daytouplaodtomysql":"20201214"}\n ],\n "main-class": "com.viooh.restoremasterdb.SaveToDataBase",\n "artifact": "restoremasterdb_2.11",\n "packages": "mysql:mysql-connector-java:5.1.46"\n }]', 'dag-id': 'primary-audience-global_v003', 'schedule_interval': None, 'trigger_dags': []} |
operator_extra_link_dict | {} |
operator_extra_links | () |
outlets | [] |
owner | data.engineers@viooh.com |
params | {} |
pool | default_pool |
pool_slots | 1 |
priority_weight | 1 |
priority_weight_total | 2 |
provide_context | True |
queue | default |
resources | None |
retries | 0 |
retry_delay | 0:05:00 |
retry_exponential_backoff | False |
run_as_user | None |
schedule_interval | None |
shallow_copy_attrs | ('python_callable', 'op_kwargs') |
sla | None |
start_date | 2024-07-13 00:00:00+00:00 |
subdag | None |
task_concurrency | None |
task_id | validation |
task_type | PythonOperator |
template_ext | [] |
template_fields | ('templates_dict', 'op_args', 'op_kwargs') |
templates_dict | None |
trigger_rule | all_success |
ui_color | #ffefeb |
ui_fgcolor | #000 |
upstream_list | [] |
upstream_task_ids | set() |
wait_for_downstream | False |
weight_rule | downstream |