Dependency | Reason |
---|---|
Dagrun Running | Task instance's dagrun was not in the 'running' state but in the state 'success'. |
Execution Date | The execution date is 2024-05-14T08:42:43.073866+00:00 but this is before the task's start date 2024-07-13T00:00:00+00:00. |
Task Instance State | Task is in the 'success' state which is not a valid state for execution. The task must be cleared in order to be run. |
def validation(**kwargs):
    """Check that the job's country and media owner are supported.

    The supported values are read from the ``audience_meta_conf`` Variable
    (keys ``AUD_COUNTRIES`` and ``AUD_MEDIA_OWNERS``). The job settings are
    read from the Variable whose name is passed in
    ``kwargs['audience-config-file']`` (keys ``COUNTRY`` and ``MEDIA_OWNER``).
    Both Variables are deserialized from JSON.

    Args:
        **kwargs: Must contain ``'audience-config-file'``, the name of the
            Variable that holds the job configuration.

    Raises:
        KeyError: If ``'audience-config-file'`` is missing from ``kwargs``.
        Exception: If the job's ``COUNTRY`` or ``MEDIA_OWNER`` is not found
            in the corresponding supported values.
    """
    audience_meta_config = Variable.get("audience_meta_conf", deserialize_json=True)
    supported_countries = audience_meta_config["AUD_COUNTRIES"]
    supported_media_owners = audience_meta_config["AUD_MEDIA_OWNERS"]

    # Pass the values as logger arguments rather than concatenating with '+':
    # concatenation raises TypeError when the deserialized value is not a str.
    # NOTE(review): assumes LOG follows the stdlib logging.Logger API — confirm.
    LOG.info('Supported Audience Countries: %s', supported_countries)
    LOG.info('Supported Audience Media Owners: %s', supported_media_owners)

    job_config = Variable.get(kwargs['audience-config-file'], deserialize_json=True)
    country = job_config['COUNTRY']
    media_owner = job_config['MEDIA_OWNER']

    # One-element tuples keep '%' formatting correct for any value type.
    if country not in supported_countries:
        raise Exception(
            'Countries: %s not supported. Add supported country in config' % (country,)
        )
    if media_owner not in supported_media_owners:
        # The message needs a %s placeholder; without one the '%' operator
        # itself raises TypeError and hides the real validation failure.
        raise Exception(
            'Media Owners: %s not supported. Add supported Media Owners in config'
            % (media_owner,)
        )
|
Attribute | Value |
---|---|
dag_id | primary-audience-global-ind_v003 |
duration | 0.397885 |
end_date | 2024-05-14 08:43:02.528174+00:00 |
execution_date | 2024-05-14T08:42:43.073866+00:00 |
executor_config | {} |
generate_command | <function TaskInstance.generate_command at 0x7ffa4fe777b8> |
hostname | airflow-worker-1.airflow-worker.data-eng.svc.cluster.local |
is_premature | False |
job_id | 541376 |
key | ('primary-audience-global-ind_v003', 'validation', <Pendulum [2024-05-14T08:42:43.073866+00:00]>, 2) |
log | <Logger airflow.task (INFO)> |
log_filepath | /opt/airflow/logs/primary-audience-global-ind_v003/validation/2024-05-14T08:42:43.073866+00:00.log |
log_url | https://airflow.devel.viooh.net.cn/admin/airflow/log?execution_date=2024-05-14T08%3A42%3A43.073866%2B00%3A00&task_id=validation&dag_id=primary-audience-global-ind_v003 |
logger | <Logger airflow.task (INFO)> |
mark_success_url | https://airflow.devel.viooh.net.cn/success?task_id=validation&dag_id=primary-audience-global-ind_v003&execution_date=2024-05-14T08%3A42%3A43.073866%2B00%3A00&upstream=false&downstream=false |
max_tries | 0 |
metadata | MetaData(bind=None) |
next_try_number | 2 |
operator | PythonOperator |
pid | 27106 |
pool | default_pool |
pool_slots | 1 |
prev_attempted_tries | 1 |
previous_execution_date_success | 2024-04-16 07:29:56.426869+00:00 |
previous_start_date_success | 2024-04-16 10:14:11.168473+00:00 |
previous_ti | <TaskInstance: primary-audience-global-ind_v003.validation 2024-04-16 07:29:56.426869+00:00 [success]> |
previous_ti_success | <TaskInstance: primary-audience-global-ind_v003.validation 2024-04-16 07:29:56.426869+00:00 [success]> |
priority_weight | 2 |
queue | default |
queued_dttm | 2024-05-14 08:42:59.254633+00:00 |
raw | False |
run_as_user | None |
start_date | 2024-05-14 08:43:02.130289+00:00 |
state | success |
task | <Task(PythonOperator): validation> |
task_id | validation |
test_mode | False |
try_number | 2 |
unixname | airflow |
Attribute | Value |
---|---|
dag | <DAG: primary-audience-global-ind_v003> |
dag_id | primary-audience-global-ind_v003 |
depends_on_past | False |
deps | {<TIDep(Previous Dagrun State)>, <TIDep(Not Previously Skipped)>, <TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>} |
do_xcom_push | True |
downstream_list | [<Task(PythonOperator): create_cluster>] |
downstream_task_ids | {'create_cluster'} |
email | None |
email_on_failure | True |
email_on_retry | True |
end_date | None |
execution_timeout | None |
executor_config | {} |
extra_links | [] |
global_operator_extra_link_dict | {} |
inlets | [] |
lineage_data | None |
log | <Logger airflow.task.operators (INFO)> |
logger | <Logger airflow.task.operators (INFO)> |
max_retry_delay | None |
on_failure_callback | <function task_fail_slack_alert at 0x7ffa3fcd4a60> |
on_retry_callback | None |
on_success_callback | None |
op_args | [] |
op_kwargs | {'cluster-name': 'primary-audience-global-JCDECAUX_CN_SH_METRO-2406', 'audience-config-file': 'primary_audience_global_ind_conf', 'master-instance-types': 'm5.2xlarge,m5.4xlarge', 'core-instance-types': 'm5.2xlarge,m5.4xlarge', 'task-instance-types': 'm5.2xlarge,m5.4xlarge', 'core-instance-capacity': 1, 'task-instance-capacity': 0, 'emr-version': 'emr-6.0.0', 'audience-load-bootstrap': True, 'emr-steps': '[{\n "step-name": "PrimaryAudienceGlobal-integrity",\n "py-script": "jobs/integrity_job.py",\n "config-json": [\n {"spark.app.audience.country":"CN"},\n {"spark.app.audience.media.owner":"JCDECAUX_CN_SH_METRO"},\n {"spark.app.audience.version":"2406"},\n {"spark.app.audience.bucket.name":"s3://jcd-prd-datacorp-partners"},\n {"spark.app.audience.digital.file":"viooh/outputs/china/metro/shanghai/audience/shanghai-20240709092225/data/type=dynamic/"},\n {"spark.app.audience.static.file":"viooh/outputs/china/metro/shanghai/audience/shanghai-20240709092225/data/type=static/"},\n {"spark.app.audience.demographic.file":"viooh/outputs/china/metro/shanghai/audience/shanghai-20240709092225/demographics/"},\n {"spark.app.audience.input.bucket":"s3://global-cn-audiences-input"},\n {"spark.app.audience.output.bucket":"s3://global-cn-audiences-output"},\n {"spark.app.audience.folder":"route"},\n {"spark.app.audience.computation.date":"20240714160717"},\n {"spark.executor.memoryOverhead":"2500"}\n ],\n "artifact": "AudiencesPipeline"\n },{\n "step-name": "PrimaryAudienceGlobal-load",\n "py-script": "jobs/etl_job.py",\n "config-file": "configs/primary-audience-global/1.0.0/job.json",\n "config-json": [\n {"spark.app.audience.country":"CN"},\n {"spark.app.audience.media.owner":"JCDECAUX_CN_SH_METRO"},\n {"spark.app.audience.version":"2406"},\n {"spark.app.audience.digital.file":"viooh/outputs/china/metro/shanghai/audience/shanghai-20240709092225/data/type=dynamic/"},\n 
{"spark.app.audience.static.file":"viooh/outputs/china/metro/shanghai/audience/shanghai-20240709092225/data/type=static/"},\n {"spark.app.audience.folder":"route"},\n {"spark.app.audience.computation.date":"20240714160717"},\n {"spark.executor.memoryOverhead":"2500"}\n ],\n "artifact": "AudiencesPipeline"\n },{\n "step-name": "SaveToDataBase",\n "py-script": "jobs/save_to_sql.py",\n "config-file": "configs/primary-audience-global-masterdb-uat/1.0.0/job.json",\n "config-json": [\n {"spark.app.audience.country":"CN"},\n {"spark.app.audience.media.owner":"JCDECAUX_CN_SH_METRO"},\n {"spark.app.audience.version":"2406"},\n {"spark.app.audience.output.bucket":"s3://global-cn-audiences-output"},\n {"spark.app.audience.computation.date":"20240714160717"},\n {"spark.app.audience.database":"jcdecaux_cn_sh_metro_dev"},\n {"spark.executor.memoryOverhead":"2500"}\n ],\n "artifact": "AudiencesPipeline",\n "packages": "mysql:mysql-connector-java:5.1.46"\n }]', 'dag-id': 'primary-audience-global-ind_v003', 'schedule_interval': None, 'trigger_dags': []} |
operator_extra_link_dict | {} |
operator_extra_links | () |
outlets | [] |
owner | data.engineers@viooh.com |
params | {} |
pool | default_pool |
pool_slots | 1 |
priority_weight | 1 |
priority_weight_total | 2 |
provide_context | True |
queue | default |
resources | None |
retries | 0 |
retry_delay | 0:05:00 |
retry_exponential_backoff | False |
run_as_user | None |
schedule_interval | None |
shallow_copy_attrs | ('python_callable', 'op_kwargs') |
sla | None |
start_date | 2024-07-13 00:00:00+00:00 |
subdag | None |
task_concurrency | None |
task_id | validation |
task_type | PythonOperator |
template_ext | [] |
template_fields | ('templates_dict', 'op_args', 'op_kwargs') |
templates_dict | None |
trigger_rule | all_success |
ui_color | #ffefeb |
ui_fgcolor | #000 |
upstream_list | [] |
upstream_task_ids | set() |
wait_for_downstream | False |
weight_rule | downstream |