1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def get_config(**kwargs):
    """Build an EMR ``run_job_flow``-style configuration dict.

    The raw cluster settings are produced by ``generate_cluster_config`` (defined
    elsewhere in this module) from the keyword arguments, then the subset of keys
    that EMR's RunJobFlow API expects is forwarded, with ``VisibleToAllUsers``
    always forced to ``True``.

    Args:
        **kwargs: Cluster parameters (instance types, capacities, EMR version,
            steps, ...) passed through to ``generate_cluster_config``.

    Returns:
        dict: The job-flow configuration, also emitted to the log at INFO level.

    Raises:
        KeyError: If ``generate_cluster_config`` omits one of the forwarded keys
            (fails loudly rather than submitting an incomplete job flow).
    """
    config = generate_cluster_config(kwargs)
    # Forward exactly the keys RunJobFlow consumes; a missing key is a bug in
    # generate_cluster_config, so plain indexing (not .get) is deliberate.
    forwarded_keys = (
        "Name",
        "LogUri",
        "ReleaseLabel",
        "Instances",
        "BootstrapActions",
        "Applications",
        "Steps",
        "JobFlowRole",
        "ServiceRole",
        "SecurityConfiguration",
        "Tags",
        "Configurations",
    )
    job_flow_config = {key: config[key] for key in forwarded_keys}
    # Always expose the cluster to all IAM users of the account.
    job_flow_config["VisibleToAllUsers"] = True
    # Lazy %-style args so the dict is only rendered if INFO is enabled.
    LOG.info("job_flow_config: %s", job_flow_config)
    return job_flow_config
|
Attribute |
Value |
dag_id |
booking-processing-job_v002 |
duration |
3.159233 |
end_date |
2021-04-20 02:11:39.187704+00:00 |
execution_date |
2021-04-20T01:00:00+00:00 |
executor_config |
{} |
generate_command |
<function TaskInstance.generate_command at 0x7f603554be18> |
hostname |
airflow-worker-1.airflow-worker.bigdata.svc.cluster.local |
is_premature |
False |
job_id |
19112 |
key |
('booking-processing-job_v002', 'create_emr_steps', <Pendulum [2021-04-20T01:00:00+00:00]>, 2) |
log |
<Logger airflow.task (INFO)> |
log_filepath |
/opt/airflow/logs/booking-processing-job_v002/create_emr_steps/2021-04-20T01:00:00+00:00.log |
log_url |
https://airflow.devel.viooh.net.cn/admin/airflow/log?execution_date=2021-04-20T01%3A00%3A00%2B00%3A00&task_id=create_emr_steps&dag_id=booking-processing-job_v002 |
logger |
<Logger airflow.task (INFO)> |
mark_success_url |
https://airflow.devel.viooh.net.cn/success?task_id=create_emr_steps&dag_id=booking-processing-job_v002&execution_date=2021-04-20T01%3A00%3A00%2B00%3A00&upstream=false&downstream=false |
max_tries |
0 |
metadata |
MetaData(bind=None) |
next_try_number |
2 |
operator |
PythonOperator |
pid |
115620 |
pool |
default_pool |
pool_slots |
1 |
prev_attempted_tries |
1 |
previous_execution_date_success |
2021-04-20 00:00:00+00:00 |
previous_start_date_success |
2021-04-20 01:11:45.219228+00:00 |
previous_ti |
<TaskInstance: booking-processing-job_v002.create_emr_steps 2021-04-20 00:00:00+00:00 [success]> |
previous_ti_success |
<TaskInstance: booking-processing-job_v002.create_emr_steps 2021-04-20 00:00:00+00:00 [success]> |
priority_weight |
3 |
queue |
default |
queued_dttm |
2021-04-20 02:11:33.708505+00:00 |
raw |
False |
run_as_user |
None |
start_date |
2021-04-20 02:11:36.028471+00:00 |
state |
success |
task |
<Task(PythonOperator): create_emr_steps> |
task_id |
create_emr_steps |
test_mode |
False |
try_number |
2 |
unixname |
airflow |
Attribute |
Value |
dag |
<DAG: booking-processing-job_v002> |
dag_id |
booking-processing-job_v002 |
depends_on_past |
True |
deps |
{<TIDep(Not Previously Skipped)>, <TIDep(Trigger Rule)>, <TIDep(Previous Dagrun State)>, <TIDep(Not In Retry Period)>} |
do_xcom_push |
True |
downstream_list |
[<Task(EmrCreateJobFlowOperator): create_cluster_and_add_emr_steps>] |
downstream_task_ids |
{'create_cluster_and_add_emr_steps'} |
email |
None |
email_on_failure |
True |
email_on_retry |
True |
end_date |
None |
execution_timeout |
None |
executor_config |
{} |
extra_links |
[] |
global_operator_extra_link_dict |
{} |
inlets |
[] |
lineage_data |
None |
log |
<Logger airflow.task.operators (INFO)> |
logger |
<Logger airflow.task.operators (INFO)> |
max_retry_delay |
None |
on_failure_callback |
<function task_fail_slack_alert at 0x7f602bc8d158> |
on_retry_callback |
None |
on_success_callback |
None |
op_args |
[] |
op_kwargs |
{'master-instance-types': 'm5.xlarge,m5.2xlarge', 'core-instance-types': 'm5.xlarge,m5.2xlarge', 'task-instance-types': 'm5.xlarge,m5.2xlarge', 'core-instance-capacity': 3, 'task-instance-capacity': 0, 'ebs-volume-size': '50', 'job-type': 'batch', 'emr-version': 'emr-6.14.0', 'emr-steps': '[\n {\n "step-name": "CampaignExtractor",\n "config-json": [\n {"spark.driver.memory":"9g"}\n ],\n "main-class": "com.viooh.campaignextractor.CampaignExtractorMain",\n "group-id":"com/viooh/campaignextractor",\n "artifact": "campaign-extractor",\n "jars": "/usr/lib/spark/connector/lib/spark-avro.jar"\n },\n {\n "step-name": "CampaignProcessingJob",\n "config-json": [\n {"spark.driver.memory":"9g"}\n ],\n "main-class": "com.viooh.CampaignProcessingMain",\n "artifact": "campaign-processing-job",\n "files": "s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/0.0.0/job.conf,s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/0.0.0/job.yaml"\n },\n {\n "step-name": "CampaignDeltaJob",\n "config-json": [\n {"spark.driver.memory":"9g"}\n ],\n "main-class": "com.viooh.booking.delta.CampaignDeltaMain",\n "jar-location": "s3://viooh-spark-artifacts-lab-cn/releases/com/viooh/pandora-campaign-delta/1.22.0/pandora-campaign-delta-1.22.0-jar-with-dependencies.jar",\n "files": "s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/0.0.0/job.conf,s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/0.0.0/job.yaml"\n }\n]', 'cluster-name': 'booking-processing-job', 'dag-id': 'booking-processing-job_v002', 'schedule_interval': '0 */1 * * *', 'trigger_dags': []} |
operator_extra_link_dict |
{} |
operator_extra_links |
() |
outlets |
[] |
owner |
data.engineers@viooh.com |
params |
{} |
pool |
default_pool |
pool_slots |
1 |
priority_weight |
1 |
priority_weight_total |
3 |
provide_context |
True |
queue |
default |
resources |
None |
retries |
4 |
retry_delay |
0:05:00 |
retry_exponential_backoff |
False |
run_as_user |
None |
schedule_interval |
0 */1 * * * |
shallow_copy_attrs |
('python_callable', 'op_kwargs') |
sla |
None |
start_date |
2024-09-19 19:00:00+00:00 |
subdag |
None |
task_concurrency |
None |
task_id |
create_emr_steps |
task_type |
PythonOperator |
template_ext |
[] |
template_fields |
('templates_dict', 'op_args', 'op_kwargs') |
templates_dict |
None |
trigger_rule |
all_success |
ui_color |
#ffefeb |
ui_fgcolor |
#000 |
upstream_list |
[] |
upstream_task_ids |
set() |
wait_for_downstream |
True |
weight_rule |
downstream |