1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def get_config(**kwargs):
    """Assemble the job-flow configuration from the generated cluster config.

    The keyword arguments are handed, as a single dict, to
    ``generate_cluster_config``. Selected entries of its result are copied
    into a new dict, ``VisibleToAllUsers`` is always set to ``True``, and
    the assembled dict is logged at INFO level before being returned.

    :param kwargs: arbitrary keyword arguments, forwarded unchanged to
        ``generate_cluster_config``.
    :return: a dict with the keys ``Name``, ``LogUri``, ``ReleaseLabel``,
        ``Instances``, ``BootstrapActions``, ``Applications``,
        ``VisibleToAllUsers``, ``Steps``, ``JobFlowRole``, ``ServiceRole``,
        ``SecurityConfiguration``, ``Tags`` and ``Configurations``.

    NOTE(review): every copied entry is read with plain subscripting, so a
    missing entry presumably raises ``KeyError`` (assuming
    ``generate_cluster_config`` returns a dict) -- confirm against that helper.
    """
    cluster = generate_cluster_config(kwargs)

    # Entries copied verbatim, split around the one hard-coded flag so the
    # resulting dict keeps its key order.
    leading_keys = (
        "Name",
        "LogUri",
        "ReleaseLabel",
        "Instances",
        "BootstrapActions",
        "Applications",
    )
    trailing_keys = (
        "Steps",
        "JobFlowRole",
        "ServiceRole",
        "SecurityConfiguration",
        "Tags",
        "Configurations",
    )

    job_flow_config = {}
    for key in leading_keys:
        job_flow_config[key] = cluster[key]
    # Not taken from the generated config: always forced on.
    job_flow_config["VisibleToAllUsers"] = True
    for key in trailing_keys:
        job_flow_config[key] = cluster[key]

    LOG.info("job_flow_config: %s", job_flow_config)
    return job_flow_config
|
Attribute |
Value |
dag_id |
livepop-shuffle-aggregator-modeltransform_v004 |
duration |
None |
end_date |
2024-05-02 02:21:45.884059+00:00 |
execution_date |
2024-05-02T01:00:00+00:00 |
executor_config |
{} |
generate_command |
<function TaskInstance.generate_command at 0x7f7bf685a7b8> |
hostname |
|
is_premature |
False |
job_id |
None |
key |
('livepop-shuffle-aggregator-modeltransform_v004', 'create_emr_steps', <Pendulum [2024-05-02T01:00:00+00:00]>, 1) |
log |
<Logger airflow.task (INFO)> |
log_filepath |
/opt/airflow/logs/livepop-shuffle-aggregator-modeltransform_v004/create_emr_steps/2024-05-02T01:00:00+00:00.log |
log_url |
https://airflow.devel.viooh.net.cn/admin/airflow/log?execution_date=2024-05-02T01%3A00%3A00%2B00%3A00&task_id=create_emr_steps&dag_id=livepop-shuffle-aggregator-modeltransform_v004 |
logger |
<Logger airflow.task (INFO)> |
mark_success_url |
https://airflow.devel.viooh.net.cn/success?task_id=create_emr_steps&dag_id=livepop-shuffle-aggregator-modeltransform_v004&execution_date=2024-05-02T01%3A00%3A00%2B00%3A00&upstream=false&downstream=false |
max_tries |
4 |
metadata |
MetaData(bind=None) |
next_try_number |
1 |
operator |
PythonOperator |
pid |
None |
pool |
default_pool |
pool_slots |
1 |
prev_attempted_tries |
0 |
previous_execution_date_success |
2024-05-01 19:00:00+00:00 |
previous_start_date_success |
2024-05-01 20:00:40.751845+00:00 |
previous_ti |
<TaskInstance: livepop-shuffle-aggregator-modeltransform_v004.create_emr_steps 2024-05-02 00:00:00+00:00 [upstream_failed]> |
previous_ti_success |
<TaskInstance: livepop-shuffle-aggregator-modeltransform_v004.create_emr_steps 2024-05-01 19:00:00+00:00 [success]> |
priority_weight |
3 |
queue |
default |
queued_dttm |
None |
raw |
False |
run_as_user |
None |
start_date |
2024-05-02 02:21:45.884049+00:00 |
state |
upstream_failed |
task |
<Task(PythonOperator): create_emr_steps> |
task_id |
create_emr_steps |
test_mode |
False |
try_number |
1 |
unixname |
airflow |
Attribute |
Value |
dag |
<DAG: livepop-shuffle-aggregator-modeltransform_v004> |
dag_id |
livepop-shuffle-aggregator-modeltransform_v004 |
depends_on_past |
False |
deps |
{<TIDep(Trigger Rule)>, <TIDep(Not Previously Skipped)>, <TIDep(Previous Dagrun State)>, <TIDep(Not In Retry Period)>} |
do_xcom_push |
True |
downstream_list |
[<Task(EmrCreateJobFlowOperator): create_cluster_and_add_emr_steps>] |
downstream_task_ids |
{'create_cluster_and_add_emr_steps'} |
email |
None |
email_on_failure |
True |
email_on_retry |
True |
end_date |
None |
execution_timeout |
None |
executor_config |
{} |
extra_links |
[] |
global_operator_extra_link_dict |
{} |
inlets |
[] |
lineage_data |
None |
log |
<Logger airflow.task.operators (INFO)> |
logger |
<Logger airflow.task.operators (INFO)> |
max_retry_delay |
None |
on_failure_callback |
<function task_fail_slack_alert at 0x7f7be66b8a60> |
on_retry_callback |
None |
on_success_callback |
None |
op_args |
[] |
op_kwargs |
{'master-instance-types': 'm5.xlarge,m5.2xlarge', 'core-instance-types': 'm5.xlarge,m5.2xlarge', 'task-instance-types': 'm5.xlarge,m5.2xlarge', 'core-instance-capacity': 3, 'task-instance-capacity': 0, 'ebs-volume-size': '50', 'emr-version': 'emr-5.35.0', 'input-validation-step-config': 'livepop_shuffle_job_validation_config', 'emr-steps': '[\n {\n "step-name": "LivePOPShuffle",\n "config-json": [\n {"spark.driver.memory":"9g"},\n {"spark.serializer":"org.apache.spark.serializer.KryoSerializer"}\n ],\n "main-class": "com.viooh.pop.data.live.shuffle.RawLivePOPShuffleMain",\n "group-id":"com/viooh/pop",\n "artifact": "pop-shuffle-live",\n "jars": "/usr/lib/spark/external/lib/spark-avro.jar",\n "enable-custom-metrics" : "True"\n },\n {\n "step-name": "PopAggregator",\n "config-json": [\n {"spark.driver.memory":"9g"},\n {"spark.serializer":"org.apache.spark.serializer.KryoSerializer"}\n ],\n "main-class": "com.viooh.pop.aggregator.livepop.LivePOPAggregatorMain",\n "group-id":"com/viooh/pop",\n "artifact": "pop-data-aggregator",\n "enable-custom-metrics" : "True"\n },\n {\n "step-name": "ModelTransform",\n "config-json": [\n {"spark.driver.memory":"9g"}\n ],\n "main-class": "uk.co.viooh.job.modeltransform.ModelTransform",\n "group-id": "uk/co/viooh",\n "artifact": "pandora-model-transform",\n "enable-custom-metrics" : "True"\n }\n]', 'cluster-name': 'livepop-shuffle-aggregator-modeltransform', 'dag-id': 'livepop-shuffle-aggregator-modeltransform_v004', 'schedule_interval': '0 * * * *', 'trigger_dags': []} |
operator_extra_link_dict |
{} |
operator_extra_links |
() |
outlets |
[] |
owner |
data.engineers@viooh.com |
params |
{} |
pool |
default_pool |
pool_slots |
1 |
priority_weight |
1 |
priority_weight_total |
3 |
provide_context |
True |
queue |
default |
resources |
None |
retries |
4 |
retry_delay |
0:05:00 |
retry_exponential_backoff |
False |
run_as_user |
None |
schedule_interval |
0 * * * * |
shallow_copy_attrs |
('python_callable', 'op_kwargs') |
sla |
None |
start_date |
2024-07-04 12:00:00+00:00 |
subdag |
None |
task_concurrency |
None |
task_id |
create_emr_steps |
task_type |
PythonOperator |
template_ext |
[] |
template_fields |
('templates_dict', 'op_args', 'op_kwargs') |
templates_dict |
None |
trigger_rule |
all_success |
ui_color |
#ffefeb |
ui_fgcolor |
#000 |
upstream_list |
[<Task(PythonOperator): check_input_available>] |
upstream_task_ids |
{'check_input_available'} |
wait_for_downstream |
False |
weight_rule |
downstream |