DAG: booking-processing-job_v002

schedule: 0 */1 * * *


Task Instance: create_cluster_and_add_emr_steps


Rendered Template

job_flow_overrides
("{'Name': 'vd-uat-viooh-booking-processing-job', 'LogUri': "
 "'s3n://viooh-emr-logs-lab-cn/bigdata/emr/', 'ReleaseLabel': 'emr-5.35.0', "
 "'Instances': {'InstanceFleets': [{'Name': 'Masterfleet', "
 "'InstanceFleetType': 'MASTER', 'TargetOnDemandCapacity': 1, "
 "'InstanceTypeConfigs': [{'InstanceType': 'm5.xlarge'}, {'InstanceType': "
 "'m5.2xlarge'}]}, {'Name': 'Corefleet', 'InstanceFleetType': 'CORE', "
 "'TargetOnDemandCapacity': 3, 'InstanceTypeConfigs': [{'InstanceType': "
 "'m5.xlarge', 'EbsConfiguration': {'EbsBlockDeviceConfigs': "
 "[{'VolumeSpecification': {'VolumeType': 'gp2', 'SizeInGB': 50}, "
 "'VolumesPerInstance': 1}], 'EbsOptimized': True}}, {'InstanceType': "
 "'m5.2xlarge', 'EbsConfiguration': {'EbsBlockDeviceConfigs': "
 "[{'VolumeSpecification': {'VolumeType': 'gp2', 'SizeInGB': 50}, "
 "'VolumesPerInstance': 1}], 'EbsOptimized': True}}]}], 'Ec2KeyName': "
 "'data.engineer.lab', 'KeepJobFlowAliveWhenNoSteps': False, "
 "'TerminationProtected': False, 'Ec2SubnetId': 'subnet-0e214a494e67e2266', "
 "'EmrManagedMasterSecurityGroup': 'sg-0da002beeed87fb1a', "
 "'EmrManagedSlaveSecurityGroup': 'sg-0f1a0b7517309fcf0', "
 "'ServiceAccessSecurityGroup': 'sg-009528c399781cc69'}, 'BootstrapActions': "
 "[], 'Applications': [{'Name': 'Spark'}, {'Name': 'Hadoop'}, {'Name': "
 "'Ganglia'}, {'Name': 'Zeppelin'}], 'VisibleToAllUsers': True, 'Steps': "
 "[{'Name': 'Setup Hadoop Debugging', 'ActionOnFailure': 'TERMINATE_JOB_FLOW', "
 "'HadoopJarStep': {'Jar': 'command-runner.jar', 'Args': "
 "['state-pusher-script']}}, {'Name': 'CampaignExtractor', 'ActionOnFailure': "
 "'TERMINATE_CLUSTER', 'HadoopJarStep': {'Args': ['spark-submit', "
 "'--deploy-mode', 'cluster', '--master', 'yarn', '--conf', "
 "'spark.yarn.submit.waitAppCompletion=true', '--conf', "
 "'spark.driver.memory=9g', '--conf', 'spark.app.env=uat', '--conf', "
 "'spark.dag.execution.time=2024-03-08T06:00:00+00:00', '--class', "
 "'com.viooh.campaignextractor.CampaignExtractorMain', '--jars', "
 "'/usr/lib/spark/external/lib/spark-avro.jar', "
 "'s3://viooh-spark-artifacts-lab-cn/releases/com/viooh/campaignextractor/campaign-extractor/1.2.0.DE1-179/campaign-extractor-1.2.0.DE1-179-jar-with-dependencies.jar'], "
 "'Jar': 'command-runner.jar'}}, {'Name': 'CampaignProcessingJob', "
 "'ActionOnFailure': 'TERMINATE_CLUSTER', 'HadoopJarStep': {'Args': "
 "['spark-submit', '--deploy-mode', 'cluster', '--master', 'yarn', '--conf', "
 "'spark.yarn.submit.waitAppCompletion=true', '--conf', "
 "'spark.driver.memory=9g', '--conf', 'spark.app.env=uat', '--conf', "
 "'spark.dag.execution.time=2024-03-08T06:00:00+00:00', '--class', "
 "'com.viooh.CampaignProcessingMain', "
 "'s3://viooh-spark-artifacts-lab-cn/releases/com/viooh/campaign-processing-job/1.3.6/campaign-processing-job-1.3.6.jar'], "
 "'Jar': 'command-runner.jar'}}, {'Name': 'CampaignDeltaJob', "
 "'ActionOnFailure': 'TERMINATE_CLUSTER', 'HadoopJarStep': {'Args': "
 "['spark-submit', '--deploy-mode', 'cluster', '--master', 'yarn', '--conf', "
 "'spark.yarn.submit.waitAppCompletion=true', '--conf', "
 "'spark.driver.memory=9g', '--conf', 'spark.app.env=uat', '--conf', "
 "'spark.dag.execution.time=2024-03-08T06:00:00+00:00', '--class', "
 "'com.viooh.booking.delta.CampaignDeltaMain', '--files', "
 "'s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/1.0.0/job.conf,s3://viooh-spark-artifacts-lab-cn/metrics/batch-job-metrics/1.0.0/job.yaml', "
 "'--conf', 'spark.metrics.conf=job.conf', "
 "'s3://viooh-spark-artifacts-lab-cn/releases/com/viooh/pandora-campaign-delta/1.8.4/pandora-campaign-delta-1.8.4-jar-with-dependencies.jar'], "
 "'Jar': 'command-runner.jar'}}], 'JobFlowRole': 'DeEmrInstanceRole', "
 "'ServiceRole': 'DeEmrServiceRole', 'SecurityConfiguration': "
 "'bigdata_emr_cn_sec_conf', 'Tags': [{'Key': 'environment', 'Value': 'uat'}, "
 "{'Key': 'role', 'Value': 'analysis'}, {'Key': 'application', 'Value': "
 "'emr'}, {'Key': 'project', 'Value': 'campaign-compliance'}, {'Key': "
 "'TerminationPolicy', 'Value': 'OFFICE-HOURS'}, {'Key': 'ContactEmail', "
 "'Value': 'viooh.data@viooh.com'}, {'Key': 'team', 'Value': 'bigdata'}, "
 "{'Key': 'Name', 'Value': 'vd-uat-viooh-booking-processing-job'}], "
 "'Configurations': []}")