/* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ using System; using System.Collections.Generic; using System.Globalization; using System.Text; #pragma warning disable 1591 namespace Amazon.ElasticMapReduce.Model { /// /// This class provides helper methods for creating common Elastic MapReduce step /// types. To use StepFactory, you should construct it with the appropriate bucket /// for your region. The official bucket format is "<region>.elasticmapreduce", so /// us-east-1 would use the bucket "us-east-1.elasticmapreduce". /// /// Create an interactive Hive job flow with debugging enabled: /// /// IAmazonElasticMapReduce emr = AWSClientFactory.CreateAmazonElasticMapReduceClient(accessKey, secretKey); /// /// StepFactory stepFactory = new StepFactory(); /// /// StepConfig enableDebugging = new StepConfig { /// Name = "Enable Debugging", /// ActionOnFailure = "TERMINATE_JOB_FLOW", /// HadoopJarStep = stepFactory.NewEnableDebuggingStep() /// }; /// /// StepConfig installHive = new StepConfig { /// Name = "Install Hive", /// ActionOnFailure = "TERMINATE_JOB_FLOW", /// HadoopJarStep = stepFactory.NewInstallHiveStep() /// }; /// /// RunJobFlowRequest request = new RunJobFlowRequest { /// Name = "Hive Interactive", /// Steps = new List<StepConfig> { enableDebugging, installHive }, /// LogUri = "s3://log-bucket/", /// Instances = new JobFlowInstancesConfig { /// Ec2KeyName = "keypair", /// HadoopVersion = "0.20", /// InstanceCount = 5, /// KeepJobFlowAliveWhenNoSteps = true, /// MasterInstanceType = "m1.small", /// SlaveInstanceType = "m1.small" /// } /// }; /// /// RunJobFlowResponse response = emr.RunJobFlow(request); /// /// public class StepFactory { private string bucket; private const string SWITCH_BASE_PATH = "--base-path", SWITCH_INSTALL_HIVE = "--install-hive", SWITCH_INSTALL_PIG = "--install-pig", SWITCH_HIVE_VERSIONS = "--hive-versions", SWITCH_RUN_HIVE_SCRIPT = "--run-hive-script", SWITCH_RUN_PIG_SCRIPT = "--run-pig-script", SWITCH_ARGS = "--args", SWITCH_F = "-f"; private const string TOOL_HIVE = "hive", TOOL_PIG = "pig"; private const string DEFAULT_BUCKET = "us-east-1.elasticmapreduce"; /// /// Constructor that gets its resources from the S3 bucket specified /// /// The bucket that contains the resources used by the step factory public StepFactory(string bucket) { this.bucket = bucket; } /// /// Constructor that gets its resources EMR bucket in the specified region. /// /// The region of the EMR bucket to use. public StepFactory(RegionEndpoint region) { this.bucket = string.Format(CultureInfo.InvariantCulture,"{0}.elasticmapreduce", region.SystemName); } /// /// Default constructor that gets its resources from the S3 in us-east-1. /// public StepFactory() : this(DEFAULT_BUCKET) { } /// /// Runs a specified script on the master node of your cluster. /// /// The script to run /// Arguments to be passed to the script. /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewScriptRunnerStep(string script, params string[] args) { List appendedArgs = new List(); appendedArgs.Add(script); foreach (var a in args) appendedArgs.Add(a); return new HadoopJarStepConfig { Jar = string.Format(CultureInfo.InvariantCulture, "s3://{0}/libs/script-runner/script-runner.jar", bucket), Args = appendedArgs }; } /// /// When run as the first step in your job flow, enables the Hadoop debugging UI /// in the AWS Management Console. /// /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewEnableDebuggingStep() { return NewScriptRunnerStep(string.Format(CultureInfo.InvariantCulture, "s3://{0}/libs/state-pusher/0.1/fetch", bucket)); } private HadoopJarStepConfig newHivePigStep(string type, params string[] args) { String[] argsArray = new String[args.Length + 2]; argsArray[0] = SWITCH_BASE_PATH; argsArray[1] = string.Format(CultureInfo.InvariantCulture, "s3://{0}/libs/{1}/", bucket, type); Array.Copy(args, 0, argsArray, 2, args.Length); return NewScriptRunnerStep(string.Format(CultureInfo.InvariantCulture, "s3://{0}/libs/{1}/{1}-script", bucket, type), argsArray); } /// /// Step that installs Hive on your job flow. /// /// The versions of Hive to install. /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewInstallHiveStep(params HiveVersion[] hiveVersions) { if (hiveVersions.Length > 0) { string[] versionStrings = new String[hiveVersions.Length]; for (int i = 0; i < hiveVersions.Length; i++) { versionStrings[i] = hiveVersions[i].VersionString; } return newHivePigStep(TOOL_HIVE, SWITCH_INSTALL_HIVE, SWITCH_HIVE_VERSIONS, string.Join(",", versionStrings)); } return newHivePigStep(TOOL_HIVE, SWITCH_INSTALL_HIVE); } /// /// Step that installs Hive on your job flow. /// /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewInstallHiveStep() { return NewInstallHiveStep(HiveVersion.Hive_0_5); } /// /// Step that runs a Hive script on your job flow. /// /// The script to run. /// Arguments that get passed to the script. /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewRunHiveScriptStep(string script, params string[] args) { return NewRunHiveScriptStepVersioned(script, HiveVersion.Hive_Latest.VersionString, args); } /// /// Step that runs a Hive script on your job flow with a specific verson of Hive. /// /// The script to run. /// The version of Hive to run. /// Arguments that get passed to the script. /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewRunHiveScriptStepVersioned(string script, string version, params string[] args) { string[] argsArray = new String[args.Length + 6]; argsArray[0] = SWITCH_HIVE_VERSIONS; argsArray[1] = version; argsArray[2] = SWITCH_RUN_HIVE_SCRIPT; argsArray[3] = SWITCH_ARGS; argsArray[4] = SWITCH_F; argsArray[5] = script; Array.Copy(args, 0, argsArray, 6, args.Length); return newHivePigStep(TOOL_HIVE, argsArray); } /// /// Step that installs Pig on your job flow. /// /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewInstallPigStep() { return newHivePigStep(TOOL_PIG, SWITCH_INSTALL_PIG); } /// /// Step that runs a Pig script on your job flow. /// /// The script to run. /// Arguments that get passed to the script. /// HadoopJarStepConfig that can be passed to your job flow. public HadoopJarStepConfig NewRunPigScriptStep(string script, params string[] args) { string[] argsArray = new string[args.Length + 4]; argsArray[0] = SWITCH_RUN_PIG_SCRIPT; argsArray[1] = SWITCH_ARGS; argsArray[2] = SWITCH_F; argsArray[3] = script; Array.Copy(args, 0, argsArray, 4, args.Length); return newHivePigStep(TOOL_PIG, argsArray); } /// /// The available Hive versions. /// For reference: http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/UsingEMR_SupportedHiveVersions.html /// public class HiveVersion { public static readonly HiveVersion Hive_0_4 = new HiveVersion("0.4"); public static readonly HiveVersion Hive_0_5 = new HiveVersion("0.5"); public static readonly HiveVersion Hive_0_7 = new HiveVersion("0.7"); public static readonly HiveVersion Hive_0_7_1 = new HiveVersion("0.7.1"); public static readonly HiveVersion Hive_0_7_1_1 = new HiveVersion("0.7.1.1"); public static readonly HiveVersion Hive_0_7_1_2 = new HiveVersion("0.7.1.2"); public static readonly HiveVersion Hive_0_7_1_3 = new HiveVersion("0.7.1.3"); public static readonly HiveVersion Hive_0_7_1_4 = new HiveVersion("0.7.1.4"); public static readonly HiveVersion Hive_0_8_1 = new HiveVersion("0.8.1"); public static readonly HiveVersion Hive_0_8_1_1 = new HiveVersion("0.8.1.1"); public static readonly HiveVersion Hive_0_8_1_2 = new HiveVersion("0.8.1.2"); public static readonly HiveVersion Hive_0_8_1_3 = new HiveVersion("0.8.1.3"); public static readonly HiveVersion Hive_0_8_1_4 = new HiveVersion("0.8.1.4"); public static readonly HiveVersion Hive_0_8_1_5 = new HiveVersion("0.8.1.5"); public static readonly HiveVersion Hive_0_8_1_6 = new HiveVersion("0.8.1.6"); public static readonly HiveVersion Hive_0_8_1_7 = new HiveVersion("0.8.1.7"); public static readonly HiveVersion Hive_0_8_1_8 = new HiveVersion("0.8.1.8"); public static readonly HiveVersion Hive_0_11_0 = new HiveVersion("0.11.0"); public static readonly HiveVersion Hive_0_11_0_1 = new HiveVersion("0.11.0.1"); public static readonly HiveVersion Hive_Latest = new HiveVersion("latest"); string _version; public HiveVersion(string version) { this._version = version; } public string VersionString { get { return this._version; } } } } }