Submit Hive And SQOOP jobs


Brief :

This procedure allows you to submit Hive jobs to an Azure HDInsight cluster and run Sqoop jobs that export Hive table data to a SQL database.

Need:

Azure provides the Hive editor for submitting Hive jobs to an HDInsight cluster interactively. If a large number of jobs needs to be submitted daily, the automated procedure explained below can be used instead.

How It Works:
 
    Requirements-
  •  SQL Database
  • HDInsight Cluster (Running Status)
  • Install NuGet Package given below,
  • Install-Package Microsoft.Azure.Management.HDInsight.Job

To Submit Hive and SQOOP jobs use below class,


/// <summary>
/// Submits Hive jobs to an Azure HDInsight cluster and runs Sqoop jobs that
/// export Hive table data to a SQL database, using the
/// Microsoft.Azure.Management.HDInsight.Job client library.
/// </summary>
public class HiveQueryManager
{
    private string SqlDatabaseServerName { get; set; }

    private string SqlDatabaseLogin { get; set; }

    private string SqlDatabaseLoginPassword { get; set; }

    private string SqlDatabaseDatabaseName { get; set; }

    // Client used for all job submission and polling calls; created once in
    // the constructor and never reassigned.
    private readonly HDInsightJobManagementClient _hdiJobManagementClient;

    // Optional Hive configuration values (key/value "defines") applied to submitted Hive jobs.
    public Dictionary<string, string> defines;

    // Optional command-line arguments passed to submitted Hive jobs.
    public List<string> args;

    /// <summary>
    /// Creates a manager bound to an existing HDInsight cluster.
    /// </summary>
    /// <param name="existingClusterUsername">HDInsight cluster user name.</param>
    /// <param name="existingClusterPassword">HDInsight cluster password.</param>
    /// <param name="existingClusterUri">Cluster URI, e.g. clusterName + ".azurehdinsight.net".</param>
    public HiveQueryManager(string existingClusterUsername, string existingClusterPassword, string existingClusterUri)
    {
        var clusterCredentials = new BasicAuthenticationCloudCredentials
        {
            Username = existingClusterUsername,
            Password = existingClusterPassword
        };

        _hdiJobManagementClient = new HDInsightJobManagementClient(existingClusterUri, clusterCredentials);
    }

    /// <summary>
    /// Submits a Hive query to the cluster and blocks until the job completes.
    /// </summary>
    /// <param name="query">HiveQL text to execute.</param>
    /// <returns>
    /// "Job Exit Code {n}" on completion, or the exception message on failure.
    /// </returns>
    public string SubmitHiveJob(string query)
    {
        try
        {
            var parameters = new HiveJobSubmissionParameters
            {
                Query = query,
                Defines = defines,
                Arguments = args
            };

            Console.WriteLine("Submitting the Hive job to the cluster...");

            var jobResponse = _hdiJobManagementClient.JobManagement.SubmitHiveJob(parameters);
            var jobId = jobResponse.JobSubmissionJsonResponse.Id;

            Console.WriteLine("Response status code is " + jobResponse.StatusCode);
            Console.WriteLine("JobId is " + jobId);
            Console.WriteLine("Waiting for the job completion ...");

            var exitValue = WaitForJobCompletion(jobId);

            Console.WriteLine("Job Exit Code " + exitValue);
            return "Job Exit Code " + exitValue;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            return ex.Message;
        }
    }

    /// <summary>
    /// Stores the SQL database connection details used by
    /// <see cref="ExportTableToSQLDatabase"/>. Must be called before exporting.
    /// </summary>
    /// <param name="sqlDatabaseServerName">Azure SQL server name (without the ".database.windows.net" suffix).</param>
    /// <param name="sqlDatabaseLogin">SQL login user name.</param>
    /// <param name="sqlDatabaseLoginPassword">SQL login password.</param>
    /// <param name="sqlDatabaseDatabaseName">Target database name.</param>
    public void SetSQLDatabase(string sqlDatabaseServerName, string sqlDatabaseLogin, string sqlDatabaseLoginPassword, string sqlDatabaseDatabaseName)
    {
        SqlDatabaseServerName = sqlDatabaseServerName;
        SqlDatabaseLogin = sqlDatabaseLogin;
        SqlDatabaseLoginPassword = sqlDatabaseLoginPassword;
        SqlDatabaseDatabaseName = sqlDatabaseDatabaseName;
    }

    /// <summary>
    /// Runs a Sqoop export job that copies Hive table data from HDFS into the
    /// SQL database previously configured via <see cref="SetSQLDatabase"/>,
    /// and blocks until the job completes.
    /// </summary>
    /// <param name="hdfsPath">HDFS path of the Hive table data, e.g. "/hive/warehouse/tablename".</param>
    /// <param name="tableName">Destination table; must already exist in the SQL database with a matching structure.</param>
    /// <param name="saperator">Field separator used while creating the Hive table. (Parameter name kept as-is for source compatibility.)</param>
    /// <returns>A success message containing the job id, or the exception message on failure.</returns>
    public string ExportTableToSQLDatabase(string hdfsPath, string tableName, string saperator)
    {
        try
        {
            var exportDir = hdfsPath; // e.g. "/hive/warehouse/tablename"

            // Connection string for Azure SQL Database.
            // Comment out if using SQL Server.
            // NOTE(review): credentials are embedded in the JDBC string and will
            // appear in job metadata; prefer a Sqoop --password-file in production.
            var connectionString = "jdbc:sqlserver://" + SqlDatabaseServerName + ".database.windows.net;user=" + SqlDatabaseLogin + "@" + SqlDatabaseServerName + ";password=" + SqlDatabaseLoginPassword + ";database=" + SqlDatabaseDatabaseName;

            // Connection string for an on-premises SQL Server instance.
            // Uncomment if using SQL Server.
            //var connectionString = "jdbc:sqlserver://" + SqlDatabaseServerName + ";user=" + SqlDatabaseLogin + ";password=" + SqlDatabaseLoginPassword + ";database=" + SqlDatabaseDatabaseName;

            var parameters = new SqoopJobSubmissionParameters
            {
                //Files = new List<string> { "/user/oozie/share/lib/sqoop/sqljdbc41.jar" }, // This line is required for Linux-based cluster.
                Command = "export --connect " + connectionString + " --table " + tableName + " --export-dir " + exportDir + " --input-null-string \\\\N --input-null-non-string \\\\N --fields-terminated-by " + saperator + " -m 1"
            };

            var response = _hdiJobManagementClient.JobManagement.SubmitSqoopJob(parameters);
            var jobId = response.JobSubmissionJsonResponse.Id;

            Console.WriteLine("Response status code is " + response.StatusCode);
            Console.WriteLine("JobId is " + jobId);
            Console.WriteLine("Waiting for the job completion ...");

            var exitValue = WaitForJobCompletion(jobId);

            Console.WriteLine("Job Exit Code " + exitValue);
            return "Job Submitted Successfully. The JobId is " + jobId;
        }
        catch (Exception ex)
        {
            // Previously this catch swallowed the exception silently; log it
            // for consistency with SubmitHiveJob before returning the message.
            Console.WriteLine(ex.Message);
            return ex.Message;
        }
    }

    // Polls the cluster once per second until the job reports completion,
    // then returns the job's exit value as a string. Shared by the Hive and
    // Sqoop submission paths (previously duplicated in both).
    private string WaitForJobCompletion(string jobId)
    {
        var jobDetail = _hdiJobManagementClient.JobManagement.GetJob(jobId).JobDetail;
        while (!jobDetail.Status.JobComplete)
        {
            Thread.Sleep(1000);
            jobDetail = _hdiJobManagementClient.JobManagement.GetJob(jobId).JobDetail;
        }

        return jobDetail.ExitValue.ToString();
    }
}

  Below are the parameter details,
  • existingClusterUsername – User name of the HDInsight cluster where the jobs will run.
  • existingClusterPassword HDInsight Cluster Password.
  • existingClusterUri HDInsight Cluster Uri.
  • existingClusterUri = ExistingClusterName + ".azurehdinsight.net";
  • hdfsPath – Path of hive table on HDFS.
  • saperator – The field separator used while creating the Hive table (the parameter name in the code retains this spelling).
  • tableName – Name of the table which needs to be exported (Create table in SQL Database with same structure and name).
Use HiveQueryManager class object to Submit Hive and SQOOP jobs. (Using SubmitHiveJob and ExportTableToSQLDatabase Methods).

Podcast

Michael Patterson sat down with the CEO of Boston Byte, Mustapha Shaikh to discuss the significance and rapid digitization of the healthcar...