Monday, 27 June 2016

Command to find a directory in Linux

sudo find / -name "dir-name-here"

Create Cluster Authentication without Organization ID

Download .ps1 file : https://gallery.technet.microsoft.com/scriptcenter/Self-signed-certificate-5920a7c6

PS C:\> Import-Module -Name F:\New-SelfSignedCertificateEx.ps1
PS C:\> New-SelfSignedCertificateEx -Subject "CN=exampleapp" -KeySpec "Exchange" -FriendlyName "exampleapp"
PS C:\> $cert = Get-ChildItem -Path cert:\CurrentUser\My\* -DnsName exampleapp
PS C:\> $keyValue = [System.Convert]::ToBase64String($cert.GetRawCertData())
PS C:\> Add-AzureRmAccount

PS C:\> $azureAdApplication = New-AzureRmADApplication -DisplayName "exampleapp" -HomePage "https://www.contoso.org" -Id
entifierUris "https://www.contoso.org/example" -KeyValue $keyValue -KeyType AsymmetricX509Cert -EndDate $cert.NotAfter -
StartDate $cert.NotBefore

PS C:\> $azureAdApplication

Note: ApplicationId           : 934fcbb6-66xxxxxxxxxxxxxxxxxxxxx

PS C:\> New-AzureRmADServicePrincipal -ApplicationId $azureAdApplication.ApplicationId

PS C:\> (Get-AzureRmSubscription).TenantId

Note: TenantId

PS C:\> (Get-ChildItem -Path cert:\CurrentUser\My\* -DnsName exampleapp).Thumbprint

Note: Thumbprint

Add-AzureRmAccount -ServicePrincipal -CertificateThumbprint CFC1DC609A547xxxxxxxxxxxxxxxxxxxxxxxx -ApplicationId 934fcbb6-66xxxxxxxxxxxxxxxxxxxxxxxxxxxx -TenantId edc9c7e2-1bxxxxxxxxxxxxxxxxxxxxxxxxxxx

Next, proceed with create cluster script. 

Wednesday, 15 June 2016

Hbase and Hive Support

*********************************************************************************
CREATE table in hbase using hive:
*********************************************************************************
create table hbase_wallets(Id BIGINT, MDN VARCHAR(20), Password VARCHAR(1000), eWalletId VARCHAR(20), Date TIMESTAMP, TransactionNumber VARCHAR(25), Requester VARCHAR(20), Instrument TINYINT, Status TINYINT, WalletType TINYINT, Opt1 VARCHAR(50), Opt2 VARCHAR(50), Opt3 VARCHAR(50), IsB2B BOOLEAN, UserId VARCHAR(50), Banks INT, WalletOwner INT, Currency TINYINT, INSERT_DT TIMESTAMP, UPDATE_DT TIMESTAMP, ACTIVE_FLAG CHAR(1))
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:MDN,cf1:Password,cf1:eWalletId,cf1:Date,cf1:TransactionNumber,cf1:Requester,cf1:Instrument,cf1:Status,cf1:WalletType,cf2:Opt1,cf2:Opt2,cf2:Opt3,cf2:IsB2B,cf2:UserId,cf2:Banks,cf2:WalletOwner,cf2:Currency,cf2:INSERT_DT,cf2:UPDATE_DT,cf2:ACTIVE_FLAG")
TBLPROPERTIES ("hbase.table.name" = "hive_wallets");

*********************************************************************************
To move data from hive to hbase:
*********************************************************************************
INSERT OVERWRITE TABLE hbase_wallets SELECT * FROM oxi_consolidated_orc.Wallets;

*********************************************************************************
CREATE table in hbase using hive:
*********************************************************************************
use default;
create EXTERNAL table hbase_actions(Id INT, description VARCHAR(80), insert_dt timestamp, update_dt timestamp, active_flag char(1))
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:description,cf1:insert_dt,cf1:update_dt,cf1:active_flag")
TBLPROPERTIES ("hbase.table.name" = "hbase_actions")

LOCATION 'wasb://linuxxxxxxhdinsightcluster@oxigxxxxxxcompute.blob.core.windows.net/test/actionshbasetest';

*********************************************************************************
To move data from hive to hbase:
*********************************************************************************
INSERT OVERWRITE TABLE hbase_wallets_test SELECT Id FROM oxi_consolidated.Wallets;

Get Bulk data from SFTP Server using automated expect script

This summary is not available. Please click here to view the post.

Get CSV data from SFTP and create Hive Table using Python script

Content of Expect Script

#!/usr/bin/expect
cd /home/admin123/SFTP/sftpdata
spawn sftp root@ftp.xxxxx.com
expect "password:"
send "ihCtwxxxxxxxxxxxxq\n"
expect "sftp>"
send "get /root/FRED-GGGDTPBRA188N.csv\n"
expect "sftp>"
send "exit\n"
expect ":~$"
spawn hadoop fs -mkdir -p /ftpdata/testdirdump
expect ":~$"
spawn hadoop fs -copyFromLocal -f /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv /ftpdata/testdirdump
expect ":~$"
spawn python /home/admin123/scripts/GenerateHiveScript.py /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv sftptestdata sftpFRED /ftpdata/testdirdump /home/admin123/SFTP/sftphivequery/hivequery
expect ":~$"
spawn sudo chmod -R 777 /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect ":~$"
spawn hive -f /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect eof
catch wait result


Contents of GenerateHiveScript.py 

'''
Created on 09-May-2016

@author: Lokesh.Nanda
'''

#import string
import re
#import time
import sys
import csv


source_path = sys.argv[1]
#F:\StockStreamingData\NSEStockData.20160428-131159.csv
database_name = sys.argv[2]
database_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', database_name)
#ftpdata
table_name = sys.argv[3]
table_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', table_name)
#testtable
hdfs_file_loc = sys.argv[4]
#/datapull/stocks
hive_query_path = sys.argv[5]
#F:\StockStreamingData\hivequery

with open(source_path, 'r') as f:
#with open('/home/admin123/NSEStockData.20160505-060003.csv', 'r') as f:
#with open('C:\Users\lokesh.nanda\Desktop\MOCK_DATA.csv', 'r') as f:
        first_line = f.readline()
        flag = False
        symbol = "~`!@#$%^&*()-+={}[]:>;'</?*-+"
        for i in first_line:
            if i in symbol:
                flag = True
                break
       
        #a = ","
        #count_comma = first_line.count(a)
        #first_line = csv.reader(first_line)
        #count_comma = first_line.count

for item in csv.reader([first_line], skipinitialspace=True):
   count_comma = len(item)

       
        finaldump = "create database if not exists " + database_name_mod + "; use " + database_name_mod + "; create external table " + table_name_mod +"("
        if(flag):
            i = 0
            while(i <= count_comma):
                finaldump = finaldump + "col"+ str(i) + " String,"
                i = i+1
        else:
            columns = first_line.split(',')
   #columns = csv.reader(first_line)
            for col in columns:
                finaldump = finaldump + "`" + col + "` String,"
        finaldump = finaldump[:-1] + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '" +hdfs_file_loc+ "';"

with open(hive_query_path + table_name + '.hql', 'w') as the_file:

    the_file.write(finaldump)

HdInsight Cluster Delete Script - PART 4

$resourceGroupName = "xxxxRG"
$clusterName = "linuxhdinsightclusterxxxx"
$azUserName = "xxxx@outlook.com"
$azUserPW = ConvertTo-SecureString -String "xxxxxxxxxx" -AsPlainText -Force
$cred = New-Object System.Management.Automation.PSCredential($azUserName, $azUserPW)
Login-AzureRmAccount -Credential $cred
Invoke-Expression "C:\Cluster\eodscript.ps1"
Remove-AzureRmHDInsightCluster -ResourceGroupName $resourceGroupName -ClusterName $clusterName

HdInsight Cluster Create Script - PART 3

Import-Module "C:\Program Files (x86)\Microsoft SDKs\Azure\PowerShell\ServiceManagement\Azure\Azure.psd1"
$resourceGroupName = "xxxxRG"
$storageAccountName = "saxxxx"
$containerName = "linuxhdinsightclusterxxxx"
$storageAccountKey = "nNoxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx90A=="
$subscriptionName = "xxxxxxxxxxxx"
$storageAccountLocation = "North Central US"
$clusterName = "linuxhdinsightclusterxxxx"
$clusterNodeCount = "8"
$azUserName = "xxxxxxxx@outlook.com"
$azUserPW = ConvertTo-SecureString -String "xxxxxxxxxx" -AsPlainText -Force
$cred = New-Object System.Management.Automation.PSCredential($azUserName, $azUserPW)
Login-AzureRmAccount -Credential $cred
Select-AzureRmSubscription -SubscriptionName $subscriptionName
$blobStorageName = "saxxxx.blob.core.windows.net"
$sshUserName = "admin123"
$sshUserPwd = "Test@45678"
$sshUserPW = ConvertTo-SecureString -String $sshUserPwd -AsPlainText -Force
$sshCredentials = New-Object System.Management.Automation.PSCredential($sshUserName,$sshUserPW)
$computerName = "linuxhdinsightclusterxxxx-ssh.azurehdinsight.net"
$htpUserName = "admin"
$htpUserPwd = "Test@45678"
$htpUserPW = ConvertTo-SecureString -String $htpUserPwd -AsPlainText -Force
$credentials = New-Object System.Management.Automation.PSCredential($htpUserName,$htpUserPW)
$oozieUserName = "rootx"
$oozieUserPwd = "Test@45678"
$oozieUserPW = ConvertTo-SecureString -String $oozieUserPwd -AsPlainText -Force
$oozieCred = New-Object System.Management.Automation.PSCredential($oozieUserName,$oozieUserPW)
$config = New-AzureRmHDInsightClusterConfig
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Hue"  -NodeType HeadNode  -Parameters "Test@45678" -Uri https://hdiconfigactions.blob.core.windows.net/linuxhueconfigactionv01/install-hue-uber-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install R"  -NodeType HeadNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxrconfigactionv01/r-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install R"  -NodeType WorkerNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxrconfigactionv01/r-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Solr"  -NodeType HeadNode  -Uri https://hdiconfigactions.blob.core.windows.net/linuxsolrconfigactionv01/solr-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Solr"  -NodeType WorkerNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxsolrconfigactionv01/solr-installer-v01.sh

New-AzureRmHDInsightCluster -ClusterName $clusterName -config $config -ResourceGroupName $resourceGroupName -HttpCredential $credentials -Location $storageAccountLocation -DefaultStorageAccountName "$storageAccountName.blob.core.windows.net" -DefaultStorageAccountKey $storageAccountKey -DefaultStorageContainer $containerName  -ClusterSizeInNodes $clusterNodeCount -ClusterType "Spark" -OSType "Linux" -Version "3.4" -SshCredential $sshCredentials
Invoke-Expression "C:\Cluster\bootup.ps1"

HdInsight Cluster Backup Strategy - PART 2 eod.ps1

$servername = "xxxxxxxxxxxxxx-ssh.azurehdinsight.net"
$username = "admin123"
$password = "Test@45678"
$port = 22


$command1 = "hadoop fs -rm -R wasb://container-name@storageacc-name.blob.core.windows.net/backup/*"
$command2 = "hadoop fs -copyFromLocal /home/admin123/* wasb://container-namebackup@storageacc-name.blob.core.windows.net/backup/ "
$commandname = @($command1,$command2)

Import-Module SSH-Sessions
New-SshSession -ComputerName $servername -Username $username -Password $password -Port $port

foreach ($c in $commandname) {
$SshResults = Invoke-SshCommand -InvokeOnAll -Command $c
}
Write-Host "Done!!!"

Remove-SshSession -RemoveAll

HdInsight Cluster Backup Strategy - PART 1 bootup.ps1

Create a PowerShell Script as shown below:

$servername = "xxxxxxxx-ssh.azurehdinsight.net"
$username = "admin123"
$password = "Test@45678"
$port = 22
$command2 = "hadoop fs -copyToLocal wasb://container-name@storage-name.blob.core.windows.net/backup/* /home/admin123/"
$command3 = "sudo chmod -R 777 /home/admin123/*"
$command4 = "hadoop fs -rm -R wasb://conatinerarchive@storage-name.blob.core.windows.net/archive/*"
$command5 = "hadoop fs -copyFromLocal /home/admin123/* wasb://conatinerarchive@storage-name.blob.core.windows.net/archive/ "
$command6 = "/home/admin123/scripts/executehivebootup.sh"
$command7 = "/home/admin123/HueScripts/hueurl.sh"
$command8 = "sudo /home/admin123/scripts/add_configuration.sh"
$command9 = "sudo apt-get install expect"
$command10 = "/home/admin123/scripts/SFTPrun.sh"

$commandname = @($command2,$command3,$command4,$command5)

Import-Module SSH-Sessions
New-SshSession -ComputerName $servername -Username $username -Password $password -Port $port

Write-Host "Started bootup data pull..!!!"

foreach ($c in $commandname) {
$SshResults = Invoke-SshCommand -InvokeOnAll -Command $c
}
Write-Host "Step 1 : Done With Copying..!!!"

Write-Host "Step 2 : Creating Schemas..."

$SshResults2 = Invoke-SshCommand -InvokeOnAll -Command $command6

Write-Host "Step 3 : Generating Hue Link...."

$SshResults3 = Invoke-SshCommand -InvokeOnAll -Command $command7

Write-Host "Step 4 : Completed...."

$SshResults4 = Invoke-SshCommand -InvokeOnAll -Command $command8

Write-Host "Step 5 : Completed core site changes..."

$SshResults5 = Invoke-SshCommand -InvokeOnAll -Command $command9

Write-Host "Step 6 : Installed dependent libraries..."

$SshResults5 = Invoke-SshCommand -InvokeOnAll -Command $command10

Write-Host "Step 7 : Installed  SFTP dependencies..."

Remove-SshSession -RemoveAll


***************************************
Contents of shell scripts used:
***************************************

executehivebootup.sh

oxi_array=(`find /home/admin123/HiveStartup/ -type f -follow` )
for element in "${oxi_array[@]}"
do
  hive -f "${element}"
done

Content of HiveStartup
Create a folder HiveSchema - Add a file xxxdata.hql
xxxdata.hql:
CREATE DATABASE IF NOT EXISTS xxxdata;
USE xxxdata;
CREATE EXTERNAL TABLE IF NOT EXISTS xxxdata( Bill_account string,Reading_type string, Reading_datetime Timestamp,Reading_value string ,`Interval` string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
LOCATION 'wasb://container-name@storageacc-name.blob.core.windows.net/xxxdata/';


hueurl.sh

hostname -f> /home/admin123/HueScripts/hueurl.properties
sudo sed -i s/hn0/huelink=hn0/ /home/admin123/HueScripts/hueurl.properties

Content of hueurl.properties

huelink=hn0-linuxxxxxxxxxxxxxxxjqd33z3g.ex.internal.cloudapp.net


add_configuration.sh

chmod -R 777 /usr/hdp/2.2.*/hadoop/lib
cp /home/admin123/jar/com.mysql.jdbc_5.1.5.jar /usr/hdp/2.2.*/hadoop/lib/
cp /etc/hadoop/conf/core-site.xml /home/admin123/scripts/
awk '/<configuration>/ {print;print "<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>Fajj9v2xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxetg==</value>\n</property>\n<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>vNcoxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxrUw==</value>\n</property>\n<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>PUWxxxxxxxxxxxxxxxxxxHq5iQ==</value>\n</property>"; next }1' /home/admin123/scripts/core-site.xml > /etc/hadoop/conf/core-site.xml


SFTPrun.sh

sftp_array=(`find /home/admin123/SFTP/sftpfirsttime/ -type f -follow` )
for element in "${sftp_array[@]}"
do
  "${element}"
done

Contents of sfttpfirsttime: ftp_test_com.sh

#!/usr/bin/expect

spawn sftp demo@test.rebex.net
expect "yes/no)?"
send "yes\n"
expect "password:"
send "password\n"
expect "sftp>"
send "exit\n"
interact


Partition Table after restart of Hdinsight CLuster

CREATE EXTERNAL TABLE amerendata_partitioned (Bill_account string,Reading_type string, Reading_datetime Timestamp,Reading_value string ,`Interval` string)
PARTITIONED BY (Reading_month String)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS ORC Location '/amerenORCpartitioned/';
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='1');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='2');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='3');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='4');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='5');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='6');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='7');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='8');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='9');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='10');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='11');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='12');