Tuesday 20 December 2016

bash Script to get Hue link in amazon EMR AWS on cluster startup

#!/bin/bash

set -xe
# =========================================================== #
# Created By: Lokesh Nanda
# Date: 21/12/2016
# Sample call: /home/hadoop/HueScripts/hueurl.sh
# Getting the DNS name of EMR on cluster startup and dumping it to /home/hadoop/HueScripts/hueurl.properties
# BDP UI to read /home/hadoop/HueScripts/hueurl.properties file to get the latest link
# =========================================================== #

InstanceIDDev=$(aws emr list-clusters | grep -m 1 \"Id\" | awk '{print $2}')
InstanceIDEMR=$(echo "$InstanceIDDev" | sed 's/\"//g'| sed 's/\,//g')
DescribeEMR=$(aws emr describe-cluster --cluster-id "${InstanceIDEMR}" | python -c "import sys, json; print json.load(sys.stdin)['Cluster']['MasterPublicDnsName']")
echo "${DescribeEMR}"> /home/hadoop/HueScripts/hueurl.properties
sudo sed -i s/ec2/huelink=ec2/ /home/hadoop/HueScripts/hueurl.properties
sudo sed -i s/compute.amazonaws.com/compute.amazonaws.com:8888/ /home/hadoop/HueScripts/hueurl.properties
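
A quick way to verify the result; the hostname below is hypothetical, but the format follows from the two sed substitutions above:

cat /home/hadoop/HueScripts/hueurl.properties
# huelink=ec2-xx-xx-xx-xx.us-west-2.compute.amazonaws.com:8888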

Tuesday 25 October 2016

Logging in Bash Script

Logger program to be used in scripts. (Create one logger.sh and source it from all the .sh files that need logging.)

#!/bin/bash

# =========================================================== #
# Created By: Lokesh Nanda
# Date: 24/10/2016
# Accepts a logfile name as parameter.
# Sample call: /home/admin123/Logging/logger.sh ${logfilename}
# used in shell script for logging.
# Supports 8 functions
# =========================================================== #


SCRIPT_FILE=$1
SCRIPT_LOG=/home/admin123/Logging/${SCRIPT_FILE}.log
touch "$SCRIPT_LOG"

function SUCCESS(){
    local msg="$1"
    timeAndDate=$(date)
    echo "[$timeAndDate] [BDP_SUCCESS]  $msg" >> "$SCRIPT_LOG"
    echo "[$timeAndDate] [BDP_SUCCESS]  $msg"
}

function PROPERTYFILEERROR(){
 timeAndDate=$(date)
 script_name=$(basename "$0")
 local msg="$1"
 script_name="${script_name%.*}"
 echo "[$timeAndDate] [BDP_DEBUG]  > $script_name $msg" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  > $script_name $msg"
}

function SCRIPTENTRY(){
 timeAndDate=$(date)
 script_name=$(basename "$0")
 script_name="${script_name%.*}"
 echo "[$timeAndDate] [BDP_DEBUG]  > $script_name $FUNCNAME" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  > $script_name $FUNCNAME"
}

function SCRIPTEXIT(){
 timeAndDate=$(date)
 script_name=$(basename "$0")
 script_name="${script_name%.*}"
 echo "[$timeAndDate] [BDP_DEBUG]  < $script_name $FUNCNAME" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  < $script_name $FUNCNAME"
}

function ENTRY(){
 local cfn="${FUNCNAME[1]}"
 timeAndDate=$(date)
 echo "[$timeAndDate] [BDP_DEBUG]  > $cfn $FUNCNAME" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  > $cfn $FUNCNAME"
}

function EXIT(){
 local cfn="${FUNCNAME[1]}"
 timeAndDate=$(date)
 echo "[$timeAndDate] [BDP_DEBUG]  < $cfn $FUNCNAME" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  < $cfn $FUNCNAME"
}


function INFO(){
    local msg="$1"
    timeAndDate=$(date)
    echo "[$timeAndDate] [BDP_INFO]  $msg" >> "$SCRIPT_LOG"
    echo "[$timeAndDate] [BDP_INFO]  $msg"
}


function DEBUG(){
    local msg="$1"
    timeAndDate=$(date)
 echo "[$timeAndDate] [BDP_DEBUG]  $msg" >> "$SCRIPT_LOG"
 echo "[$timeAndDate] [BDP_DEBUG]  $msg"
}

function ERROR(){
    local msg="$1"
    timeAndDate=$(date)
    echo "[$timeAndDate] [BDP_ERROR]  $msg" >> "$SCRIPT_LOG"
    echo "[$timeAndDate] [BDP_ERROR]  $msg"
}


testlogger.sh shows how to call logger.sh and implement logging:

#!/bin/bash
#set -e

# =========================================================== #
# Created By: Lokesh Nanda
# Date: 24/10/2016
# Calls logger.sh for logging framework
# Sample call: /home/admin123/Logging/testlogger.sh
# CHANGES IN SCRIPT: 
#      logfilename : file name of log (ex - testloginfo)
#
# Supports 8 functions
# =========================================================== #


logfilename=testloginfo
source /home/admin123/Logging/logger.sh ${logfilename}
SCRIPTENTRY

. /home/admin123/sqoop-jobs/connections/68.properties || PROPERTYFILEERROR "401: Failed to read property file"

jobName=mysqltest123
table=twitter
hiveDatabase=testdb
hiveTable=testtable26
stagingDir="${STORAGEURI}/stagingDir/${hiveDatabase}/${hiveTable}"
targetDir="${STORAGEURI}/${hiveDatabase}/${hiveTable}"

triggerSqoopJob(){
    ENTRY
    DEBUG "Triggering sqoop job import from Database $1 and Table $2"
    sqoop import --driver com.mysql.jdbc.Driver --connect jdbc:mysql://${SERVERNAME}:${PORT}/${DATABASENAME} --username ${USERNAME} --password ${PASSWORD} --enclosed-by '\"' --table ${table} --num-mappers ${MAXMAPPER} --null-string \'\\\\N\' --null-non-string \'\\\\N\' --hive-drop-import-delims  --map-column-java Token=String,AccountName=String,ConsumerKey=String,ConsumerSecret=String,HashTags=String,HiveTable=String,isActive=String,Secret=String --target-dir ${stagingDir} >> /home/admin123/Logging/${logfilename}.log 2>&1 && SUCCESS "sqoop import successful Database $1 and Table $2" || ERROR "404: Failed to run sqoop import for Database $1 and Table $2" 
    INFO "SqoopJob details updated for Database $1 and Table $2"
    EXIT
}

triggerHiveJob(){
    ENTRY
    DEBUG "Triggering Hive Job"
    hive -e "CREATE DATABASE IF NOT EXISTS ${hiveDatabase};

DROP TABLE IF EXISTS ${hiveDatabase}.stage_${hiveTable};

CREATE EXTERNAL table if not exists ${hiveDatabase}.stage_${hiveTable} (
\`ID\`  INT,
\`Token\`  VARCHAR(255),
\`AccountName\`  VARCHAR(255),
\`ActiveEndDate\`  TIMESTAMP,
\`ActiveStartDate\`  TIMESTAMP,
\`ConsumerKey\`  VARCHAR(255),
\`ConsumerSecret\`  VARCHAR(255),
\`CreatedDate\`  TIMESTAMP,
\`HashTags\`  VARCHAR(255),
\`HiveTable\`  VARCHAR(255),
\`isActive\`  VARCHAR(255),
\`Secret\`  VARCHAR(255),
\`UserId\`  INT)
ROW FORMAT SERDE 'com.bizo.hive.serde.csv.CSVSerde'
Location '${stagingDir}';" >> /home/admin123/Logging/${logfilename}.log 2>&1 && SUCCESS "Hive job was successful for Database $1 and Table $2" || ERROR "402: Failed to run Hive job for Database $1 and Table $2" 

INFO "HiveJob details updated for Database $1 and Table $2"
EXIT

}


triggerSqoopJob ${DATABASENAME} ${table}
triggerHiveJob ${hiveDatabase} stage_${hiveTable}

SCRIPTEXIT


Sample Log:

[Tue Oct 25 08:11:31 UTC 2016] [BDP_DEBUG]  > testlogger SCRIPTENTRY
[Tue Oct 25 08:11:31 UTC 2016] [BDP_DEBUG]  > triggerSqoopJob ENTRY
[Tue Oct 25 08:11:31 UTC 2016] [BDP_DEBUG]  Triggering sqoop job import from Database oxigen and Table twitter
Warning: /usr/hdp/2.4.2.4-5/accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
16/10/25 08:11:32 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6.2.4.2.4-5
16/10/25 08:11:33 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
16/10/25 08:11:33 WARN sqoop.ConnFactory: Parameter --driver is set to an explicit driver however appropriate connection manager is not being set (via --connection-manager). Sqoop is going to fall back to org.apache.sqoop.manager.GenericJdbcManager. Please specify explicitly which connection manager should be used next time.
16/10/25 08:11:33 INFO manager.SqlManager: Using default fetchSize of 1000
16/10/25 08:11:33 INFO tool.CodeGenTool: Beginning code generation
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/hdp/2.4.2.4-5/hadoop/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/hdp/2.4.2.4-5/zookeeper/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
16/10/25 08:11:34 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM twitter AS t WHERE 1=0
16/10/25 08:11:34 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM twitter AS t WHERE 1=0
16/10/25 08:11:34 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /usr/hdp/2.4.2.4-5/hadoop-mapreduce
Note: /tmp/sqoop-admin123/compile/9ba3c1ba3ed2d158a63dab4a5f950ebd/twitter.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
16/10/25 08:11:36 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-admin123/compile/9ba3c1ba3ed2d158a63dab4a5f950ebd/twitter.jar
16/10/25 08:11:37 INFO mapreduce.ImportJobBase: Beginning import of twitter
16/10/25 08:11:37 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM twitter AS t WHERE 1=0
16/10/25 08:11:39 INFO impl.TimelineClientImpl: Timeline service address: http://headnodehost:8188/ws/v1/timeline/
16/10/25 08:11:40 ERROR tool.ImportTool: Encountered IOException running import job: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory wasb://oxigencentoscluster@sacentos.blob.core.windows.net/sqoop-import-data/stagingDir/testdb/testtable26 already exists
at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:146)
at org.apache.hadoop.mapreduce.JobSubmitter.checkSpecs(JobSubmitter.java:266)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:139)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
at org.apache.sqoop.mapreduce.ImportJobBase.doSubmitJob(ImportJobBase.java:196)
at org.apache.sqoop.mapreduce.ImportJobBase.runJob(ImportJobBase.java:169)
at org.apache.sqoop.mapreduce.ImportJobBase.runImport(ImportJobBase.java:266)
at org.apache.sqoop.manager.SqlManager.importTable(SqlManager.java:673)
at org.apache.sqoop.tool.ImportTool.importTable(ImportTool.java:497)
at org.apache.sqoop.tool.ImportTool.run(ImportTool.java:605)
at org.apache.sqoop.Sqoop.run(Sqoop.java:148)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.sqoop.Sqoop.runSqoop(Sqoop.java:184)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:226)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:235)
at org.apache.sqoop.Sqoop.main(Sqoop.java:244)

[Tue Oct 25 08:11:40 UTC 2016] [BDP_ERROR]  404: Failed to run sqoop import for Database oxigen and Table twitter
[Tue Oct 25 08:11:40 UTC 2016] [BDP_INFO]  SqoopJob details updated for Database oxigen and Table twitter
[Tue Oct 25 08:11:40 UTC 2016] [BDP_DEBUG]  < triggerSqoopJob EXIT
[Tue Oct 25 08:11:40 UTC 2016] [BDP_DEBUG]  > triggerHiveJob ENTRY
[Tue Oct 25 08:11:40 UTC 2016] [BDP_DEBUG]  Triggering Hive Job
WARNING: Use "yarn jar" to launch YARN applications.

Logging initialized using configuration in file:/etc/hive/2.4.2.4-5/0/hive-log4j.properties
Exception in thread "main" java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.RetriableException): org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot create directory /tmp/hive/admin123/4bd585e6-03c5-4f26-8da0-4b54da07b738. Name node is in safe mode.
The reported blocks 0 needs additional 22 blocks to reach the threshold 0.9900 of total blocks 22.
The number of live datanodes 0 has reached the minimum number 0. Safe mode will be turned off automatically once the thresholds have been reached.
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1331)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3971)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1081)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:630)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2206)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2202)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2200)
Caused by: org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot create directory /tmp/hive/admin123/4bd585e6-03c5-4f26-8da0-4b54da07b738. Name node is in safe mode.
The reported blocks 0 needs additional 22 blocks to reach the threshold 0.9900 of total blocks 22.
The number of live datanodes 0 has reached the minimum number 0. Safe mode will be turned off automatically once the thresholds have been reached.
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1327)
... 12 more

at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:680)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:624)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.RetriableException): org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot create directory /tmp/hive/admin123/4bd585e6-03c5-4f26-8da0-4b54da07b738. Name node is in safe mode.
The reported blocks 0 needs additional 22 blocks to reach the threshold 0.9900 of total blocks 22.
The number of live datanodes 0 has reached the minimum number 0. Safe mode will be turned off automatically once the thresholds have been reached.
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1331)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3971)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1081)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:630)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2206)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2202)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2200)
Caused by: org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot create directory /tmp/hive/admin123/4bd585e6-03c5-4f26-8da0-4b54da07b738. Name node is in safe mode.
The reported blocks 0 needs additional 22 blocks to reach the threshold 0.9900 of total blocks 22.
The number of live datanodes 0 has reached the minimum number 0. Safe mode will be turned off automatically once the thresholds have been reached.
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1327)
... 12 more

at org.apache.hadoop.ipc.Client.call(Client.java:1426)
at org.apache.hadoop.ipc.Client.call(Client.java:1363)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
at com.sun.proxy.$Proxy14.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.mkdirs(ClientNamenodeProtocolTranslatorPB.java:560)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:256)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:104)
at com.sun.proxy.$Proxy15.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3064)
at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:3034)
at org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1105)
at org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1101)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1101)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1094)
at org.apache.hadoop.hive.ql.session.SessionState.createPath(SessionState.java:631)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:556)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:489)
... 8 more
[Tue Oct 25 08:13:43 UTC 2016] [BDP_ERROR]  402: Failed to run Hive job for Database testdb and Table stage_testtable26
[Tue Oct 25 08:13:43 UTC 2016] [BDP_INFO]  HiveJob details updated for Database testdb and Table stage_testtable26
[Tue Oct 25 08:13:43 UTC 2016] [BDP_DEBUG]  < triggerHiveJob EXIT
[Tue Oct 25 08:13:43 UTC 2016] [BDP_DEBUG]  < testlogger SCRIPTEXIT

Monday 26 September 2016

Install NiFi in Spark HDInsight Cluster

sudo mkdir tars

cd tars/

sudo wget http://public-repo-1.hortonworks.com/HDF/ubuntu14/1.x/updates/1.2.0.1/HDF-1.2.0.1-1.tar.gz

sudo chmod -R 777 /home/admin123/tars/HDF-1.2.0.1-1.tar.gz

sudo tar -xzvf /home/admin123/tars/HDF-1.2.0.1-1.tar.gz

sudo chmod -R 777 /home/admin123/tars/HDF-1.2.0.1-1

sudo vi /home/admin123/tars/HDF-1.2.0.1-1/nifi/conf/nifi.properties
   set nifi.web.http.port=8095 (web properties section)
   set the sensitive properties key (security section)
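
If you prefer to script this edit instead of using vi, a minimal sed sketch (nifi.web.http.port is the standard property name; verify it in your HDF version before relying on this):

sudo sed -i 's/^nifi.web.http.port=.*/nifi.web.http.port=8095/' /home/admin123/tars/HDF-1.2.0.1-1/nifi/conf/nifi.properties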

cd /home/admin123/tars/HDF-1.2.0.1-1/nifi/bin/

sudo ./nifi.sh install

sudo service nifi status

sudo service nifi start

hostname -f

Copy the url
http://hn0-linuxh.xxxxxxxxxxxxxxxxxymrxnfih0c.ix.internal.cloudapp.net:8095/nifi/

Thursday 18 August 2016

Change Timezone to IST in CENTOS

sudo mv /etc/localtime /etc/localtime.bak
sudo ln -s /usr/share/zoneinfo/Asia/Kolkata /etc/localtime

date
Thu Aug 18 12:58:33 IST 2016

Friday 12 August 2016

Hbase Table backup/Restore

These are the steps for backup/restore:


1. Export the table to an HDFS directory:


hbase org.apache.hadoop.hbase.mapreduce.Export \
   <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]]
   
Example:

hbase org.apache.hadoop.hbase.mapreduce.Export  test1  /hbase_backup/test1

Note: To import into a different cluster (or if the table does not yet exist in HBase), the table must be created in that cluster before running the import command.
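
For example, a minimal sketch in the HBase shell (assuming the exported table is test1; the column family names, e.g. f1, must match those of the source table):

hbase shell <<EOF
create 'test1','f1'
EOF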



2. Import/restore data into the HBase cluster from HDFS

Once the table exists in the HBase cluster, we can import (restore) the data.

Command:

hbase org.apache.hadoop.hbase.mapreduce.Import <tablename> <inputdir>

Example:

hbase org.apache.hadoop.hbase.mapreduce.Import test1 /hbase_backup/test1


Wednesday 10 August 2016

Submit Spark job on yarn cluster

 Error: Could not find or load main class org.apache.spark.deploy.yarn.ApplicationMaster
Solution : 


If you are using Spark with HDP/HDInsight, do the following:
  1. Add these entries to $SPARK_HOME/conf/spark-defaults.conf:
    spark.driver.extraJavaOptions -Dhdp.version=2.2.9.1-19 (your installed HDP version)
    spark.yarn.am.extraJavaOptions -Dhdp.version=2.2.9.1-19 (your installed HDP version)
  2. Create a java-opts file in $SPARK_HOME/conf containing the installed HDP version, like
-Dhdp.version=2.2.9.1-19 (your installed HDP version)
To find the HDP version, run hdp-select status hadoop-client on the cluster (see the sketch below).
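
A minimal sketch of both steps (the awk parsing of the hdp-select output is an assumption; confirm the version string it prints on your cluster):

HDP_VERSION=$(hdp-select status hadoop-client | awk '{print $NF}')
echo "-Dhdp.version=${HDP_VERSION}" > "$SPARK_HOME/conf/java-opts"
echo "spark.driver.extraJavaOptions -Dhdp.version=${HDP_VERSION}" >> "$SPARK_HOME/conf/spark-defaults.conf"
echo "spark.yarn.am.extraJavaOptions -Dhdp.version=${HDP_VERSION}" >> "$SPARK_HOME/conf/spark-defaults.conf"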

Example command :
spark-submit --class org.apache.spark.examples.SparkPi \
    --master yarn \
    --deploy-mode cluster \
    --driver-memory 1g \
    --executor-memory 2g \
    --executor-cores 1 \
    --queue default \
    /usr/hdp/current/spark/lib/spark-examples*.jar \
    10

Error: jar changed on src filesystem (Spark on YARN, cluster mode)

If you are getting this error, it means you are uploading assembly jars.
Solution: In yarn-cluster mode, spark-submit automatically uploads the assembly jar to a distributed cache that all executor containers read from, so there is no need to manually copy the assembly jar to all nodes (or pass it through --jars).
It seems there are two versions of the same jar in your HDFS. Try removing all old jars from your .sparkStaging directory and try again; it should work.
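
A minimal cleanup sketch (the staging directory normally sits under the submitting user's HDFS home directory; adjust the path if your layout differs):

hadoop fs -rm -r -skipTrash /user/$USER/.sparkStaging/*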

Thursday 21 July 2016

Spark Memory Consumption Optimization

Changing spark.executor.memory and spark.executor.instances will bring down memory consumption.

By default the value for spark.executor.memory is 4608m and spark.executor.instances is 2




When I run Spark-Shell after SSH, below is the memory consumption footprint.






Change spark.executor.memory to 1608m and run spark-shell. Below is the memory consumption footprint.


Now change spark.executor.instances to 1 and run spark-shell. Below is the memory consumption footprint.
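
A minimal sketch of trying both values for a single spark-shell session, without editing the cluster-wide defaults:

spark-shell --conf spark.executor.memory=1608m --conf spark.executor.instances=1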


Install Python Libraries to use in Pyspark (HDINSIGHT SPARK CLUSTER)

Install the libraries using commands below:
cd /usr/bin/anaconda/bin/

export PATH=/usr/bin/anaconda/bin:$PATH

conda update matplotlib

conda install Theano

pip install scikit-neuralnetwork

pip install vaderSentiment

Tuesday 19 July 2016

Get List of Hosts in HDINSIGHT Cluster

sudo apt-get install jq

curl -u admin:PASSWORD -G "https://CLUSTERNAME.azurehdinsight.net/api/v1/clusters/CLUSTERNAME/hosts" | jq '.items[].Hosts.host_name'

OUTPUT:

% Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2090  100  2090    0     0   2557      0 --:--:-- --:--:-- --:--:--  2555
"hn0-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"
"hn1-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"
"wn1-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"
"zk0-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"
"zk1-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"
"zk6-linuxh.bpa0iysj5klejpk0fzlmcicoke.ix.internal.cloudapp.net"


Parse the output and dump the list to a text file.
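
A minimal sketch of that step (jq -r emits raw strings without quotes, curl -s hides the progress meter; hosts.txt is a hypothetical output file):

curl -s -u admin:PASSWORD -G "https://CLUSTERNAME.azurehdinsight.net/api/v1/clusters/CLUSTERNAME/hosts" | jq -r '.items[].Hosts.host_name' > hosts.txt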

Monday 27 June 2016

Command to find a directory in Linux

sudo find / -name "dir-name-here"

Create Cluster Authentication without Organization ID

Download .ps1 file : https://gallery.technet.microsoft.com/scriptcenter/Self-signed-certificate-5920a7c6

PS C:\> Import-Module -Name F:\New-SelfSignedCertificateEx.ps1
PS C:\> New-SelfSignedCertificateEx -Subject "CN=exampleapp" -KeySpec "Exchange" -FriendlyName "exampleapp"
PS C:\> $cert = Get-ChildItem -Path cert:\CurrentUser\My\* -DnsName exampleapp
PS C:\> $keyValue = [System.Convert]::ToBase64String($cert.GetRawCertData())
PS C:\> Add-AzureRmAccount

PS C:\> $azureAdApplication = New-AzureRmADApplication -DisplayName "exampleapp" -HomePage "https://www.contoso.org" -IdentifierUris "https://www.contoso.org/example" -KeyValue $keyValue -KeyType AsymmetricX509Cert -EndDate $cert.NotAfter -StartDate $cert.NotBefore

PS C:\> $azureAdApplication

Note: ApplicationId           : 934fcbb6-66xxxxxxxxxxxxxxxxxxxxx

PS C:\> New-AzureRmADServicePrincipal -ApplicationId $azureAdApplication.ApplicationId

PS C:\> (Get-AzureRmSubscription).TenantId

Note: TenantId

PS C:\> (Get-ChildItem -Path cert:\CurrentUser\My\* -DnsName exampleapp).Thumbprint

Note: Thumbprint

Add-AzureRmAccount -ServicePrincipal -CertificateThumbprint CFC1DC609A547xxxxxxxxxxxxxxxxxxxxxxxx -ApplicationId 934fcbb6-66xxxxxxxxxxxxxxxxxxxxxxxxxxxx -TenantId edc9c7e2-1bxxxxxxxxxxxxxxxxxxxxxxxxxxx

Next, proceed with the cluster creation script.

Wednesday 15 June 2016

Hbase and Hive Support

*********************************************************************************
CREATE table in hbase using hive:
*********************************************************************************
create table hbase_wallets(Id BIGINT, MDN VARCHAR(20), Password VARCHAR(1000), eWalletId VARCHAR(20), Date TIMESTAMP, TransactionNumber VARCHAR(25), Requester VARCHAR(20), Instrument TINYINT, Status TINYINT, WalletType TINYINT, Opt1 VARCHAR(50), Opt2 VARCHAR(50), Opt3 VARCHAR(50), IsB2B BOOLEAN, UserId VARCHAR(50), Banks INT, WalletOwner INT, Currency TINYINT, INSERT_DT TIMESTAMP, UPDATE_DT TIMESTAMP, ACTIVE_FLAG CHAR(1))
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:MDN,cf1:Password,cf1:eWalletId,cf1:Date,cf1:TransactionNumber,cf1:Requester,cf1:Instrument,cf1:Status,cf1:WalletType,cf2:Opt1,cf2:Opt2,cf2:Opt3,cf2:IsB2B,cf2:UserId,cf2:Banks,cf2:WalletOwner,cf2:Currency,cf2:INSERT_DT,cf2:UPDATE_DT,cf2:ACTIVE_FLAG")
TBLPROPERTIES ("hbase.table.name" = "hive_wallets");

*********************************************************************************
To move data from hive to hbase:
*********************************************************************************
INSERT OVERWRITE TABLE hbase_wallets SELECT * FROM oxi_consolidated_orc.Wallets;

*********************************************************************************
CREATE table in hbase using hive:
*********************************************************************************
use default;
create EXTERNAL table hbase_actions(Id INT, description VARCHAR(80), insert_dt timestamp, update_dt timestamp, active_flag char(1))
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:description,cf1:insert_dt,cf1:update_dt,cf1:active_flag")
LOCATION 'wasb://linuxxxxxxhdinsightcluster@oxigxxxxxxcompute.blob.core.windows.net/test/actionshbasetest'
TBLPROPERTIES ("hbase.table.name" = "hbase_actions");

*********************************************************************************
To move data from hive to hbase:
*********************************************************************************
INSERT OVERWRITE TABLE hbase_wallets_test SELECT Id FROM oxi_consolidated.Wallets;

Get CSV data from SFTP and create Hive Table using Python script

Content of Expect Script

#!/usr/bin/expect
cd /home/admin123/SFTP/sftpdata
spawn sftp root@ftp.xxxxx.com
expect "password:"
send "ihCtwxxxxxxxxxxxxq\n"
expect "sftp>"
send "get /root/FRED-GGGDTPBRA188N.csv\n"
expect "sftp>"
send "exit\n"
expect ":~$"
spawn hadoop fs -mkdir -p /ftpdata/testdirdump
expect ":~$"
spawn hadoop fs -copyFromLocal -f /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv /ftpdata/testdirdump
expect ":~$"
spawn python /home/admin123/scripts/GenerateHiveScript.py /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv sftptestdata sftpFRED /ftpdata/testdirdump /home/admin123/SFTP/sftphivequery/hivequery
expect ":~$"
spawn sudo chmod -R 777 /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect ":~$"
spawn hive -f /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect eof
catch wait result


Contents of GenerateHiveScript.py 

'''
Created on 09-May-2016

@author: Lokesh.Nanda
'''

#import string
import re
#import time
import sys
import csv


source_path = sys.argv[1]
#F:\StockStreamingData\NSEStockData.20160428-131159.csv
database_name = sys.argv[2]
database_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', database_name)
#ftpdata
table_name = sys.argv[3]
table_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', table_name)
#testtable
hdfs_file_loc = sys.argv[4]
#/datapull/stocks
hive_query_path = sys.argv[5]
#F:\StockStreamingData\hivequery

with open(source_path, 'r') as f:
    first_line = f.readline()

# Flag the header row if it contains special characters; generic column names are used in that case.
flag = False
symbol = "~`!@#$%^&*()-+={}[]:>;'</?*-+"
for i in first_line:
    if i in symbol:
        flag = True
        break

# Count the number of fields in the header row.
for item in csv.reader([first_line], skipinitialspace=True):
    count_comma = len(item)

finaldump = "create database if not exists " + database_name_mod + "; use " + database_name_mod + "; create external table " + table_name_mod + "("
if flag:
    # One generic column per field when the header cannot be used for names.
    i = 0
    while i < count_comma:
        finaldump = finaldump + "col" + str(i) + " String,"
        i = i + 1
else:
    columns = first_line.strip().split(',')
    for col in columns:
        finaldump = finaldump + "`" + col + "` String,"
finaldump = finaldump[:-1] + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '" + hdfs_file_loc + "';"

with open(hive_query_path + table_name + '.hql', 'w') as the_file:
    the_file.write(finaldump)

HdInsight Cluster Delete Script - PART 4

$resourceGroupName = "xxxxRG"
$clusterName = "linuxhdinsightclusterxxxx"
$azUserName = "xxxx@outlook.com"
$azUserPW = ConvertTo-SecureString -String "xxxxxxxxxx" -AsPlainText -Force
$cred = New-Object System.Management.Automation.PSCredential($azUserName, $azUserPW)
Login-AzureRmAccount -Credential $cred
Invoke-Expression "C:\Cluster\eodscript.ps1"
Remove-AzureRmHDInsightCluster -ResourceGroupName $resourceGroupName -ClusterName $clusterName

HdInsight Cluster Create Script - PART 3

Import-Module "C:\Program Files (x86)\Microsoft SDKs\Azure\PowerShell\ServiceManagement\Azure\Azure.psd1"
$resourceGroupName = "xxxxRG"
$storageAccountName = "saxxxx"
$containerName = "linuxhdinsightclusterxxxx"
$storageAccountKey = "nNoxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx90A=="
$subscriptionName = "xxxxxxxxxxxx"
$storageAccountLocation = "North Central US"
$clusterName = "linuxhdinsightclusterxxxx"
$clusterNodeCount = "8"
$azUserName = "xxxxxxxx@outlook.com"
$azUserPW = ConvertTo-SecureString -String "xxxxxxxxxx" -AsPlainText -Force
$cred = New-Object System.Management.Automation.PSCredential($azUserName, $azUserPW)
Login-AzureRmAccount -Credential $cred
Select-AzureRmSubscription -SubscriptionName $subscriptionName
$blobStorageName = "saxxxx.blob.core.windows.net"
$sshUserName = "admin123"
$sshUserPwd = "Test@45678"
$sshUserPW = ConvertTo-SecureString -String $sshUserPwd -AsPlainText -Force
$sshCredentials = New-Object System.Management.Automation.PSCredential($sshUserName,$sshUserPW)
$computerName = "linuxhdinsightclusterxxxx-ssh.azurehdinsight.net"
$htpUserName = "admin"
$htpUserPwd = "Test@45678"
$htpUserPW = ConvertTo-SecureString -String $htpUserPwd -AsPlainText -Force
$credentials = New-Object System.Management.Automation.PSCredential($htpUserName,$htpUserPW)
$oozieUserName = "rootx"
$oozieUserPwd = "Test@45678"
$oozieUserPW = ConvertTo-SecureString -String $oozieUserPwd -AsPlainText -Force
$oozieCred = New-Object System.Management.Automation.PSCredential($oozieUserName,$oozieUserPW)
$config = New-AzureRmHDInsightClusterConfig
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Hue"  -NodeType HeadNode  -Parameters "Test@45678" -Uri https://hdiconfigactions.blob.core.windows.net/linuxhueconfigactionv01/install-hue-uber-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install R"  -NodeType HeadNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxrconfigactionv01/r-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install R"  -NodeType WorkerNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxrconfigactionv01/r-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Solr"  -NodeType HeadNode  -Uri https://hdiconfigactions.blob.core.windows.net/linuxsolrconfigactionv01/solr-installer-v01.sh
$config = Add-AzureRmHDInsightScriptAction -Config $config -Name "Install Solr"  -NodeType WorkerNode -Uri https://hdiconfigactions.blob.core.windows.net/linuxsolrconfigactionv01/solr-installer-v01.sh

New-AzureRmHDInsightCluster -ClusterName $clusterName -config $config -ResourceGroupName $resourceGroupName -HttpCredential $credentials -Location $storageAccountLocation -DefaultStorageAccountName "$storageAccountName.blob.core.windows.net" -DefaultStorageAccountKey $storageAccountKey -DefaultStorageContainer $containerName  -ClusterSizeInNodes $clusterNodeCount -ClusterType "Spark" -OSType "Linux" -Version "3.4" -SshCredential $sshCredentials
Invoke-Expression "C:\Cluster\bootup.ps1"

HdInsight Cluster Backup Strategy - PART 2 eod.ps1

$servername = "xxxxxxxxxxxxxx-ssh.azurehdinsight.net"
$username = "admin123"
$password = "Test@45678"
$port = 22


$command1 = "hadoop fs -rm -R wasb://container-name@storageacc-name.blob.core.windows.net/backup/*"
$command2 = "hadoop fs -copyFromLocal /home/admin123/* wasb://container-namebackup@storageacc-name.blob.core.windows.net/backup/ "
$commandname = @($command1,$command2)

Import-Module SSH-Sessions
New-SshSession -ComputerName $servername -Username $username -Password $password -Port $port

foreach ($c in $commandname) {
$SshResults = Invoke-SshCommand -InvokeOnAll -Command $c
}
Write-Host "Done!!!"

Remove-SshSession -RemoveAll

HdInsight Cluster Backup Strategy - PART 1 bootup.ps1

Create a PowerShell Script as shown below:

$servername = "xxxxxxxx-ssh.azurehdinsight.net"
$username = "admin123"
$password = "Test@45678"
$port = 22
$command2 = "hadoop fs -copyToLocal wasb://container-name@storage-name.blob.core.windows.net/backup/* /home/admin123/"
$command3 = "sudo chmod -R 777 /home/admin123/*"
$command4 = "hadoop fs -rm -R wasb://conatinerarchive@storage-name.blob.core.windows.net/archive/*"
$command5 = "hadoop fs -copyFromLocal /home/admin123/* wasb://conatinerarchive@storage-name.blob.core.windows.net/archive/ "
$command6 = "/home/admin123/scripts/executehivebootup.sh"
$command7 = "/home/admin123/HueScripts/hueurl.sh"
$command8 = "sudo /home/admin123/scripts/add_configuration.sh"
$command9 = "sudo apt-get install expect"
$command10 = "/home/admin123/scripts/SFTPrun.sh"

$commandname = @($command2,$command3,$command4,$command5)

Import-Module SSH-Sessions
New-SshSession -ComputerName $servername -Username $username -Password $password -Port $port

Write-Host "Started bootup data pull..!!!"

foreach ($c in $commandname) {
$SshResults = Invoke-SshCommand -InvokeOnAll -Command $c
}
Write-Host "Step 1 : Done With Copying..!!!"

Write-Host "Step 2 : Creating Schemas..."

$SshResults2 = Invoke-SshCommand -InvokeOnAll -Command $command6

Write-Host "Step 3 : Generating Hue Link...."

$SshResults3 = Invoke-SshCommand -InvokeOnAll -Command $command7

Write-Host "Step 4 : Completed...."

$SshResults4 = Invoke-SshCommand -InvokeOnAll -Command $command8

Write-Host "Step 5 : Completed core site changes..."

$SshResults5 = Invoke-SshCommand -InvokeOnAll -Command $command9

Write-Host "Step 6 : Installed dependent libraries..."

$SshResults5 = Invoke-SshCommand -InvokeOnAll -Command $command10

Write-Host "Step 7 : Installed  SFTP dependencies..."

Remove-SshSession -RemoveAll


***************************************
Contents of shell scripts used:
***************************************

executehivebootup.sh

oxi_array=(`find /home/admin123/HiveStartup/ -type f -follow` )
for element in "${oxi_array[@]}"
do
  hive -f "${element}"
done

Content of HiveStartup
Create a folder HiveStartup and add a file xxxdata.hql
xxxdata.hql:
CREATE DATABASE IF NOT EXISTS xxxdata;
USE xxxdata;
CREATE EXTERNAL TABLE IF NOT EXISTS xxxdata( Bill_account string,Reading_type string, Reading_datetime Timestamp,Reading_value string ,`Interval` string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
LOCATION 'wasb://container-name@storageacc-name.blob.core.windows.net/xxxdata/';


hueurl.sh

hostname -f> /home/admin123/HueScripts/hueurl.properties
sudo sed -i s/hn0/huelink=hn0/ /home/admin123/HueScripts/hueurl.properties

Content of hueurl.properties

huelink=hn0-linuxxxxxxxxxxxxxxxjqd33z3g.ex.internal.cloudapp.net


add_configuration.sh

chmod -R 777 /usr/hdp/2.2.*/hadoop/lib
cp /home/admin123/jar/com.mysql.jdbc_5.1.5.jar /usr/hdp/2.2.*/hadoop/lib/
cp /etc/hadoop/conf/core-site.xml /home/admin123/scripts/
awk '/<configuration>/ {print;print "<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>Fajj9v2xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxetg==</value>\n</property>\n<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>vNcoxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxrUw==</value>\n</property>\n<property>\n<name>fs.azure.account.key.storageaccname.blob.core.windows.net</name>\n<value>PUWxxxxxxxxxxxxxxxxxxHq5iQ==</value>\n</property>"; next }1' /home/admin123/scripts/core-site.xml > /etc/hadoop/conf/core-site.xml


SFTPrun.sh

sftp_array=(`find /home/admin123/SFTP/sftpfirsttime/ -type f -follow` )
for element in "${sftp_array[@]}"
do
  "${element}"
done

Contents of sftpfirsttime: ftp_test_com.sh

#!/usr/bin/expect

spawn sftp demo@test.rebex.net
expect "yes/no)?"
send "yes\n"
expect "password:"
send "password\n"
expect "sftp>"
send "exit\n"
interact


Partition Table after restart of HDInsight Cluster

CREATE EXTERNAL TABLE amerendata_partitioned (Bill_account string,Reading_type string, Reading_datetime Timestamp,Reading_value string ,`Interval` string)
PARTITIONED BY (Reading_month String)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS ORC Location '/amerenORCpartitioned/';
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='1');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='2');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='3');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='4');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='5');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='6');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='7');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='8');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='9');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='10');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='11');
ALTER TABLE amerendata_partitioned ADD PARTITION(Reading_month='12');
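
The twelve ALTER statements above can also be generated in a loop and run in one Hive session; a minimal sketch:

stmts=""
for m in $(seq 1 12); do
  stmts="${stmts} ALTER TABLE amerendata_partitioned ADD IF NOT EXISTS PARTITION(Reading_month='${m}');"
done
hive -e "${stmts}"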

Sunday 6 March 2016

Run shell script at start of System

This can be accomplished using cron.

crontab -e
then select an editor (vim, gedit, nano)

Add the following line in the crontab editor:

@reboot  /path/to/script

Then save the crontab. Now each time your system boots up, this script will be invoked.

Thursday 3 March 2016

Create Table in HBase using Linux Script



#!/bin/bash

TABLE="sample"

exec hbase shell <<EOF
     create "${TABLE}",'f1'
EOF

Save this script as a .sh file and run it. It will create the table in HBase.

Thursday 18 February 2016

Create Identity column in Hive for dump in Hbase


  • We need to write a UDF for this.

package org.apache.hadoop.hive.contrib.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.LongWritable;

@Description(name = "row_sequence",
    value = "_FUNC_() - Returns a generated row sequence number starting from 1")
@UDFType(deterministic = false, stateful = true)

public class UDFRowSequence extends UDF
{
  private LongWritable result = new LongWritable();

  public UDFRowSequence() {
    result.set(0);
  }

  public LongWritable evaluate() {
    result.set(result.get() + 1);
    return result;
  }
}


  • Create a jar for this.
  • Create a Hive managed Hbase table with id as column for auto increment.

create table ga_tblagetest(id String,userAgeBracket String, date String,sessions String,percentNewSessions String,newUsers String,bounceRate String,pageviewsPerSession String,avgSessionDuration String)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:userAgeBracket,cf1:date,cf1:sessions,cf1:percentNewSessions,cf1:newUsers,cf1:bounceRate,cf1:pageviewsPerSession,cf1:avgSessionDuration")
TBLPROPERTIES ("hbase.table.name" = "ga_tblagetest");


  • In hive, add the jar : ADD JAR /path/customhiveudf.jar
  • create temporary function incrementrow as 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';
  • CREATE TEMPORARY TABLE default.tmp2 AS SELECT incrementrow(),* FROM socialmedia.tblage;
  • INSERT OVERWRITE TABLE ga_tblagetest SELECT * FROM default.tmp2;
Data with row key can be queried in Hbase.
  • scan 'ga_tblagetest'

Friday 12 February 2016

Script for getting count and name of table from Hive in a text file

OUTPUT="$(hive -e 'use oxi_consolidated; show tables;')"
hive -e "select count(*) from ${OUTPUT}" > countoxi.txt

Execute all .hql files inside a directory:

oxi_array=(`find /home/oxigen/HiveStartup/ -type f -follow` )
for element in "${oxi_array[@]}"
do
  hive -f "${element}"
done

Sunday 7 February 2016

Connect Linux HDInsight cluster with PowerBi/Tableau


  1. Download and install the Microsoft Hive ODBC Driver: https://www.microsoft.com/en-us/download/confirmation.aspx?id=40886
  2. Open the ODBC Data Source Administrator (configuration screenshots omitted).
  3. Connected!



Thursday 4 February 2016

Shell Script for automating GA

todaysdate=`date +"%Y-%m-%d"`
path="/home/brillio/GoogleAnalytics/CSVData/"
dir="/home/brillio/GoogleAnalytics"
jarname="googleanalytics"
hdfspath="/socialmedia/googleanalytics/"
if test -d "$path"; then
    sudo rm -r "$path"*
else
    echo "FileNotExist"
fi

java -jar "$jarname.jar"
sudo chmod -R 777 "$dir"

tablename=( "tblAge" "tblBehaviourAndNetwork" "tblGender" "tblGeo" "tblTechnologyBrowser" "tblTechnologyOperatingSystem" "tblLanguage" )
for table in "${tablename[@]}"
do
    if hadoop fs -test -f "$hdfspath$table/${table}_$todaysdate.csv"; then
        hadoop fs -rm -r "$hdfspath$table/${table}_$todaysdate.csv"
    else
        echo "FileNotExist"
    fi
    hadoop fs -copyFromLocal "$path${table}"_* "$hdfspath$table/"
done

Tuesday 2 February 2016

Monitor Spark Processes (List spark processes running)

If you are running Spark over a YARN cluster, there is a way to list the running Spark processes:

yarn application -list

Spark Streaming Process

To kill a running process :

 yarn application -kill application_id


Copy Startup Files from Azure Blob to Local everytime the cluster is created Using Powershell

$servername = "xxxxxxxxxxxxxxxxxxxxx"
$username = "xxxxxxxxx"
$password = "xxxxxxxxxxxxx"
$port = 22
$foldername = @("HueScripts","completereload")
Import-Module SSH-Sessions
New-SshSession -ComputerName $servername -Username $username -Password $password -Port $port

foreach ($f in $foldername) {
   $command1 = "test -d `"/home/brillio/$f`" && sudo rm -R /home/brillio/$f || echo Does Not Exist Proceeding..."
   $SshResults1 = Invoke-SshCommand -InvokeOnAll -Command $command1
   $command2 = "sudo mkdir /home/brillio/$f"
   $SshResults2 = Invoke-SshCommand -InvokeOnAll -Command $command2
   $command3 = "sudo chmod -R 777 /home/brillio/$f"
   $SshResults3 = Invoke-SshCommand -InvokeOnAll -Command $command3
   $command4 = "hadoop fs -copyToLocal wasb://xxxxxxx@xxxxxxxxxxx/$f/* /home/brillio/$f/"
   $SshResults4 = Invoke-SshCommand -InvokeOnAll -Command $command4
   $command5 = "sudo chmod -R 777 /home/brillio/$f"
   $SshResults5 = Invoke-SshCommand -InvokeOnAll -Command $command5
   Write-Host "Done $f"
 }

 $command6 = "/home/brillio/hueurl.sh"
 $SshResults6 = Invoke-SshCommand -InvokeOnAll -Command $command6

Remove-SshSession -RemoveAll

How to pass Parameter in HIVE hql



hive -e 'create database ${db}' --define db='twitter_stream'


OR

If you are calling the Hive HQL from a shell script:

hive -e 'create database ${db}' --define db=$1
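
The same idea works with a script file; a minimal sketch, where create_db.hql is a hypothetical file containing the single line CREATE DATABASE IF NOT EXISTS ${db};

hive --hivevar db=twitter_stream -f create_db.hql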

Monday 1 February 2016

Catch Logs In Sqoop


To output only in log file and not in UI:

/home/brillio/sqoop-jobs/test.sh > /home/brillio/logsqoop.txt 2>&1

To output in UI and log file:

/home/brillio/sqoop-jobs/test.sh 2>&1 | tee /home/brillio/logsqoop.txt

Friday 22 January 2016

Cannot change directory in Windows cmd

cd /d F:\Python3\Scripts

This should work ! :)

NUMPY installation Error using PIP:

https://www.microsoft.com/en-us/download/confirmation.aspx?id=44266
http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy


Wednesday 13 January 2016

Delete Script HDInsight Cluster

Login-AzureRmAccount
Import-AzurePublishSettingsFile "F:\Microsoft Azure Sponsorship-12-16-2015-credentials (2).publishsettings"
Get-AzureRmSubscription  # list your subscriptions and get your subscription ID
$subscriptionName="Microsoft Azure Sponsorship"
#Select-AzureRmSubscription -SubscriptionId "<Your Azure Subscription ID>"
Select-AzureRmSubscription -SubscriptionName $subscriptionName

$resourceGroupName = "hdinsight_southeastasia"

$storageAccountName = "XXXXXXXXXX"
$containerName = "XXXXXXXXXXX"

$clusterName = "XXXXXXXXXXXXX"
$location = "Southeast Asia"
$clusterNodes = "4"
$clusterType="HBase"
$ClusterVersion="3.2.7.757"

# Get the Storage account key
#$storageAccountKey = Get-AzureRmStorageAccountKey -ResourceGroupName $resourceGroupName -Name $storageAccountName | %{ $_.Key1 }
echo "Authentication Done ..!!!"
echo "Cluster deletion in Progress"
# Delete the HDInsight cluster
Remove-AzureRmHDInsightCluster -ClusterName $clusterName
echo "Cluster deleted"

Create script HDInsight Cluster

Login-AzureRmAccount
Import-AzurePublishSettingsFile "F:\Microsoft Azure Sponsorship-12-16-2015-credentials (2).publishsettings"
Get-AzureRmSubscription  # list your subscriptions and get your subscription ID
$subscriptionName="Microsoft Azure Sponsorship"
#Select-AzureRmSubscription -SubscriptionId "<Your Azure Subscription ID>"
Select-AzureRmSubscription -SubscriptionName $subscriptionName

$resourceGroupName = "hdinsight_southeastasia"

$storageAccountName = "XXXXXXXXXXXXXX"
$containerName = "XXXXXXXXXXXXX"

$clusterName = "XXXXXXXXXXXXX"
$location = "Southeast Asia"
$clusterNodes = "4"
$clusterType="HBase"
$ClusterVersion="3.2.7.757"

# Get the Storage account key
$storageAccountKey = Get-AzureRmStorageAccountKey -ResourceGroupName $resourceGroupName -Name $storageAccountName | %{ $_.Key1 }
echo "Authentication Done ..!!!"
echo "Cluster creation in Progress"
# Create a new HDInsight cluster
New-AzureRmHDInsightCluster -ResourceGroupName $resourceGroupName `
    -ClusterName $clusterName `
    -Location $location `
    -DefaultStorageAccountName "$storageAccountName.blob.core.windows.net" `
    -DefaultStorageAccountKey $storageAccountKey `
    -DefaultStorageContainer $containerName  `
    -ClusterSizeInNodes $clusterNodes `
-ClusterType $clusterType `
    -Version $ClusterVersion

echo "Cluster Created.."

Sunday 10 January 2016

Contract Between HashCode and equals() Method in Java

Contract between hashCode() and equals():

If two objects are considered equal by the equals() method, then they must have identical hashCode() values.
So if you override equals(), override hashCode() as well.

Saturday 9 January 2016

How to transfer data from Hadoop cluster to Azure Blob

Add the following property to core-site.xml:

<property>
    <name>fs.azure.account.key.<storage account name>.blob.core.windows.net</name>
    <value><account key value></value>
</property>

and make the blob container accessible in the Azure Portal.

Now we can transfer data from the Hadoop cluster to the Azure blob container:

hadoop fs -copyFromLocal <local path> <azure blob path>

For transferring HDFS data to an Azure blob:

hadoop distcp hdfs://hostname:8020/filename <azure blob path>

The Azure blob path has the form:

wasb://<containername>@<storageAccountName>.blob.core.windows.net
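
For example, a minimal sketch with hypothetical names (mycontainer, myaccount, namenode-host):

hadoop fs -copyFromLocal /tmp/sample.csv wasb://mycontainer@myaccount.blob.core.windows.net/data/
hadoop distcp hdfs://namenode-host:8020/data wasb://mycontainer@myaccount.blob.core.windows.net/data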