Content of Expect Script
#!/usr/bin/expect
cd /home/admin123/SFTP/sftpdata
spawn sftp root@ftp.xxxxx.com
expect "password:"
send "ihCtwxxxxxxxxxxxxq\n"
expect "sftp>"
send "get /root/FRED-GGGDTPBRA188N.csv\n"
expect "sftp>"
send "exit\n"
expect ":~$"
spawn hadoop fs -mkdir -p /ftpdata/testdirdump
expect ":~$"
spawn hadoop fs -copyFromLocal -f /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv /ftpdata/testdirdump
expect ":~$"
spawn python /home/admin123/scripts/GenerateHiveScript.py /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv sftptestdata sftpFRED /ftpdata/testdirdump /home/admin123/SFTP/sftphivequery/hivequery
expect ":~$"
spawn sudo chmod -R 777 /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect ":~$"
spawn hive -f /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect eof
catch wait result
Contents of GenerateHiveScript.py
'''
Created on 09-May-2016
@author: Lokesh.Nanda
'''
#import string
import re
#import time
import sys
import csv
source_path = sys.argv[1]
#F:\StockStreamingData\NSEStockData.20160428-131159.csv
database_name = sys.argv[2]
database_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', database_name)
#ftpdata
table_name = sys.argv[3]
table_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', table_name)
#testtable
hdfs_file_loc = sys.argv[4]
#/datapull/stocks
hive_query_path = sys.argv[5]
#F:\StockStreamingData\hivequery
with open(source_path, 'r') as f:
#with open('/home/admin123/NSEStockData.20160505-060003.csv', 'r') as f:
#with open('C:\Users\lokesh.nanda\Desktop\MOCK_DATA.csv', 'r') as f:
first_line = f.readline()
flag = False
symbol = "~`!@#$%^&*()-+={}[]:>;'</?*-+"
for i in first_line:
if i in symbol:
flag = True
break
#a = ","
#count_comma = first_line.count(a)
#first_line = csv.reader(first_line)
#count_comma = first_line.count
for item in csv.reader([first_line], skipinitialspace=True):
count_comma = len(item)
finaldump = "create database if not exists " + database_name_mod + "; use " + database_name_mod + "; create external table " + table_name_mod +"("
if(flag):
i = 0
while(i <= count_comma):
finaldump = finaldump + "col"+ str(i) + " String,"
i = i+1
else:
columns = first_line.split(',')
#columns = csv.reader(first_line)
for col in columns:
finaldump = finaldump + "`" + col + "` String,"
finaldump = finaldump[:-1] + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '" +hdfs_file_loc+ "';"
with open(hive_query_path + table_name + '.hql', 'w') as the_file:
the_file.write(finaldump)
#!/usr/bin/expect
cd /home/admin123/SFTP/sftpdata
spawn sftp root@ftp.xxxxx.com
expect "password:"
send "ihCtwxxxxxxxxxxxxq\n"
expect "sftp>"
send "get /root/FRED-GGGDTPBRA188N.csv\n"
expect "sftp>"
send "exit\n"
expect ":~$"
spawn hadoop fs -mkdir -p /ftpdata/testdirdump
expect ":~$"
spawn hadoop fs -copyFromLocal -f /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv /ftpdata/testdirdump
expect ":~$"
spawn python /home/admin123/scripts/GenerateHiveScript.py /home/admin123/SFTP/sftpdata/FRED-GGGDTPBRA188N.csv sftptestdata sftpFRED /ftpdata/testdirdump /home/admin123/SFTP/sftphivequery/hivequery
expect ":~$"
spawn sudo chmod -R 777 /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect ":~$"
spawn hive -f /home/admin123/SFTP/sftphivequery/hivequerysftpFRED.hql
expect eof
catch wait result
Contents of GenerateHiveScript.py
'''
Created on 09-May-2016
@author: Lokesh.Nanda
'''
#import string
import re
#import time
import sys
import csv
source_path = sys.argv[1]
#F:\StockStreamingData\NSEStockData.20160428-131159.csv
database_name = sys.argv[2]
database_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', database_name)
#ftpdata
table_name = sys.argv[3]
table_name_mod = re.sub('[^a-zA-Z0-9 \n\.]', '_', table_name)
#testtable
hdfs_file_loc = sys.argv[4]
#/datapull/stocks
hive_query_path = sys.argv[5]
#F:\StockStreamingData\hivequery
with open(source_path, 'r') as f:
#with open('/home/admin123/NSEStockData.20160505-060003.csv', 'r') as f:
#with open('C:\Users\lokesh.nanda\Desktop\MOCK_DATA.csv', 'r') as f:
first_line = f.readline()
flag = False
symbol = "~`!@#$%^&*()-+={}[]:>;'</?*-+"
for i in first_line:
if i in symbol:
flag = True
break
#a = ","
#count_comma = first_line.count(a)
#first_line = csv.reader(first_line)
#count_comma = first_line.count
for item in csv.reader([first_line], skipinitialspace=True):
count_comma = len(item)
finaldump = "create database if not exists " + database_name_mod + "; use " + database_name_mod + "; create external table " + table_name_mod +"("
if(flag):
i = 0
while(i <= count_comma):
finaldump = finaldump + "col"+ str(i) + " String,"
i = i+1
else:
columns = first_line.split(',')
#columns = csv.reader(first_line)
for col in columns:
finaldump = finaldump + "`" + col + "` String,"
finaldump = finaldump[:-1] + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '" +hdfs_file_loc+ "';"
with open(hive_query_path + table_name + '.hql', 'w') as the_file:
the_file.write(finaldump)
No comments:
Post a Comment