from datetime import datetime
from multiprocessing.pool import ThreadPool
# Turn off urllib3's warnings for the whole process. The HTTP calls in this
# script pass verify=False, which presumably is what would otherwise trigger
# a warning on every request.
# NOTE(review): the import of `requests` is not visible in this part of the file.
requests.packages.urllib3.disable_warnings()
# File locations used by this script. Raw strings are used so the Windows
# backslashes need no escaping.
# NOTE: the doubled separator after "inmoment_main" reproduces the previous
# runtime value exactly. It used to be spelled '\\\i', whose trailing '\i' is
# an invalid escape sequence. It is presumably a typo; confirm before
# collapsing it to a single separator.

# Master list of comment IDs to fetch, one ID per line.
#comment_list_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentlist'
comment_list_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentlist_nov2017'

# Progress log: IDs that have already been fetched and written, one per line.
commentidsdeltapath = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentdeltalist_nov2017'
#commentidsdeltapath = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentdeltalist'

# Output file: one JSON document per line.
#raw_comment_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\inmoment_weekly_data\comment_2017_2018'
raw_comment_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\inmoment_weekly_data\comment_nov2017'

# NOTE(review): credential is hard-coded in source; consider loading it from
# the environment or a secrets store instead.
access_token = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
def read_comment_ids(comment_list_path):
    """Return the lines of the file at ``comment_list_path`` as a list.

    Line terminators are stripped. An empty file yields an empty list.

    Raises:
        OSError: if the file cannot be opened (for example, it does not exist).
    """
    # Use a context manager so the handle is closed even if reading fails.
    with open(comment_list_path) as id_file:
        return id_file.read().splitlines()
def get_updated_comment_list(comment_ids_list, comment_ids_delta_list):
    """Return the IDs in ``comment_ids_list`` that are not in ``comment_ids_delta_list``.

    Both inputs are converted to sets, so duplicates are dropped and the
    order of the returned list is unspecified.
    """
    already_done = set(comment_ids_delta_list)
    pending = set(comment_ids_list).difference(already_done)
    return list(pending)
def update_comment_delta(commentidsdeltapath, commentId):
    """Append ``commentId`` as one line to the file at ``commentidsdeltapath``.

    The file is opened in append mode, so it is created if it does not exist.
    """
    with open(commentidsdeltapath, "a") as delta_file:
        delta_file.write(str(commentId) + '\n')
def getDateTime():
    """Return the current date and time as a string with spaces removed."""
    stamp = str(datetime.now())
    return stamp.replace(' ', '')
def writeComment(text, commentId):
    """Append ``text`` as one JSON line to the raw comment file and log the ID.

    The output goes to the module-level ``raw_comment_path``. After it is
    written, ``commentId`` is appended to the file at ``commentidsdeltapath``.
    """
    with open(raw_comment_path, "a") as raw_file:
        json.dump(text, raw_file)
        raw_file.write('\n')
    # Record the ID only after the payload has been written out.
    update_comment_delta(commentidsdeltapath, commentId)
def getComment(commentId):
    """Fetch one comment from the REST API and return ``(payload, commentId)``.

    ``payload`` is the JSON-decoded response body. If the first request
    raises, the error is printed and the request is retried once after a
    30 second pause; a failure of the retry propagates to the caller.

    Raises:
        RuntimeError: if the response status code is not 200. Previously this
            case left the result unbound and failed with an unrelated error.
    """
    headers = {
        'content-type': 'application/json',
        'Authorization': 'OAuth ' + access_token,
    }
    get_comment = "https://www.xxxxxxxxxxxxt.com/api/rest/1.0/comment/" + str(commentId)
    try:
        # NOTE(review): verify=False turns off TLS certificate verification.
        r = requests.get(get_comment, headers=headers, verify=False)
    except Exception as err:
        print(str(err))
        # Announce the pause before sleeping so the log reflects what is happening.
        print('Sleeping for 30 sec..........')
        time.sleep(30)
        r = requests.get(get_comment, headers=headers, verify=False)
    if r.status_code != 200:
        raise RuntimeError(
            'Request for comment ' + str(commentId)
            + ' failed with HTTP status ' + str(r.status_code)
        )
    return json.loads(r.text), commentId
# ---- Script body: fetch every comment ID that has not been processed yet ----

# Load the master list of IDs. Nothing below can run without it, so after
# reporting the problem the original error is re-raised instead of letting
# the script fail later with a NameError on comment_ids_list.
try:
    comment_ids_list = read_comment_ids(comment_list_path)
except Exception:
    print('There is an error reading from ' + str(comment_list_path))
    raise

# Load the IDs already processed; a read failure is treated as a first run.
try:
    comment_ids_delta_list = read_comment_ids(commentidsdeltapath)
except Exception:
    print('There is an error reading from ' + str(commentidsdeltapath)+ ' This is first load')
    comment_ids_delta_list = []

diff_commentid_list_raw = get_updated_comment_list(comment_ids_list,comment_ids_delta_list)
count_diff_id = len(diff_commentid_list_raw)
if count_diff_id == 0:
    print("All comment id's done")
else:
    # Fetch with 20 worker threads; results are written from this thread as
    # they arrive. The context manager shuts the pool down afterwards.
    with ThreadPool(20) as pool:
        for text, commentId in pool.imap_unordered(getComment, diff_commentid_list_raw):
            writeComment(text, commentId)
    # Re-read the progress log to check whether every ID is now recorded.
    try:
        comment_ids_delta_list = read_comment_ids(commentidsdeltapath)
        diff_commentid_list_raw = get_updated_comment_list(comment_ids_list,comment_ids_delta_list)
        count_diff_id = len(diff_commentid_list_raw)
        if count_diff_id == 0:
            print('DOne !!!!!!')
        else:
            print('[main] - Count of diff id is not equal to 0, NEED TO RERUN')
    except Exception as e:
        print(str(e))
from multiprocessing.pool import ThreadPool
# Turn off urllib3's warnings for the whole process. The HTTP calls in this
# script pass verify=False, which presumably is what would otherwise trigger
# a warning on every request.
# NOTE(review): the import of `requests` is not visible in this part of the file.
requests.packages.urllib3.disable_warnings()
# File locations used by this script. Raw strings are used so the Windows
# backslashes need no escaping.
# NOTE: the doubled separator after "inmoment_main" reproduces the previous
# runtime value exactly. It used to be spelled '\\\i', whose trailing '\i' is
# an invalid escape sequence. It is presumably a typo; confirm before
# collapsing it to a single separator.

# Master list of comment IDs to fetch, one ID per line.
#comment_list_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentlist'
comment_list_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentlist_nov2017'

# Progress log: IDs that have already been fetched and written, one per line.
commentidsdeltapath = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentdeltalist_nov2017'
#commentidsdeltapath = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\\inmoment_weekly_surveyids\commentdeltalist'

# Output file: one JSON document per line.
#raw_comment_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\inmoment_weekly_data\comment_2017_2018'
raw_comment_path = r'C:\Users\lnanda\Desktop\Lokesh\inmoment\inmoment_main\inmoment_weekly_data\comment_nov2017'

# NOTE(review): credential is hard-coded in source; consider loading it from
# the environment or a secrets store instead.
access_token = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
def read_comment_ids(comment_list_path):
    """Return the lines of the file at ``comment_list_path`` as a list.

    Line terminators are stripped. An empty file yields an empty list.

    Raises:
        OSError: if the file cannot be opened (for example, it does not exist).
    """
    # Use a context manager so the handle is closed even if reading fails.
    with open(comment_list_path) as id_file:
        return id_file.read().splitlines()
def get_updated_comment_list(comment_ids_list, comment_ids_delta_list):
    """Return the IDs in ``comment_ids_list`` that are not in ``comment_ids_delta_list``.

    Both inputs are converted to sets, so duplicates are dropped and the
    order of the returned list is unspecified.
    """
    already_done = set(comment_ids_delta_list)
    pending = set(comment_ids_list).difference(already_done)
    return list(pending)
def update_comment_delta(commentidsdeltapath, commentId):
    """Append ``commentId`` as one line to the file at ``commentidsdeltapath``.

    The file is opened in append mode, so it is created if it does not exist.
    """
    with open(commentidsdeltapath, "a") as delta_file:
        delta_file.write(str(commentId) + '\n')
def getDateTime():
    """Return the current date and time as a string with spaces removed."""
    stamp = str(datetime.now())
    return stamp.replace(' ', '')
def writeComment(text, commentId):
    """Append ``text`` as one JSON line to the raw comment file and log the ID.

    The output goes to the module-level ``raw_comment_path``. After it is
    written, ``commentId`` is appended to the file at ``commentidsdeltapath``.
    """
    with open(raw_comment_path, "a") as raw_file:
        json.dump(text, raw_file)
        raw_file.write('\n')
    # Record the ID only after the payload has been written out.
    update_comment_delta(commentidsdeltapath, commentId)
def getComment(commentId):
    """Fetch one comment from the REST API and return ``(payload, commentId)``.

    ``payload`` is the JSON-decoded response body. If the first request
    raises, the error is printed and the request is retried once after a
    30 second pause; a failure of the retry propagates to the caller.

    Raises:
        RuntimeError: if the response status code is not 200. Previously this
            case left the result unbound and failed with an unrelated error.
    """
    headers = {
        'content-type': 'application/json',
        'Authorization': 'OAuth ' + access_token,
    }
    get_comment = "https://www.xxxxxxxxxxxxt.com/api/rest/1.0/comment/" + str(commentId)
    try:
        # NOTE(review): verify=False turns off TLS certificate verification.
        r = requests.get(get_comment, headers=headers, verify=False)
    except Exception as err:
        print(str(err))
        # Announce the pause before sleeping so the log reflects what is happening.
        print('Sleeping for 30 sec..........')
        time.sleep(30)
        r = requests.get(get_comment, headers=headers, verify=False)
    if r.status_code != 200:
        raise RuntimeError(
            'Request for comment ' + str(commentId)
            + ' failed with HTTP status ' + str(r.status_code)
        )
    return json.loads(r.text), commentId
# ---- Script body: fetch every comment ID that has not been processed yet ----

# Load the master list of IDs. Nothing below can run without it, so after
# reporting the problem the original error is re-raised instead of letting
# the script fail later with a NameError on comment_ids_list.
try:
    comment_ids_list = read_comment_ids(comment_list_path)
except Exception:
    print('There is an error reading from ' + str(comment_list_path))
    raise

# Load the IDs already processed; a read failure is treated as a first run.
try:
    comment_ids_delta_list = read_comment_ids(commentidsdeltapath)
except Exception:
    print('There is an error reading from ' + str(commentidsdeltapath)+ ' This is first load')
    comment_ids_delta_list = []

diff_commentid_list_raw = get_updated_comment_list(comment_ids_list,comment_ids_delta_list)
count_diff_id = len(diff_commentid_list_raw)
if count_diff_id == 0:
    print("All comment id's done")
else:
    # Fetch with 20 worker threads; results are written from this thread as
    # they arrive. The context manager shuts the pool down afterwards.
    with ThreadPool(20) as pool:
        for text, commentId in pool.imap_unordered(getComment, diff_commentid_list_raw):
            writeComment(text, commentId)
    # Re-read the progress log to check whether every ID is now recorded.
    try:
        comment_ids_delta_list = read_comment_ids(commentidsdeltapath)
        diff_commentid_list_raw = get_updated_comment_list(comment_ids_list,comment_ids_delta_list)
        count_diff_id = len(diff_commentid_list_raw)
        if count_diff_id == 0:
            print('DOne !!!!!!')
        else:
            print('[main] - Count of diff id is not equal to 0, NEED TO RERUN')
    except Exception as e:
        print(str(e))