Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions config/essetup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@
'original_id': {'type': 'string'},
'original_name': {'type': 'string'}
}
},
'users': {
'id': {'type': 'long'},
'name': {'type': 'string'},
'screen_name': {'type': 'string'},
'followers_count': {'type': 'long'},
'friends_count': {'type': 'long'},
'location': {'type': 'string'},
'description': {'type': 'string'},
'favourites_count': {'type': 'long'},
'statuses_count': {'type': 'long'},
'listed_count': {'type': 'long'},
'profile_background_image_url': {'type': 'string'},
'profile_image_url': {'type': 'string'}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions crontab
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
0,15,30,45 * * * * python /discursive/index_twitter_stream.py /discursive/topics.txt
0 0 * * * python /discursive/index_user_profiles.py /discursive/users.txt
76 changes: 76 additions & 0 deletions index_user_profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
import tweepy
from config import esconn, aws_config, twitter_config
import os
from datetime import datetime as dt
from config import s3conn

# unicode mgmt
import sys
reload(sys)
sys.setdefaultencoding('utf8')

# Twitter auth and api call setup
auth = tweepy.OAuthHandler(twitter_config.CONSUMER_KEY, twitter_config.CONSUMER_SECRET)
auth.set_access_token(twitter_config.ACCESS_TOKEN, twitter_config.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

# Get elasticsearch connection
es = esconn.esconn()

if len(sys.argv) > 2:
sys.exit('ERROR: Received 2 or more arguments: {} {} {} Expected 1: User file name'.format(sys.argv[0], sys.argv[1], sys.argv[2]))

elif len(sys.argv) == 2:
try:
with open(sys.argv[1]) as f:
users = f.readlines()
except Exception:
sys.exit('ERROR: Expected user file %s not found' % sys.argv[1])
else:
try:
with open('users.txt') as f:
users = f.readlines()
except:
sys.exit('ERROR: Default users.txt not found. No alternate topic file was provided')


USERS = [user.replace('\n', '').strip() for user in users]

def retrieve_user_data():
try:
return api.lookup_users(user_ids=USERS)
except tweepy.TweepError as e:
sys.exit("An error occured looking up the user_ids. Verify the correctness and existance of the given screen names, handles or ids.")

def map_user_for_es(user, time_stamp):
return {
'timestamp': time_stamp,
'id': user.id,
'name': user.screen_name,
'screen_name': user.screen_name,
'followers_count': user.followers_count,
'friends_count': user.friends_count,
'location': user.location,
'description': user.description,
'favourites_count': user.favourites_count,
'statuses_count': user.statuses_count,
'listed_count': user.listed_count,
'profile_background_image_url': user.profile_background_image_url,
'profile_image_url': user.profile_image_url
}

def dump_to_elastic(bodydata):
es.index(index='twitter', doc_type="users", body=bodydata)

def get_time_stamp():
return dt.now()

def get_twitter_users_pipeline():
time_stamp = get_time_stamp()
user_data = retrieve_user_data()
for user in user_data:
mapped_user_data = map_user_for_es(user, time_stamp)
dump_to_elastic(mapped_user_data)

get_twitter_users_pipeline()
2 changes: 2 additions & 0 deletions users.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
25073877
20536157