Commit febb11d2 authored by Juan José Miranda

Subimos archivo que obtiene los datos de Twitter.

parent dba221b3
import json
import os
import time

import pymongo
from pymongo import MongoClient
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Twitter API credentials: consumer key, consumer secret, access token, access secret.
# They are read from the environment so that secrets never live in version
# control; a missing variable raises KeyError at start-up instead of failing
# later during authentication.
# NOTE(review): the values that used to be hard-coded here were committed to
# the repository and should be treated as compromised -- revoke and
# regenerate them before deploying.
ckey = os.environ["TWITTER_CONSUMER_KEY"]
csecret = os.environ["TWITTER_CONSUMER_SECRET"]
atoken = os.environ["TWITTER_ACCESS_TOKEN"]
asecret = os.environ["TWITTER_ACCESS_SECRET"]

# MongoDB handles: the "tweets" collection of the "tweets" database on the
# local server (localhost:27017).
client = MongoClient("localhost", 27017)
db = client['tweets']
tweets = db.tweets
class listener(StreamListener):
    """Stream listener that stores every received message in MongoDB.

    Each raw JSON payload is decoded and inserted unchanged into the
    module-level ``tweets`` collection, so every field delivered in the
    payload remains available in the stored document.
    """

    @staticmethod
    def insertar_retweet(all_data):
        """Decode a JSON payload and insert it into the ``tweets`` collection.

        Declared as a static method so it can be called both through the
        class and through an instance (it takes no ``self``).

        Args:
            all_data: JSON text of the document to store; it is passed
                straight to ``json.loads``.

        Returns:
            None. The id of the inserted document is printed.

        Raises:
            Whatever ``json.loads`` or ``insert_one`` raise; nothing is
            caught here.
        """
        document = json.loads(all_data)
        result = tweets.insert_one(document)
        print("First article key is: {}".format(result.inserted_id))

    def on_data(self, data):
        """Store one raw stream message; log and pause if storing fails.

        Args:
            data: raw JSON text of the message as handed over by the stream.

        Returns:
            None on both the success and the failure path.
            NOTE(review): tweepy 3.x appears to close the stream only when
            this callback returns False, so None keeps it running --
            confirm against the installed tweepy version.
        """
        try:
            self.insertar_retweet(data)
        except Exception as e:
            # Catch Exception rather than BaseException so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still stop the script
            # instead of being swallowed here and followed by a sleep.
            print ('problema en data ', str(e))
            # Wait 5 seconds before the next message is handled
            # (presumably to give a failing database time to recover).
            time.sleep(5)

    def on_error(self, status):
        """Print the error status reported by the stream.

        Args:
            status: the status value passed in by the stream on error.

        Returns:
            None.
            NOTE(review): consider returning False for status 420 (rate
            limiting) so the stream disconnects instead of retrying --
            confirm the desired behaviour against the tweepy documentation.
        """
        print (status)
# Build the OAuth handler from the application credentials, attach the user
# access token, then open a stream driven by our listener and call sample().
auth = OAuthHandler(consumer_key=ckey, consumer_secret=csecret)
auth.set_access_token(key=atoken, secret=asecret)
twitterStream = Stream(auth=auth, listener=listener())
twitterStream.sample()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment