You must have cloned and installed Twitter Bot Monitor on your computer. Please see the GitHub repo for installation instructions. The repo is private, so please email me your GitHub handle so that I can grant you access.
Zhouhan Chen zc1245@nyu.edu
import json
import util
import streamer
import datetime
import numpy as np
import pandas as pd
import detect
from twitter_credential import token_dict
from collections import defaultdict
# ---- Run configuration -----------------------------------------------------
# Name of this collection run; it is fed to util.get_full_src_path() and
# util.get_full_prefix() to derive the data file path and output prefix.
prefix = "trump_test2"
# Keyword list handed to the streamer's collect() call.
keyword = ["trump"]
# Collection limits handed to collect().
# NOTE(review): duration is presumably in seconds (one hour) — confirm
# against streamer.Streamer.collect.
num_tweets = 20000
duration = 60 * 60
# Key used to look up the credential entry in token_dict.
auth_key = "streaming_1"
# Derive the raw data file path from the run prefix and echo it.
# Each print emits the label, the value, and a trailing blank line.
src_path = util.get_full_src_path(prefix)
print("The absolute path of raw data file is", src_path, "", sep="\n")

# Derive the prefix shared by all files produced by later steps and echo it.
full_prefix = util.get_full_prefix(prefix)
print("The prefix for all subsequent files is", full_prefix, "", sep="\n")
# Create a streamer using the credential entry selected by auth_key, then
# collect tweets matching `keyword` into the raw data file at src_path.
# NOTE(review): collection presumably stops at num_tweets tweets or after
# `duration` elapses, whichever comes first — confirm in Streamer.collect.
tweetStreamer = streamer.Streamer(auth_key=token_dict[auth_key])
tweetStreamer.collect(
    keyword=keyword,
    filename=src_path,
    num_tweets=num_tweets,
    duration=duration,
    whitelist=[],
    save_file=True,
    print_info="info",
)
# Set up the spam detector over the raw data file; its output files use
# full_prefix. URL-based mode is switched off for this run.
detector = detect.SpamDetector(
    prefix=full_prefix,
    url_based=False,
    sourcefile=src_path,
)

# Generate the user info dictionary.
detector.save_user_info()
# EDA: plot the distribution of followers count
# Plotting setup for the exploratory cells that follow. Statement order
# matters here: the matplotlib style is selected first and seaborn's
# defaults are applied last via sns.set().
import matplotlib.pyplot as plt
plt.style.use('classic')
# IPython line magic, not Python syntax: this only runs under
# IPython/Jupyter.
%matplotlib inline
import seaborn as sns
# NOTE(review): presumably this overrides parts of the 'classic' style
# selected above — confirm against the seaborn documentation.
sns.set()
# Collect the author's follower count from every record in the raw data file.
# NOTE(review): one value is taken per tweet, so an account that appears in
# several tweets is presumably counted several times — confirm this is the
# intended meaning of "Number of accounts" below.
followers_count = [
    tweet['user']['followers_count'] for tweet in util.loadjson(src_path)
]

# Summary statistics are computed over the full, unfiltered list.
print("followers count mean is ", np.mean(followers_count))
print("followers count std is ", np.std(followers_count))

# Only counts below 10000 are kept for the histogram.
followers_count = [num for num in followers_count if num < 10000]

plt.hist(followers_count, alpha=0.5, bins=20)
plt.xlabel('Followers count')
plt.ylabel('Number of accounts')
plt.title('Histogram of followers count')
from utility.wordcloud_maker import generate_cloud
from IPython.display import Image
from IPython.core.display import HTML
# Gather the text of every record in the raw data file and build a word
# cloud from the space-joined corpus.
text = [tweet['text'] for tweet in util.loadjson(src_path)]
generate_cloud(' '.join(text))  # , full_prefix + 'wordcloud')
# NOTE(review): the commented-out second argument above and the line below
# presumably save the cloud to disk and display the saved image — confirm
# against utility.wordcloud_maker.generate_cloud before enabling.
# Image(filename = full_prefix + 'wordcloud.png')