#!/usr/bin/python3
#
# tweetfetch.py
#
# Fetches tweets newer than a given tweet ID and stores each one as a JSON
# file, as well as an HTML dump, in the subdirectory ./tweets.
#
# Usage: ./tweetfetch.py 1159606554041040896
#
# The highest tweet ID seen so far is stored in ./tweets/.sentinel
#
# Copyright © 2017–2019 by martin f. krafft
# Released under the Artistic Licence 2.0
#
import json
import os
import sys

from twython import Twython

# Import the credentials by name so it is obvious which ones are required.
from authdata import access_secret, access_token, consumer_key, consumer_secret

# Directory that receives the per-tweet files and the sentinel.
TWEET_DIR = "tweets"
# Account whose timeline is fetched.
SCREEN_NAME = "martinkrafft"

# Query parameters handed through to the user-timeline request.
config = {
    'include_rts': False,
    'count': 200,
    'trim_user': True,
    'exclude_replies': True,
}

# Validate the optional tweet ID *before* talking to the API, so that a typo
# fails immediately with a clear message instead of after a wasted request.
max_id = 0
if len(sys.argv) > 1:
    try:
        max_id = int(sys.argv[1])
    except ValueError:
        sys.exit("Invalid tweet ID: {!r}".format(sys.argv[1]))
    config['since_id'] = sys.argv[1]
    print("Limiting results to tweets since ID {}".format(config['since_id']),
          file=sys.stderr)

twitter = Twython(app_key=consumer_key,
                  app_secret=consumer_secret,
                  oauth_token=access_token,
                  oauth_token_secret=access_secret,
                  oauth_version=1)

user_timeline = twitter.get_user_timeline(screen_name=SCREEN_NAME, **config)

print("Fetched {} tweets, writing them to disk…".format(len(user_timeline)),
      file=sys.stderr)

# A fresh checkout may not have the output directory yet.
os.makedirs(TWEET_DIR, exist_ok=True)

for tweet in user_timeline:
    # Tweet text is arbitrary Unicode; write UTF-8 explicitly rather than
    # relying on the locale's default encoding.
    json_path = "{}/{}.json".format(TWEET_DIR, tweet['id_str'])
    with open(json_path, "wt", encoding="utf-8") as tf:
        print(json.dumps(tweet), file=tf)

    html_path = "{}/{}.html".format(TWEET_DIR, tweet['id_str'])
    with open(html_path, "wt", encoding="utf-8") as tf:
        print(Twython.html_for_tweet(tweet, use_expanded_url=True), file=tf)

    print(" wrote tweet ID {}".format(tweet['id_str']), file=sys.stderr)
    # Track the newest ID seen; it never drops below the requested since_id.
    max_id = max(tweet['id'], max_id)

print("Writing ID {} to sentinel file…".format(max_id), file=sys.stderr)
with open("{}/.sentinel".format(TWEET_DIR), "wt", encoding="utf-8") as tf:
    print('{0:d}'.format(max_id), file=tf)