--- /dev/null
+authdata.py
+/__pycache__
--- /dev/null
+# Archiving Twitter
+
+This is a set of scripts that lets you keep a copy of your Twitter life:
+
+1. Dumping followers, optionally sending changes to you by e-mail at regular
+ intervals (using cron).
+
+2. Archiving all your tweets as they come in: both their raw data and an HTML
+ dump the way Twitter would render it at the time.
+
+All of this is very hackish, but if it's of any use to you, then great!
+
+## Patches
+
+Patches welcome, please send them to madduck@madduck.net using
+git-format-patch and git-send-email.
+
+## Wishlist
+
+0. Setup instructions
+1. Dumping favourites/starred tweets
+2. Dumping bookmarks
+3. Exporting parameters (such as email address) to configuration
+
+## Copyright
+
+Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+and released under the terms of the Artistic Licence 2.0.
+
+If you need it under another licence, let me know.
--- /dev/null
+# Twitter API credentials.  followerdump.py and tweetfetch.py read these four
+# names via `from authdata import *` and pass them to the Twython constructor.
+# Replace each placeholder with your own value; authdata.py itself is listed
+# in .gitignore.
+consumer_key = '…'
+consumer_secret = '…'
+access_token = '…'
+access_secret = '…'
--- /dev/null
+#!/bin/sh
+#
+# fetchtweets.sh
+#
+# Convenience wrapper for tweetfetch.py
+#
+# Using ./tweets/.sentinel, it iteratively obtains new tweets since the last
+# run, and can thus be run regularly from cron without arguments.
+#
+# Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+# Released under the Artistic Licence 2.0
+#
+
+MYDIR="${0%/*}"
+cd "$MYDIR"
+
+export LC_ALL=$(locale -a | grep utf8 | head -1)
+
+./tweetfetch.py $(cat tweets/.sentinel)
--- /dev/null
+#!/usr/bin/python3
+#
+# followerdump.py
+#
+# Dumps the list of your Twitter followers to stdout in the format
+#
+# username <tab> displayname <tab> UID
+#
+# Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+# Released under the Artistic Licence 2.0
+#
+
+from authdata import *
+
+import twython
+import itertools
+import time
+import sys
+
+twitter = twython.Twython(app_key=consumer_key,
+ app_secret=consumer_secret,
+ oauth_token=access_token,
+ oauth_token_secret=access_secret)
+
+def grouper(iterable, n, fillvalue=None):
+ "Collect data into fixed-length chunks or blocks"
+ args = [iter(iterable)] * n
+ return itertools.zip_longest(fillvalue=fillvalue, *args)
+
+i=0
+followers = twitter.cursor(twitter.get_followers_ids,
+ count=5000, stringify_ids=True)
+
+#print('Obtained followers…', file=sys.stderr)
+
+for chunk in grouper(followers, 100):
+ chunk = [c for c in chunk if c]
+ #print(' fetching user data for chunk of {0:d} users…'.format(len(chunk)),
+ # file=sys.stderr)
+ n = 0
+ for follower in twitter.lookup_user(user_id=','.join(chunk)):
+ #print(' [{0:02d}] @{1:s}'.format(n, follower['screen_name']),
+ # file=sys.stderr)
+ n += 1
+ print('\t'.join([follower[i] for i in ('screen_name','name','id_str')]))
--- /dev/null
+Subproject commit 69cf3e8a9cd84b5894c6c9528ba14b588e3daff9
--- /dev/null
+#!/bin/sh
+#
+# reportfollowers.sh
+#
+# Send a diff of the follower dumps stored under ./followers by mail,
+# designed to be run daily from cron.
+#
+# The optional argument overrides the timestamp to compare against, which
+# defaults to "yesterday"
+#
+# Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+# Released under the Artistic Licence 2.0
+#
+
+set -eu
+
+MYDIR="${0%/*}"
+cd "$MYDIR"/followers
+
+export LC_ALL=$(locale -a | grep utf8 | head -1)
+
+REVSPEC="${1:-master@{yesterday\}}"
+DATESTAMP="$(git show --pretty=format:'%ai (%ar)' --no-patch $REVSPEC)"
+COUNT=$(wc -l dump | cut -d' ' -f1)
+STATS="$(git diff --numstat $REVSPEC | \
+ sed -re 's,([[:digit:]]+)[[:space:]]+([[:digit:]]+).*,+\1/-\2,')"
+
+SENDMAIL="/usr/sbin/sendmail madduck@madduck.net"
+[ ! -t 0 ] || SENDMAIL=cat
+
+$SENDMAIL <<_eof
+From: Twitter follower report <madduck@madduck.net>
+Subject: $(wc -l dump | cut -d' ' -f1) followers ($STATS)
+
+Changes since $DATESTAMP:
+
+$(git diff $REVSPEC | grep '^[-+][^-+]' | sort -k1.1,1.1)
+
+=Total followers: $COUNT
+_eof
--- /dev/null
+#!/bin/sh
+#
+# track_followers.sh
+#
+# Convenience wrapper for followerdump.py
+#
+# Obtains the current list of followers and commits them to Git
+#
+# Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+# Released under the Artistic Licence 2.0
+#
+
+MYDIR="${0%/*}"
+cd "$MYDIR"
+
+export LC_ALL=$(locale -a | grep utf8 | head -1)
+
+./followerdump.py | sort > followers/dump
+cd followers
+[ -s dump ] || exit 0
+COUNT=$(wc -l dump | cut -d' ' -f1)
+MSG="Twitter follower set update, total=$COUNT"
+git add dump
+if git commit -m"$MSG" >&2; then
+ git show HEAD
+fi
--- /dev/null
+#!/usr/bin/python3
+#
+# tweetfetch.py
+#
+# Fetches tweets newer than a given tweet ID and stores data in JSON files, as
+# well as an HTML dump for each in the subdirectory ./tweets.
+#
+# Usage: ./tweetfetch.py 1159606554041040896
+#
+# The highest tweet ID fetched so far is stored in ./tweets/.sentinel
+#
+# Copyright © 2017–2019 by martin f. krafft <madduck@madduck.net>
+# Released under the Artistic Licence 2.0
+#
+
+from authdata import *
+
+from twython import Twython
+import json
+import sys
+
+twitter = Twython(app_key=consumer_key,
+ app_secret=consumer_secret,
+ oauth_token=access_token,
+ oauth_token_secret=access_secret,
+ oauth_version=1)
+
+config = {'include_rts': False,
+ 'count': 200,
+ 'trim_user': True,
+ 'exclude_replies': True,
+ }
+
+if len(sys.argv) > 1:
+ config['since_id'] = sys.argv[1]
+ print("Limiting results to tweets since ID {}".format(config['since_id']),
+ file=sys.stderr)
+
+user_timeline = twitter.get_user_timeline(screen_name="martinkrafft",
+ **config)
+
+max_id = int(config.get('since_id', 0))
+
+print("Fetched {} tweets, writing them to disk…".format(len(user_timeline)),
+ file=sys.stderr)
+
+for tweet in user_timeline:
+ with open("tweets/{}.json".format(tweet['id_str']), "wt") as tf:
+ print(json.dumps(tweet), file=tf)
+
+ with open("tweets/{}.html".format(tweet['id_str']), "wt") as tf:
+ print(Twython.html_for_tweet(tweet, use_expanded_url=True), file=tf)
+
+ print(" wrote tweet ID {}".format(tweet['id_str']),
+ file=sys.stderr)
+
+ max_id = max(tweet['id'], max_id)
+
+print("Writing ID {} to sentinel file…".format(max_id), file=sys.stderr)
+with open("tweets/.sentinel", "wt") as tf:
+ print('{0:d}'.format(max_id), file=tf)
--- /dev/null
+/*
+!/.gitignore