mirror of
https://github.com/thewesker/greptweet.git
synced 2025-12-22 13:01:06 -05:00
first commit
This commit is contained in:
124
fetch-tweets.sh
Executable file
124
fetch-tweets.sh
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/bin/bash
# Twitter backup script - hendry AT iki.fi - please mail me suggestions to make this suckless
# http://dev.twitter.com/doc/get/statuses/user_timeline
#
# Usage: fetch-tweets.sh USERNAME [OLDER]
#   USERNAME  Twitter screen name. Tweets are stored in ./USERNAME.txt, one
#             per line as "id|created_at|text", sorted by id, highest first.
#   OLDER     Any non-empty value: continue from the lowest saved id using
#             max_id (older tweets) instead of since_id (newer tweets).
#
# Requires: curl, xmlstarlet, perl with HTML::Entities, sed with -i support.
#
# Known issues:
# API only allows 3200 tweets to be downloaded this way :((
# Won't work on protected accounts (duh!)
# No @mentions or DMs from other accounts

set -e
set -o pipefail
umask 002
api="http://api.twitter.com/1/statuses/user_timeline.xml?"

if ! test "$1"
then
	echo -e "Please specify twitter username\n e.g. $0 kaihendry"
	exit 1
fi

user=$1
archive="$user.txt"

# Print the number of lines in file $1 as a bare integer (no padding).
count_lines() {
	wc -l < "$1" | tr -d '[:space:]'
}

# Total number of tweets the account reports. A failed lookup must not be
# mistaken for "nothing to do", so anything that is not a plain integer aborts.
twitter_total=$(curl -s "http://api.twitter.com/1/users/lookup.xml?screen_name=$user" |
	xmlstarlet sel -t -m "//users/user/statuses_count" -v .) || true
twitter_total=${twitter_total//[[:space:]]/}
case "$twitter_total" in
	'' | *[!0-9]*)
		echo "Unable to look up the tweet count for $user" >&2
		exit 1
		;;
esac

page=1
saved=0
stalled=0
since=

if test -f "$archive"
then
	saved=$(count_lines "$archive")
	if test "$2"
	then
		# use max_id to get older tweets
		boundary_param=max_id
		boundary_id=$(tail -n1 -- "$archive" | awk -F"|" '{ print $1 }')
	else
		boundary_param=since_id
		boundary_id=$(head -n1 -- "$archive" | awk -F"|" '{ print $1 }')
	fi
	# An empty archive has no id to resume from; omit the parameter then.
	if test "$boundary_id"
	then
		since="&${boundary_param}=${boundary_id}"
	fi
fi

# All scratch files live in one private directory that is removed on every
# exit path, including aborts caused by set -e.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT
headers="$tmpdir/headers"
body="$tmpdir/body.xml"
fresh="$tmpdir/fresh.txt"
merged="$tmpdir/merged.txt"

while test "$twitter_total" -gt "$saved"
do
	echo "$user" tweet total "$twitter_total" is greater than the already saved "$saved"
	echo Trying to get $((twitter_total - saved))

	url="${api}screen_name=${user}&count=200&page=${page}${since}&include_rts=true&trim_user=1"
	echo curl -s "$url"

	# Start from empty files so a response without headers or body still
	# leaves something for grep and xmlstarlet to open.
	: > "$headers"
	: > "$body"
	curl -s -D "$headers" -o "$body" "$url"

	# Show the interesting twitter rate limits. No matching header is not
	# an error, so grep's "no match" status must not trip set -e.
	# A reset time given as epoch seconds can be decoded with e.g.:
	# date --date='@1320361995'
	grep -iE 'rate|status' "$headers" || true

	#head "$body" # debug

	# An error document or unparsable body counts as "no tweets" and is
	# retried through the stall logic instead of aborting the script.
	status_count=$(xmlstarlet sel -t -v "count(//statuses/status)" "$body") || true
	case "$status_count" in
		'' | *[!0-9]*) status_count=0 ;;
	esac

	if test "$status_count" -eq 0
	then
		if test "$stalled" -gt 5 # stall limit
		then
			echo "Stalled $stalled times, come back later !"
			exit 0
		fi
		stalled=$((stalled + 1))
		echo "Stalling for the $stalled time"
		sleep $((RANDOM % 5 + 1))
		continue
	fi

	# One line per tweet: id|created_at|text, with HTML entities decoded.
	xmlstarlet sel -t -m "//statuses/status" -v "id" -o "|" -v "created_at" -o "|" -v "normalize-space(text)" -n "$body" |
		perl -MHTML::Entities -pe 'decode_entities($_)' > "$fresh"
	sed -i '/^$/d' "$fresh"

	if ! test -s "$fresh"
	then
		echo Empty "$fresh"
		echo Twitter is returning empty responses, so we assume we have reached the limit!
		exit 0
	fi

	cat "$fresh"

	# Merge into a scratch file first and replace the archive only once the
	# merge succeeded, so an interruption never loses already saved tweets.
	if test -f "$archive"
	then
		before=$(count_lines "$archive")
		sort -r -n -- "$archive" "$fresh" | uniq > "$merged"
	else
		before=0
		sort -r -n -- "$fresh" | uniq > "$merged"
	fi
	mv -- "$merged" "$archive"

	after=$(count_lines "$archive")
	echo Before: "$before" After: "$after"

	if test "$before" -eq "$after"
	then
		echo "Unable to retrieve anything new, a since_id $since problem or 3200 limit?"
		echo Approximately $((twitter_total - after)) missing tweets
		exit 0
	fi

	page=$((page + 1))
	saved=$after
done

echo "$user" saved "$saved" tweets of "$twitter_total": You are uptodate!
|
||||
Reference in New Issue
Block a user