mirror of
https://github.com/thewesker/greptweet.git
synced 2025-12-20 12:11:05 -05:00
refactored
This commit is contained in:
126
fetch-tweets.sh
126
fetch-tweets.sh
@@ -6,8 +6,6 @@
|
|||||||
# Won't work on protected accounts (duh!)
|
# Won't work on protected accounts (duh!)
|
||||||
# No @mentions or DMs from other accounts
|
# No @mentions or DMs from other accounts
|
||||||
|
|
||||||
set -e
|
|
||||||
set -o pipefail
|
|
||||||
umask 002
|
umask 002
|
||||||
api="http://api.twitter.com/1/statuses/user_timeline.xml?"
|
api="http://api.twitter.com/1/statuses/user_timeline.xml?"
|
||||||
|
|
||||||
@@ -30,16 +28,19 @@ then
|
|||||||
test "$2" && since='&max_id='$(tail -n1 $1.txt | awk -F"|" '{ print $1 }') # use max_id to get older tweets
|
test "$2" && since='&max_id='$(tail -n1 $1.txt | awk -F"|" '{ print $1 }') # use max_id to get older tweets
|
||||||
fi
|
fi
|
||||||
|
|
||||||
while test "$twitter_total" -gt "$saved"
|
while test "$twitter_total" -gt "$saved" # Start of the important loop
|
||||||
do
|
do
|
||||||
rm -f $temp
|
|
||||||
echo $1 tweet total "$twitter_total" is greater than the already saved "$saved"
|
|
||||||
echo Trying to get $(($twitter_total - $saved))
|
|
||||||
temp=$(mktemp)
|
|
||||||
echo curl -s "${api}screen_name=${1}&count=200&page=${page}${since}&include_rts=true&trim_user=1"
|
|
||||||
curl -si "${api}screen_name=${1}&count=200&page=${page}${since}&include_rts=true&trim_user=1" > $temp
|
|
||||||
|
|
||||||
|
echo $1 tweet total "$twitter_total" is greater than the already saved "$saved"
|
||||||
|
echo Trying to get $(($twitter_total - $saved))
|
||||||
|
|
||||||
|
temp=$(mktemp)
|
||||||
temp2=$(mktemp)
|
temp2=$(mktemp)
|
||||||
|
|
||||||
|
echo "curl -s \"${api}screen_name=${1}&count=200&page=${page}${since}&include_rts=true&trim_user=1\""
|
||||||
|
curl -si "${api}screen_name=${1}&count=200&page=${page}${since}&include_rts=true&trim_user=1" > $temp
|
||||||
|
echo $?
|
||||||
|
|
||||||
{
|
{
|
||||||
{ while read -r
|
{ while read -r
|
||||||
do
|
do
|
||||||
@@ -57,67 +58,62 @@ grep -iE 'rate|status' # show the interesting twitter rate limits
|
|||||||
|
|
||||||
mv $temp2 $temp
|
mv $temp2 $temp
|
||||||
|
|
||||||
#head $temp # debug
|
if test $(xmlstarlet sel -t -v "count(//statuses/status)" $temp) -eq 0
|
||||||
|
then
|
||||||
|
|
||||||
if test $(xmlstarlet sel -t -v "count(//statuses/status)" $temp) -eq 0
|
head $temp
|
||||||
|
if test "$2" && test "$since"
|
||||||
then
|
then
|
||||||
|
echo No old tweets ${since}
|
||||||
if test $stalled -gt 5 # stall limit
|
elif test "$since"
|
||||||
then
|
then
|
||||||
echo Stalled $stalled times, come back later !
|
echo No new tweets ${since}
|
||||||
rm -f $temp
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
stalled=$(($stalled + 1 ))
|
|
||||||
echo Stalling for the $stalled time
|
|
||||||
sleep $(( RANDOM % 5 + 1 ))
|
|
||||||
continue
|
|
||||||
|
|
||||||
else
|
else
|
||||||
|
echo "Twitter is returning empty responses on page ${page} :("
|
||||||
temp2=$(mktemp)
|
|
||||||
xmlstarlet sel -t -m "//statuses/status" -v "id" -o "|" -v "created_at" -o "|" -v "normalize-space(text)" -n $temp |
|
|
||||||
perl -MHTML::Entities -pe 'decode_entities($_)' > $temp2
|
|
||||||
sed -i '/^$/d' $temp2
|
|
||||||
if test -z $temp2
|
|
||||||
then
|
|
||||||
echo $temp2 is empty
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
cat $temp2
|
|
||||||
if test -f $1.txt
|
|
||||||
then
|
|
||||||
mv $1.txt $temp
|
|
||||||
before=$(wc -l $temp | awk '{print $1}')
|
|
||||||
else
|
|
||||||
before=0
|
|
||||||
> $temp
|
|
||||||
fi
|
|
||||||
if test -s $temp2
|
|
||||||
then
|
|
||||||
cat $temp $temp2 | sort -r -n | uniq > $1.txt
|
|
||||||
after=$(wc -l $1.txt | awk '{print $1}')
|
|
||||||
echo Before: $before After: $after
|
|
||||||
if test "$before" -eq "$after"
|
|
||||||
then
|
|
||||||
echo "Unable to retrieve anything new, a since_id $since problem or 3200 limit?"
|
|
||||||
echo Approximately $(( $twitter_total - $after)) missing tweets
|
|
||||||
rm -f $temp $temp2
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo Empty $temp2
|
|
||||||
echo Twitter is returning empty responses, so we assume we have reached the limit!
|
|
||||||
mv $temp $1.txt
|
|
||||||
rm -f $temp2
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
rm $temp2
|
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
rm -f $temp $temp2
|
||||||
|
exit
|
||||||
|
|
||||||
page=$(($page + 1))
|
fi
|
||||||
saved=$(wc -l $1.txt | tail -n1 | awk '{print $1}')
|
|
||||||
|
xmlstarlet sel -t -m "//statuses/status" -v "id" -o "|" -v "created_at" -o "|" -v "normalize-space(text)" -n $temp > $temp2
|
||||||
|
cat $temp2 | perl -MHTML::Entities -pe 'decode_entities($_)' > $temp
|
||||||
|
cat $temp | sed '/^$/d' > $temp2
|
||||||
|
|
||||||
|
if test -z $temp2
|
||||||
|
then
|
||||||
|
echo $temp2 is empty
|
||||||
|
rm -f $temp $temp2
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
#cat $temp2
|
||||||
|
|
||||||
|
if test -f $1.txt
|
||||||
|
then
|
||||||
|
mv $1.txt $temp
|
||||||
|
before=$(wc -l $temp | awk '{print $1}')
|
||||||
|
else
|
||||||
|
before=0
|
||||||
|
> $temp
|
||||||
|
fi
|
||||||
|
|
||||||
|
cat $temp $temp2 | sort -r -n | uniq > $1.txt
|
||||||
|
|
||||||
|
after=$(wc -l $1.txt | awk '{print $1}')
|
||||||
|
echo Before: $before After: $after
|
||||||
|
|
||||||
|
if test "$before" -eq "$after"
|
||||||
|
then
|
||||||
|
echo Uable to retrieve anything new. Approximately $(( $twitter_total - $after)) missing tweets
|
||||||
|
rm -f $temp $temp2
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f $temp $temp2
|
||||||
|
page=$(($page + 1))
|
||||||
|
saved=$(wc -l $1.txt | tail -n1 | awk '{print $1}')
|
||||||
|
echo $saved
|
||||||
|
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user