mirror of
https://github.com/thewesker/greptweet.git
synced 2025-12-23 05:21:06 -05:00
Understanding Jamie's patch
This commit is contained in:
@@ -15,19 +15,25 @@ then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
twitter_total=$(curl -s "http://api.twitter.com/1/users/lookup.xml?screen_name=$1" | xmlstarlet sel -t -m "//users/user/statuses_count" -v .)
|
if ! twitter_total=$(curl -s "http://api.twitter.com/1/users/lookup.xml?screen_name=$1" | xmlstarlet sel -t -m "//users/user/statuses_count" -v .)
|
||||||
|
then
|
||||||
|
curl "http://api.twitter.com/1/users/lookup.xml?screen_name=$1"
|
||||||
|
echo not working
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
page=1
|
page=1
|
||||||
saved=0
|
saved=0
|
||||||
stalled=0
|
stalled=0
|
||||||
|
|
||||||
if test -f $1.txt
|
if test -s $1.txt
|
||||||
then
|
then
|
||||||
saved=$(wc -l $1.txt | tail -n1 | awk '{print $1}')
|
saved=$(wc -l $1.txt | tail -n1 | awk '{print $1}')
|
||||||
since='&since_id='$(head -n1 $1.txt | awk -F"|" '{ print $1 }')
|
since='&since_id='$(head -n1 $1.txt | awk -F"|" '{ print $1 }')
|
||||||
test "$2" && since='&max_id='$(tail -n1 $1.txt | awk -F"|" '{ print $1 }') # use max_id to get older tweets
|
test "$2" && since='&max_id='$(tail -n1 $1.txt | awk -F"|" '{ print $1 }') # use max_id to get older tweets
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo T:"$twitter_total" S:"$saved"
|
||||||
while test "$twitter_total" -gt "$saved" # Start of the important loop
|
while test "$twitter_total" -gt "$saved" # Start of the important loop
|
||||||
do
|
do
|
||||||
|
|
||||||
@@ -65,7 +71,10 @@ grep -iE 'rate|status' # show the interesting twitter rate limits
|
|||||||
|
|
||||||
mv $temp2 $temp
|
mv $temp2 $temp
|
||||||
|
|
||||||
if test $(xmlstarlet sel -t -v "count(//statuses/status)" $temp) -eq 0
|
cat $temp
|
||||||
|
|
||||||
|
cstatus=$(xmlstarlet sel -t -v "count(//statuses/status)" $temp)
|
||||||
|
if test $cstatus -eq 0
|
||||||
then
|
then
|
||||||
|
|
||||||
head $temp
|
head $temp
|
||||||
@@ -83,15 +92,32 @@ then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo "Parsing $cstatus status(es)"
|
||||||
xmlstarlet sel -t -m "//statuses/status" -v "id" -o "|" -v "created_at" -o "|" -v "normalize-space(text)" -n $temp > $tmpStats
|
xmlstarlet sel -t -m "//statuses/status" -v "id" -o "|" -v "created_at" -o "|" -v "normalize-space(text)" -n $temp > $tmpStats
|
||||||
|
|
||||||
|
cat $tmpStats
|
||||||
|
|
||||||
# Get long/short URLs preformatted for sed, have to specifically escape ampersands for sed.
|
# Get long/short URLs preformatted for sed, have to specifically escape ampersands for sed.
|
||||||
xmlstarlet sel -t -m "//statuses/status/entities/urls/url" -o "s," -v "url" -o "," -v "expanded_url" -o ",g" -n $temp | sed "s,\&,\\\&,g" > $tmpURLs
|
xmlstarlet sel -t -m "//statuses/status/entities/urls/url" -o "s," -v "url" -o "," -v "expanded_url" -o ",g" -n $temp | sed "s,\&,\\\&,g" > $tmpURLs
|
||||||
# Replace short URLs with long URLs
|
# Replace short URLs with long URLs
|
||||||
|
|
||||||
|
if test -s $tmpURLs
|
||||||
|
then
|
||||||
|
cat $tmpURLs
|
||||||
|
# Each regex applied to the whole of the 200 line file? Can't be cheap ... :/
|
||||||
cat $tmpURLs | xargs -0 -I {} sed '{}' $tmpStats > $temp2
|
cat $tmpURLs | xargs -0 -I {} sed '{}' $tmpStats > $temp2
|
||||||
|
else
|
||||||
|
mv $tmpStats $temp2
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo here
|
||||||
|
cat $temp2
|
||||||
|
|
||||||
cat $temp2 | perl -MHTML::Entities -pe 'decode_entities($_)' > $temp
|
cat $temp2 | perl -MHTML::Entities -pe 'decode_entities($_)' > $temp
|
||||||
# I don't think you need cat here, sed will take the file
|
|
||||||
|
echo here2
|
||||||
|
cat $temp
|
||||||
|
|
||||||
sed '/^$/d' $temp > $temp2
|
sed '/^$/d' $temp > $temp2
|
||||||
|
|
||||||
if test -z $temp2
|
if test -z $temp2
|
||||||
@@ -112,7 +138,7 @@ else
|
|||||||
> $temp
|
> $temp
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cat $temp $temp2 | sort -r -n | uniq > $1.txt
|
cat $temp $temp2 | sort -r -n | uniq | sed 's/[ \t]*$//' > $1.txt
|
||||||
|
|
||||||
after=$(wc -l $1.txt | awk '{print $1}')
|
after=$(wc -l $1.txt | awk '{print $1}')
|
||||||
echo Before: $before After: $after
|
echo Before: $before After: $after
|
||||||
|
|||||||
Reference in New Issue
Block a user