mirror of
https://github.com/thewesker/greptweet.git
synced 2025-12-20 12:11:05 -05:00
Merge branch '1.1'
Conflicts: jquery.js
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
<a href="http://www.flickr.com/photos/hendry/7577182774/" title="Offline Greptweet on Chrome IOS by Kai Hendry, on Flickr"><img src="http://farm8.staticflickr.com/7133/7577182774_d5b654ea69_m.jpg" width="160" height="240" alt="Offline Greptweet on Chrome IOS"></a>
|
||||
|
||||
* Uses [HTML offline feature](http://www.whatwg.org/specs/web-apps/current-work/multipage/offline.html)
|
||||
* Authentication free, using <http://dev.twitter.com/doc/get/statuses/user_timeline>
|
||||
* Aims to [suck less](http://suckless.org) by keeping lines of code low
|
||||
* Encourages folks to use `fetch-tweets.sh` themselves and get into shell ;)
|
||||
* Dependencies: curl, libhtml-parser-perl (to decode HTML entities), xmlstarlet, coreutils, PHP
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
# vim: set ts=4 sw=4
#
# Expand the links found in a tweet archive, in place.
#
# Usage: pass the path of a non-empty "*.txt" archive as $1. Each line of the
# archive is "id|date|text"; every http(s) URL found in the text is resolved
# with curl (following redirects) and replaced by its effective URL.

test -s "$1" || exit
test "${1##*.}" = 'txt' || exit

temp=$(mktemp "$1.XXXX") || exit
# Single quotes: expand (and quote) $temp when the trap fires, not now
trap 'rm -f "$temp"' EXIT

# IFS is scoped to the read so it cannot word-split anything else in the script
while IFS='|' read -r id date text
do
	t=$text
	# A tweet can hold zero, one or several links: resolve each one on its own
	while IFS= read -r url
	do
		test "$url" || continue
		expandedURL=$(curl "$url" -m5 -s -L -I -o /dev/null -w '%{url_effective}')
		# Keep the original link when the lookup produced nothing
		test "$expandedURL" || continue
		# Quote the pattern so ? * [ in the URL are matched literally, not as globs
		t=${t//"$url"/$expandedURL}
	done < <(printf '%s\n' "$text" | grep --only-matching --perl-regexp "http(s?):\/\/[^ \"\(\)\<\>]*" | sort -u)
	echo "$id|$date|$t"
done < "$1" > "$temp"

mv "$temp" "$1"
|
||||
108
fetch-tweets.sh
108
fetch-tweets.sh
@@ -9,10 +9,12 @@ else
|
||||
mkdir lock
|
||||
fi
|
||||
|
||||
trap "rm -vrf $temp $temp2 lock; exit" EXIT
|
||||
temp=$(mktemp "$1.XXXX")
|
||||
temp2=$(mktemp "$1.XXXX")
|
||||
|
||||
trap "rm -vrf $temp $temp2 lock" EXIT
|
||||
|
||||
umask 002
|
||||
api="http://api.twitter.com/1/statuses/user_timeline.xml?"
|
||||
|
||||
if ! test "$1"
|
||||
then
|
||||
@@ -20,18 +22,6 @@ then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
command -v xmlstarlet >/dev/null && xml() { xmlstarlet "$@"; }
|
||||
if ! type xml >/dev/null; then echo Please install http://xmlstar.sourceforge.net/; exit 1; fi
|
||||
|
||||
twitter_total=$(curl -s "http://api.twitter.com/1/users/lookup.xml?screen_name=$1" |
|
||||
xml sel -t -m "//users/user/statuses_count" -v .)
|
||||
|
||||
if ! test "$twitter_total" -gt 0 2>/dev/null
|
||||
then
|
||||
echo 'Twitter API not working' >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
page=1
|
||||
saved=0
|
||||
|
||||
@@ -42,85 +32,11 @@ then
|
||||
test "$2" && since='&max_id='$(tail -n1 $1.txt | cut -d'|' -f1) # use max_id to get older tweets
|
||||
fi
|
||||
|
||||
echo T:"$twitter_total" S:"$saved"
|
||||
while test "$twitter_total" -gt "$saved" # Start of the important loop
|
||||
while urlargs="screen_name=${1}&count=200&page=${page}${since}&include_rts=1&trim_user=0&include_entities=1"; echo $urlargs; $(dirname $0)/oauth.php $urlargs |
|
||||
json -d '|' -a id_str created_at -e 'this.t = this.text.replace(/\s*\n\s*/g, " "); this.entities.urls.forEach(function (u) { this.t = this.t.replace(u.url, u.expanded_url) });' t > $temp2; test $(wc -l < $temp2) -gt 0;
|
||||
do
|
||||
|
||||
echo $1 tweet total "$twitter_total" is greater than the already saved "$saved"
|
||||
echo Trying to get $(($twitter_total - $saved))
|
||||
|
||||
temp=$(mktemp "$1.XXXX")
|
||||
temp2=$(mktemp "$1.XXXX")
|
||||
|
||||
url="${api}screen_name=${1}&count=200&page=${page}${since}&include_rts=1&trim_user=0&include_entities=1"
|
||||
|
||||
echo "curl -s \"$url\""
|
||||
curl -si "$url" | tee $temp2 > $temp
|
||||
echo $?
|
||||
|
||||
# keep only headers in $temp2
|
||||
ed -s $temp2 << "EOF_ED1"
|
||||
/^[[:space:]]*$/
|
||||
.,$d
|
||||
wq
|
||||
EOF_ED1
|
||||
|
||||
# keep only content in $temp
|
||||
ed -s $temp << "EOF_ED2"
|
||||
/^[[:space:]]*$/
|
||||
1,.d
|
||||
wq
|
||||
EOF_ED2
|
||||
|
||||
|
||||
grep -iE 'rate|status' $temp2 # show the interesting twitter rate limits
|
||||
|
||||
if test "$(xml sel -t -v "count(//statuses/status)" $temp 2>/dev/null)" -eq 0
|
||||
then
|
||||
head $temp | grep -q "Over capacity" && echo "Twitter is OVER CAPACITY"
|
||||
if test "$2" && test "$since"
|
||||
then
|
||||
echo No old tweets ${since}
|
||||
elif test "$since"
|
||||
then
|
||||
echo No new tweets ${since}
|
||||
else
|
||||
echo "Twitter is returning empty responses on page ${page} :("
|
||||
fi
|
||||
rm -f $temp $temp2
|
||||
exit
|
||||
fi
|
||||
|
||||
xml sel -t -m "statuses/status" -n -o "text " -v "id" -o "|" -v "created_at" -o "|" \
|
||||
-m ".|retweeted_status" -i "(name() = 'status' and not(retweeted_status)) or name() = 'retweeted_status'" \
|
||||
-i "name() = 'retweeted_status'" -o "RT @" -v "user/screen_name" -o ": " -b \
|
||||
-v "normalize-space(text)" \
|
||||
-m "entities/urls/url" -i "expanded_url != ''" -n -o "url " -v "url" -o " " -v "expanded_url" -b -b \
|
||||
-m "entities/media/creative" -i "expanded_url != ''" -n -o "url " -v "url" -o " " -v "expanded_url" -b -b \
|
||||
$temp |
|
||||
{
|
||||
while read -r first rest
|
||||
do
|
||||
case $first in
|
||||
"text") echo "$text"; text="$rest" ;;
|
||||
"url")
|
||||
set -- $(echo $rest)
|
||||
text=${text//"$1"/$2} # BASHism #11
|
||||
;;
|
||||
esac
|
||||
done
|
||||
echo "$text"
|
||||
} > $temp2
|
||||
|
||||
perl -MHTML::Entities -pe 'decode_entities($_)' < $temp2 > $temp
|
||||
sed '/^$/d' < $temp > $temp2
|
||||
|
||||
if test -z $temp2
|
||||
then
|
||||
echo $temp2 is empty
|
||||
rm -f $temp $temp2
|
||||
continue
|
||||
fi
|
||||
#cat temp2
|
||||
|
||||
if test -f $1.txt
|
||||
then
|
||||
@@ -135,18 +51,12 @@ sort -r -n -u $temp $temp2 > "$1.txt"
|
||||
rm -f $temp $temp2
|
||||
|
||||
after=$(wc -l < "$1.txt")
|
||||
echo Before: $before After: $after
|
||||
|
||||
if test "$before" -eq "$after"
|
||||
then
|
||||
echo Unable to retrieve anything new. Approximately $(( $twitter_total - $after)) missing tweets
|
||||
exit
|
||||
fi
|
||||
echo Before: $before After: $after
|
||||
|
||||
page=$(($page + 1))
|
||||
saved=$(wc -l < "$1.txt")
|
||||
echo $saved
|
||||
|
||||
done
|
||||
|
||||
echo $1 saved $saved tweets of "$twitter_total": You are up-to-date!
|
||||
echo $1 saved $saved tweets
|
||||
|
||||
71
oauth.php
Executable file
71
oauth.php
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env php
<?php

// The only argument is the raw query string of the request,
// e.g. "screen_name=foo&count=200". Without it there is nothing to do.
$urlargs = isset($argv[1]) ? $argv[1] : '';
if (!$urlargs) {
    exit(1);
}

// Decoded copy of the query string; it is merged into the signed parameters.
parse_str($urlargs, $merge_to_oauth);
|
||||
|
||||
// Build the OAuth 1.0 signature base string.
//
// $baseURI - request URL without the query string
// $method  - HTTP method, already upper-cased (e.g. 'GET')
// $params  - associative array of every parameter to sign
//
// Returns "METHOD&encoded-uri&encoded-parameter-string".
function buildBaseString($baseURI, $method, $params) {
    // Names and values are both percent-encoded *before* sorting and joining
    // (RFC 5849, section 3.4.1.3.2); encoding only the values gives a wrong
    // signature as soon as a name contains a reserved character.
    $encoded = array();
    foreach ($params as $key => $value) {
        $encoded[rawurlencode($key)] = rawurlencode($value);
    }
    // Byte-wise ordering of the encoded names
    ksort($encoded, SORT_STRING);

    $pairs = array();
    foreach ($encoded as $key => $value) {
        $pairs[] = $key . '=' . $value;
    }
    return $method . '&' . rawurlencode($baseURI) . '&' . rawurlencode(implode('&', $pairs));
}
|
||||
|
||||
// Render the "Authorization: OAuth ..." request header.
//
// $oauth - associative array of parameters; each one is emitted as
//          name="percent-encoded value", separated by ", ".
function buildAuthorizationHeader($oauth) {
    $pairs = array();
    foreach ($oauth as $name => $val) {
        $pairs[] = $name . '="' . rawurlencode($val) . '"';
    }
    return 'Authorization: OAuth ' . implode(', ', $pairs);
}
|
||||
|
||||
// Endpoint queried for the user's timeline.
$url = "https://api.twitter.com/1.1/statuses/user_timeline.json";

// Get $oauth_access_token, $oauth_access_token_secret, $consumer_key, $consumer_secret
// require, not include: without the credentials the request cannot be signed.
require("secret.php");

$oauth = array( 'oauth_consumer_key' => $consumer_key,
    // Random per-request value; time() repeats for two calls within one second
    'oauth_nonce' => md5(uniqid(mt_rand(), true)),
    'oauth_signature_method' => 'HMAC-SHA1',
    'oauth_token' => $oauth_access_token,
    'oauth_timestamp' => time(),
    'oauth_version' => '1.0');

// The query parameters are part of what gets signed.
$oauth = array_merge($oauth, $merge_to_oauth);

$base_info = buildBaseString($url, 'GET', $oauth);
$composite_key = rawurlencode($consumer_secret) . '&' . rawurlencode($oauth_access_token_secret);
$oauth_signature = base64_encode(hash_hmac('sha1', $base_info, $composite_key, true));
$oauth['oauth_signature'] = $oauth_signature;

// Make Requests
// The empty 'Expect:' header overrides curl's default Expect header.
$header = array(buildAuthorizationHeader($oauth), 'Expect:');

$feed = curl_init();
$options = array( CURLOPT_HTTPHEADER => $header,
    CURLOPT_URL => $url . '?'. $urlargs,
    CURLOPT_HEADER => true,
    CURLOPT_RETURNTRANSFER => true,
    // Verify the server certificate; turning this off allows a
    // man-in-the-middle to read or forge the API traffic.
    CURLOPT_SSL_VERIFYPEER => true);

curl_setopt_array($feed, $options);
$content = curl_exec($feed);

// Transport failure (DNS, TLS, timeout...): report it and signal the caller.
if ($content === false) {
    file_put_contents('php://stderr', 'curl: ' . curl_error($feed) . "\n");
    curl_close($feed);
    exit(1);
}
curl_close($feed);

// Response headers and body are separated by the first blank line.
$parts = explode("\r\n\r\n", $content, 2);
$header = $parts[0];
$json = isset($parts[1]) ? $parts[1] : '';

// Headers go to stderr so that stdout carries only the JSON body.
file_put_contents('php://stderr', $header . "\n\n");

// No results returned, Twitter API issue
// (a two-character body, e.g. an empty JSON array "[]")
if (strlen($json) == 2) { exit(1); }

echo $json;
// $twitter_data = json_decode($json);
// print_r ($twitter_data);

?>
|
||||
Reference in New Issue
Block a user