-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrss2muc.rb
74 lines (66 loc) · 1.87 KB
/
rss2muc.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
require 'yaml'
require 'rubygems'
require 'feed-normalizer'
require 'open-uri'
require 'gdbm'
require 'sha1'
require 'time'
require 'simple-ordered-list'
# Startup: read the feed list (path taken from the first command-line
# argument, falling back to 'feeds.yml'), open the on-disk store of
# already-seen articles, and schedule every feed on the priority queue.
# The first feed is due 10 seconds from now and each following feed
# 10 seconds after the previous one.
feed_path = ARGV.first || 'feeds.yml'
feed_list = YAML.load_file(feed_path)
articles = GDBM.new('read.db')
pq = Queue::Priority.new
due_at = Time.now.to_i + 10
feed_list.each do |feed_info|
  pq.push(feed_info, due_at)
  due_at += 10
end
# Hand-off buffer between the two threads: the RSS producer below appends
# [title, url] pairs and the MUC consumer shifts them off. It is declared
# outside both thread blocks so that both closures share the same array.
# NOTE(review): access is not guarded by any lock - confirm that is acceptable.
new_entries = []

# Producer thread. Forever: take the next feed off the priority queue, wait
# until it is due, fetch and parse it, record every entry in `articles`, and
# queue entries that have not been seen before for announcement. The feed is
# always re-queued at its previous due time plus its 'refresh' interval, even
# when the fetch fails, so one broken feed cannot stop the polling.
rss = Thread.new do
  puts "RSS producer alive"
  loop do
    now = Time.now.to_i
    # NOTE(review): presumably lowest(true) removes and returns the
    # [item, priority] pair with the smallest priority - confirm against the
    # simple-ordered-list library.
    feed_info, timestamp = pq.lowest(true)
    puts "= next: #{feed_info['name']}, at: #{Time.at(timestamp)}"
    if timestamp > now
      puts "= sleeping for #{timestamp - now} for #{feed_info['name']}"
      sleep timestamp - now
      # Re-read the clock so the times stored below reflect the actual fetch
      # rather than the moment before the sleep.
      now = Time.now.to_i
    end
    p feed_info, timestamp
    next_timestamp = timestamp + feed_info['refresh']
    feed_url = feed_info['url']
    # A missing record converts to 0, which marks this feed's first run: its
    # current entries are recorded as seen but not announced.
    seen_feed = articles[feed_url].to_i
    puts "= ignoring first run: #{feed_url}" if seen_feed.zero?

    begin
      # Block form closes the downloaded stream once parsing is done.
      # NOTE(review): Kernel#open is used with a URL from the feeds file;
      # this assumes that file is trusted configuration.
      feed = open(feed_url) { |io| FeedNormalizer::FeedNormalizer.parse(io) }
      if feed.nil?
        puts "= unparseable feed: #{feed_url}"
      else
        feed.entries.each do |f|
          # do we care if the title changes?
          # Interpolation tolerates a nil title or url; for non-nil values
          # the digest is the same as concatenating the two strings.
          sha1 = SHA1.hexdigest("#{f.title}#{f.url}")
          unless articles[sha1]
            if seen_feed.zero?
              puts "! #{f.title}"
            else
              puts "+ #{f.title}"
              new_entries.push [f.title, f.url]
            end
          end
          articles[sha1] = now.to_s
        end
        # Only mark the feed as seen after a successful pass, so a failed
        # first fetch is still treated as a first run next time.
        articles[feed_url] = now.to_s
      end
    rescue StandardError => e
      # Report and carry on; the feed is retried at its next scheduled time.
      puts "= fetch failed: #{feed_url}: #{e.class}: #{e.message}"
    end

    puts "= queued: #{feed_info['name']} at #{Time.at(next_timestamp)}"
    pq.push(feed_info, next_timestamp)
  end
end
# Consumer thread: every 2 seconds, take at most one pending [title, url]
# pair off the shared buffer and print its title.
muc = Thread.new do
  puts "MUC consumer alive"
  loop do
    unless new_entries.empty?
      headline = new_entries.shift.first
      puts "- #{headline}"
    end
    sleep 2
  end
end

# Block the main thread on the producer first, then on the consumer.
[rss, muc].each { |worker| worker.join }