# source code
$KCODE = "u"
require 'rubygems'
require 'net/http'
require 'uri'
require 'rss'
require 'dbi'
require 'mecab_util.rb'
# Fetches the "hot" entry list of one Hatena Bookmark genre as an RSS feed
# and keeps the parsed feed items in memory.
class HatenaBookmarkRss
  # Host and port the entry list is requested from (plain HTTP).
  HOST = "b.hatena.ne.jp"
  PORT = 80

  # Query parameters sent with every entry-list request.
  FEED_QUERY = {
    "sort" => "hot",
    "threshold" => "5",
    "mode" => "rss",
    "page" => "1"
  }.freeze

  # genre - genre name appended to the "/entrylist/" request path
  #         (for example 'fun').
  def initialize(genre)
    @genre = genre
    @items = []
  end

  # Builds a "key=value&key2=value2" query string from query_hash.
  #
  # Keys and values are escaped as form components, so reserved characters
  # such as "&", "=" and "+" inside a value cannot split or corrupt the
  # query. (URI.encode left those characters untouched and no longer exists
  # in Ruby 3.0+.) Non-string keys/values are converted with to_s.
  def build_query(query_hash)
    query_hash.map do |key, value|
      escaped_key = URI.encode_www_form_component(key.to_s)
      escaped_value = URI.encode_www_form_component(value.to_s)
      "#{escaped_key}=#{escaped_value}"
    end.join("&")
  end

  # Downloads the genre's entry list and stores its items in @items.
  #
  # Returns the stored items; the list is empty when the parser does not
  # recognise the body as a feed (RSS::Parser.parse returns nil then).
  # Raises the error produced by Net::HTTPResponse#value when the server
  # answers with a non-2xx status, instead of trying to parse an error page.
  def get_rss
    path = "/entrylist/#{@genre}?#{build_query(FEED_QUERY)}"
    response = Net::HTTP.start(HOST, PORT) { |http| http.get(path) }
    response.value
    feed = RSS::Parser.parse(response.body, false)
    @items = feed ? feed.items : []
  end

  # Number of items stored by the last get_rss call (0 before any call).
  def size
    @items.size
  end

  # Item at the given index, or nil when the index is out of range.
  def get_at(index)
    @items[index]
  end
end
# Stores feed entries in the hot_url table, reducing each title to the nouns
# that have a positive score in the noun_bank table.
class NewsStockDB < HatenaBookmarkRss
  # database - DBI database handle used for every query.
  # genre    - genre name; passed to HatenaBookmarkRss and stored with each row.
  def initialize(database, genre)
    super(genre)
    @db = database
  end

  # Inserts link/title into hot_url unless a row with that url already exists.
  # Nothing is written when title is nil or empty.
  def insert_db(link, title)
    return if title.nil? || title.empty?

    # select_one reads at most the first row and manages its own statement
    # handle, so only existence is checked and no full rows are fetched.
    return if @db.select_one("SELECT 1 FROM hot_url WHERE url=?", link)

    # NOTE(review): values are positional; presumably the leading NULL lets
    # the database assign an id and the trailing 0 is an initial
    # counter/flag - confirm against the hot_url schema.
    @db.do("INSERT INTO hot_url VALUES(NULL,?,?,?,?,0)",
           link, title, @genre, Time.now.to_i)
  end

  # Returns the first positive score found for noun in noun_bank. When no row
  # is positive the score of the last row read is returned, and 0 when there
  # are no rows at all.
  def get_noun_score(noun)
    score = 0
    # The block form of execute finishes the statement handle when the block
    # exits, including when reading a row raises.
    @db.execute("SELECT score FROM noun_bank WHERE word=?", noun) do |sth|
      sth.each do |row|
        score = row[0].to_i
        break if score > 0
      end
    end
    score
  end

  # Returns the nouns of title that have a positive noun_bank score, joined by
  # single spaces (an empty string when none qualifies).
  def noun_brew(title)
    # Only the first whitespace-separated field of each get_noun entry is
    # used. NOTE(review): presumably entries look like "noun <extra>" -
    # confirm against mecab_util.
    nouns = MecabParse.new(title).get_noun.map { |pair| pair.split(' ').first }
    nouns.select { |noun| get_noun_score(noun) > 0 }.join(' ')
  end

  # Downloads the feed and tries to store every entry; entries whose title
  # yields no scored noun are skipped by insert_db.
  def process
    get_rss
    @items.each do |item|
      insert_db(item.link, noun_brew(item.title))
    end
  end
end
# ---------------------------------------
# --- main routine
# ---------------------------------------
# ----- login information (placeholders; replace with real values)
DBNAME = 'YOURDB'
DBLOGIN = 'LOGIN'
DBPASS = 'PASS'
# --- open DB
db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
begin
  db.do('SET NAMES utf8')
  # --- fetch and store each genre in turn
  %w[fun game].each do |genre|
    NewsStockDB.new(db, genre).process
  end
ensure
  # Close the connection even when a fetch or a query raises.
  db.disconnect
end