# source code
require 'rubygems'
require 'dbi'
require 'mecab_util.rb'
require 'hatena_keyword.rb'
# ---------------------------------------
# --- define
# ---------------------------------------
# Connection settings handed to DBI.connect by the main routine.
# DBNAME is the DBI data source string; DBLOGIN / DBPASS are placeholder
# credentials that must be replaced before the script is run.
DBNAME = 'DBI:mysql:koruri:localhost'
DBLOGIN = 'yourid'
DBPASS = 'yourpass'
# When true, progress output (item number, each word, each similar word) is
# printed to standard output while the script runs.
VERB = false
# ---------------------------------------
# --- subroutine
# ---------------------------------------
# Records one occurrence of +word+ in the noun_bank table.
#
# Reads the stored +appear+ value for +word+. When no row matches, a new row
# is inserted carrying +word+, +yomi+ and +ref+. Otherwise +appear+ is
# rewritten to the stored value plus one and +ref+ is not used.
#
# db   - database handle responding to +execute+ and +do+.
# word - noun to register.
# yomi - value stored in the third column of a newly inserted row
#        (presumably the word's reading; confirm against the schema).
# ref  - value stored in the fifth column of a newly inserted row
#        (presumably the initial appear count; confirm against the schema).
#
# Returns true when the word already existed, false when it was inserted.
def add_word(db, word, yomi, ref)
  next_appear = nil
  sth = db.execute('SELECT appear FROM noun_bank WHERE word=?', word)
  begin
    # If several rows match, the last one fetched decides the new count.
    sth.each { |row| next_appear = row[0].to_i + 1 }
  ensure
    # Release the statement handle even when fetching raises.
    sth.finish
  end

  if next_appear.nil?
    db.do('INSERT INTO noun_bank VALUES (NULL,?,?,0,?,0,0,0,-1,NULL,NULL)', word, yomi, ref)
    false
  else
    db.do('UPDATE noun_bank SET appear=? WHERE word=?', next_appear, word)
    true
  end
end
# Looks up +word+ in the local hatena_keyword table.
#
# db   - database handle responding to +execute+.
# word - keyword to search for.
#
# Returns a Hash with two String keys:
#   "found" - 1 when at least one row matched, otherwise 0.
#   "yomi"  - the yomi column of the first matching row, or '' when none matched.
def check_hatena_from_db(db, word)
  found = 0
  yomi = ''
  sth = db.execute('SELECT yomi FROM hatena_keyword WHERE word=?', word)
  begin
    sth.each do |row|
      # Only the first matching row is needed.
      found = 1
      yomi = row[0]
      break
    end
  ensure
    # Release the statement handle even when fetching raises.
    sth.finish
  end
  { "found" => found, "yomi" => yomi }
end
# Queries HatenaKeywordCheck for +word+ and stores the outcome in noun_bank.
#
# Nothing is written when the checker reports no arrival. Otherwise the row
# for +word+ gets hatena=1. When similar words are returned, they are also
# saved comma-joined in the similar column, each one is registered through
# add_word (with the yomi found by check_hatena_from_db and ref 0), and its
# row is updated with super set to +word+ and hatena=1.
#
# db   - database handle responding to +do+.
# word - noun to look up.
def check_hatena_from_web(db, word)
  lookup = HatenaKeywordCheck.new(word)
  return unless lookup.arrival?

  related = lookup.get_similar
  if related.size.zero?
    db.do('UPDATE noun_bank SET hatena=? WHERE word=?', 1, word)
  else
    db.do('UPDATE noun_bank SET hatena=?,similar=? WHERE word=?', 1, related.join(','), word)
    # Register every similar word and link it back to +word+.
    related.each do |candidate|
      printf(",%s", candidate) if VERB == true
      known = check_hatena_from_db(db, candidate)
      add_word(db, candidate, known["yomi"], 0)
      db.do('UPDATE noun_bank SET super=?,hatena=1 WHERE word=?', word, candidate)
    end
  end
end
# ---------------------------------------
# --- main routine
# ---------------------------------------
# --- open DB
db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
# --- tweet query
# Walk every sentence row whose process flag is 0, register the nouns found in
# its text, then set the row's process flag to 1.
select_sql = 'SELECT id,text FROM sentence WHERE process=0'
mark_sql = 'UPDATE sentence SET process=1 WHERE id=?'
item_num = 0
begin
  sth = db.execute(select_sql)
  begin
    sth.each do |row|
      nouns = MecabParse.new(row[1]).get_noun
      nouns.each do |item|
        # Each item is split on whitespace: the first field is used as the
        # word and the second as its yomi.
        word, yomi = item.split
        # An item with no fields would register a nil word; skip it.
        next if word.nil?

        printf("%d: %s", item_num, word) if VERB
        existed = add_word(db, word, yomi, 1)
        # Only newly inserted words that are present in the local
        # hatena_keyword table trigger a web lookup.
        if !existed && check_hatena_from_db(db, word)["found"] > 0
          check_hatena_from_web(db, word)
          # Pause between web lookups (presumably to limit load on the
          # remote service).
          sleep(0.5)
        end
        puts if VERB
      end
      db.do(mark_sql, row[0])
      item_num += 1
    end
  ensure
    # Release the statement handle whether or not the loop completed; a
    # failure here is still reported by the rescue below.
    sth.finish
  end
rescue => e
  puts e
  puts "error terminate."
ensure
  # --- end
  db.disconnect
end