# source code
require 'rubygems'
require 'dbi'
require 'mecab_util.rb'
require 'hatena_keyword.rb'
# ---------------------------------------
# --- define
# ---------------------------------------
# Data source name handed to DBI.connect in the main routine.
DBNAME = 'DBI:mysql:koruri:localhost'
# Login and password handed to DBI.connect.
# NOTE(review): these look like placeholder values -- replace before running.
DBLOGIN = 'yourid'
DBPASS = 'yourpass'
# Verbose switch: the printf/puts progress output throughout this file is
# emitted only when this is true.
VERB = false
# ---------------------------------------
# --- subroutine
# ---------------------------------------
# Records one occurrence of +word+ in the noun_bank table.
#
# db   - database handle (must respond to #execute and #do).
# word - the noun to register.
# yomi - reading stored with the word when a new row is inserted.
# ref  - appearance count stored on insert; when the word already exists it
#        is replaced by the stored count plus one.
#
# Returns true when the word was already present (row updated), false when a
# new row was inserted.
def add_word(db, word, yomi, ref)
  known = false
  lookup = db.execute('SELECT appear FROM noun_bank WHERE word=?', word)
  lookup.each do |record|
    # An existing row overrides the caller-supplied count.
    ref = record[0].to_i + 1
    known = true
  end
  lookup.finish

  now = Time.new.to_i
  if known
    db.do('UPDATE noun_bank SET appear=?,lastupdate=? WHERE word=?', ref, now, word)
  else
    db.do('INSERT INTO noun_bank VALUES (NULL,?,?,0,?,0,0,?,-1,NULL,NULL)', word, yomi, ref, now)
  end
  known
end
# Looks +word+ up in the hatena_keyword table.
#
# db   - database handle (must respond to #execute).
# word - keyword to search for.
#
# Returns a Hash with "found" (1 when at least one row matched, otherwise 0)
# and "yomi" (first column of the first matching row, or '' when none).
def check_hatena_from_db(db, word)
  found = 0
  reading = ''
  cursor = db.execute('SELECT yomi FROM hatena_keyword WHERE word=?', word)
  cursor.each do |record|
    found += 1
    reading = record[0]
    # Only the first matching row is of interest.
    break
  end
  cursor.finish
  { "found" => found, "yomi" => reading }
end
# Queries HatenaKeywordCheck for +word+ and, when the check reports an
# arrival, flags the word in noun_bank (hatena=1).
#
# When similar words are returned they are stored comma-joined on the word's
# row, and each one is registered through add_word (reading taken from
# check_hatena_from_db), marked with super=+word+ / hatena=1, and linked to
# +word+ through add_noun_link with hatena flag 1.
#
# db   - database handle (must respond to #do; passed on to the helpers).
# word - the noun to check.
def check_hatena_from_web(db, word)
  checker = HatenaKeywordCheck.new(word)
  return unless checker.arrival?

  related = checker.get_similar
  unless related.size > 0
    # No similar words: only flag the word itself.
    return db.do('UPDATE noun_bank SET hatena=? WHERE word=?', 1, word)
  end

  db.do('UPDATE noun_bank SET hatena=?,similar=? WHERE word=?', 1, related.join(','), word)
  # Register every similar word and tie it back to +word+.
  related.each do |candidate|
    printf(",%s", candidate) if VERB == true
    reading = check_hatena_from_db(db, candidate)["yomi"]
    add_word(db, candidate, reading, 0)
    db.do('UPDATE noun_bank SET super=?,hatena=1 WHERE word=?', word, candidate)
    add_noun_link(db, word, candidate, 1)
  end
end
# --------------------------------
# --- noun link
# Counts a co-occurrence link between two nouns in the noun_link table.
#
# Both words are resolved to their noun_bank ids. Nothing is written unless
# both ids are positive and different. An existing (nounid, link) row has its
# appear counter incremented; otherwise a new row is inserted with the given
# +hatena+ flag, a count of 1 and the current epoch time.
#
# db     - database handle (must respond to #execute and #do).
# noun   - word on the "nounid" side of the link.
# follow - word on the "link" side of the link.
# hatena - flag value stored on newly inserted rows.
def add_noun_link(db, noun, follow, hatena)
  id_query = 'SELECT id FROM noun_bank WHERE word=?'
  noun_id, link_id = [noun, follow].map do |term|
    resolved = 0
    rows = db.execute(id_query, term)
    rows.each { |r| resolved = r[0].to_i }
    rows.finish
    resolved
  end
  return unless noun_id > 0 && link_id > 0 && noun_id != link_id

  row_id = 0
  seen = 0
  rows = db.execute('SELECT id,nounid,appear FROM noun_link WHERE nounid=? AND link=?', noun_id, link_id)
  rows.each do |r|
    row_id = r[0].to_i
    seen = r[2].to_i
  end
  rows.finish

  if row_id == 0
    db.do('INSERT INTO noun_link VALUES (NULL,?,?,?,1,?)', noun_id, link_id, hatena, Time.now.to_i)
  else
    db.do('UPDATE noun_link SET appear=? WHERE id=?', seen + 1, row_id)
  end
end
# Links every noun in +nounlist+ with every noun that comes after it.
#
# Each entry is a whitespace-separated string; only its first field (the
# word) is used. For every ordered pair (earlier, later) add_noun_link is
# called with hatena flag 0. Lists with fewer than two entries are ignored.
#
# db       - database handle, passed through to add_noun_link.
# nounlist - indexable list of noun entry strings.
def link_noun_list(db, nounlist)
  total = nounlist.size
  return unless total > 1

  (0...(total - 1)).each do |left|
    head = nounlist[left].split[0]
    printf("%s: ", head) if VERB
    ((left + 1)...total).each do |right|
      tail = nounlist[right].split[0]
      add_noun_link(db, head, tail, 0)
      printf("%s ", tail) if VERB
    end
    puts if VERB
  end
end
# --------------------------------
# --- noun follow
# Counts one occurrence of +follow+ appearing after +noun+.
#
# Two tables are maintained:
#   * noun_follow      - one row per follower word; inserted with count 1 or
#                        has appear incremented and lastupdate refreshed.
#   * noun_follow_link - one row per (noun id, follower id) pair; inserted
#                        with count 1 or has appear incremented and
#                        lastupdate refreshed.
#
# db     - database handle (must respond to #execute and #do).
# noun   - word looked up in noun_bank.
# follow - word that followed the noun.
#
# NOTE(review): unlike add_noun_precede there is no guard on the noun id, so
# a noun missing from noun_bank is linked with id 0 -- confirm this is
# intended.
def add_noun_follow(db, noun, follow)
  noun_id = 0
  follow_id = 0

  sth = db.execute('SELECT id FROM noun_bank WHERE word=?', noun)
  sth.each { |r| noun_id = r[0].to_i }
  sth.finish

  # -- follower word bookkeeping
  seen = 0
  sth = db.execute('SELECT id,appear FROM noun_follow WHERE word=?', follow)
  sth.each do |r|
    follow_id = r[0].to_i
    seen = r[1].to_i
  end
  sth.finish
  if follow_id == 0
    db.do('INSERT INTO noun_follow VALUES (NULL,?,0,1,?)', follow, Time.now.to_i)
  else
    db.do('UPDATE noun_follow SET appear=? WHERE id=?', seen + 1, follow_id)
    db.do('UPDATE noun_follow SET lastupdate=? WHERE id=?', Time.now.to_i, follow_id)
  end

  # -- noun/follower link: re-read the follower id so a freshly inserted row
  #    is picked up.
  sth = db.execute('SELECT id FROM noun_follow WHERE word=?', follow)
  sth.each { |r| follow_id = r[0].to_i }
  sth.finish

  link_id = 0
  sth = db.execute('SELECT id,noun,follow,appear FROM noun_follow_link WHERE noun=? AND follow=?', noun_id, follow_id)
  sth.each do |r|
    link_id = r[0].to_i
    seen = r[3].to_i
  end
  sth.finish
  if link_id == 0
    db.do('INSERT INTO noun_follow_link VALUES (NULL,?,?,1,?)', noun_id, follow_id, Time.now.to_i)
  else
    db.do('UPDATE noun_follow_link SET appear=? WHERE id=?', seen + 1, link_id)
    db.do('UPDATE noun_follow_link SET lastupdate=? WHERE id=?', Time.now.to_i, link_id)
  end
end
# Scans the tokens of +mecab+, building up a `noun` string and a `follow`
# string from token words, and records each completed pair through
# add_noun_follow.
#
# db    - database handle, passed through to add_noun_follow.
# mecab - parsed sentence; only #size, #get_hinshi(index) and
#         #get_word(index) are used here.
#
# NOTE(review): as written this method does not parse. The bare `else` after
# the noun-accumulation `if` has no matching `if`, and `node` is assigned but
# never read. Presumably a line testing `node` (the value returned by
# get_hinshi) was lost right after the assignment, which would make the first
# branch handle one kind of token and the `else` branch all others -- confirm
# against the original source before relying on this method.
def parse_noun_pair(db, mecab)
token_num = mecab.size
noun = ''
follow = ''
if token_num > 1 then
# The exclusive range ends at token_num-2, so the final token is not visited.
for index in 0...(token_num-1)
node = mecab.get_hinshi(index)
# A completed pair (both strings non-empty) is stored and both buffers are
# cleared before the current token is appended to `noun`.
if (noun != '') && (follow != '') then
add_noun_follow(db, noun, follow)
printf("(%s)%s\n", noun, follow) if VERB
noun = ''
follow = ''
end
# Consecutive tokens reaching this point are concatenated into one `noun`.
if noun != '' then
noun << mecab.get_word(index)
else
noun = mecab.get_word(index)
follow = ''
end
else
# Tokens reaching this branch are concatenated into `follow`.
if follow != '' then
follow << mecab.get_word(index)
else
follow = mecab.get_word(index)
end
end
end
# Store the pair still pending after the loop.
if (noun != '') && (follow != '') then
add_noun_follow(db, noun, follow)
printf("(%s)%s\n", noun, follow) if VERB
end
end
end
# Counts one occurrence of +precede+ appearing before +noun+ in the
# noun_precede_link table.
#
# The noun id comes from noun_bank and the preceding word's id from
# noun_follow. Nothing is written unless both ids are positive. An existing
# (noun, precede) row has appear incremented and lastupdate refreshed;
# otherwise a row with count 1 and the current epoch time is inserted.
#
# db      - database handle (must respond to #execute and #do).
# precede - word that came before the noun.
# noun    - the noun itself.
def add_noun_precede(db, precede, noun)
  noun_id = 0
  precede_id = 0

  sth = db.execute('SELECT id FROM noun_bank WHERE word=?', noun)
  sth.each { |r| noun_id = r[0].to_i }
  sth.finish

  # -- the preceding word is resolved against the noun_follow table
  sth = db.execute('SELECT id FROM noun_follow WHERE word=?', precede)
  sth.each { |r| precede_id = r[0].to_i }
  sth.finish

  return unless noun_id > 0 && precede_id > 0

  link_id = 0
  seen = 0
  sth = db.execute('SELECT id,noun,precede,appear FROM noun_precede_link WHERE noun=? AND precede=?', noun_id, precede_id)
  sth.each do |r|
    link_id = r[0].to_i
    seen = r[3].to_i
  end
  sth.finish

  if link_id == 0
    db.do('INSERT INTO noun_precede_link VALUES (NULL,?,?,1,?)', noun_id, precede_id, Time.now.to_i)
  else
    db.do('UPDATE noun_precede_link SET appear=? WHERE id=?', seen + 1, link_id)
    db.do('UPDATE noun_precede_link SET lastupdate=? WHERE id=?', Time.now.to_i, link_id)
  end
end
# Scans the tokens of +mecab+, building up a `precede` string and a `noun`
# string from token words, and records each completed pair through
# add_noun_precede(db, precede, noun).
#
# db    - database handle, passed through to add_noun_precede.
# mecab - parsed sentence; only #size, #get_hinshi(index) and
#         #get_word(index) are used here.
#
# `mode` remembers which branch handled the previous token (0 or 1); a pending
# pair is flushed whenever the branch changes.
#
# NOTE(review): as written this method does not parse. The bare `else` after
# the noun-accumulation `if` has no matching `if`, and `node` is assigned but
# never read. Presumably a line testing `node` (the value returned by
# get_hinshi) was lost right after the assignment -- confirm against the
# original source before relying on this method.
def parse_noun_pair_precede(db, mecab)
token_num = mecab.size
noun = ''
precede = ''
mode = 0
if token_num > 1 then
# The exclusive range ends at token_num-2, so the final token is not visited.
for index in 0...(token_num-1)
node = mecab.get_hinshi(index)
# Coming from the other branch (mode 1): flush a completed pair, if any.
if mode == 1 then
mode = 0
if (noun != '') && (precede != '') then
add_noun_precede(db, precede, noun)
printf("%s(%s)\n", precede, noun) if VERB
noun = ''
precede = ''
end
end
# Consecutive tokens reaching this point are concatenated into one `noun`.
if noun != '' then
noun << mecab.get_word(index)
else
noun = mecab.get_word(index)
end
else
# Coming from the noun branch (mode 0): flush a completed pair, then drop
# any noun collected so far.
if mode == 0 then
mode = 1
if (noun != '') && (precede != '') then
add_noun_precede(db, precede, noun)
printf("%s(%s)\n", precede, noun) if VERB
noun = ''
precede = ''
end
noun = ''
end
# Tokens reaching this branch are concatenated into `precede`.
if precede != '' then
precede << mecab.get_word(index)
else
precede = mecab.get_word(index)
end
end
end
# Store the pair still pending after the loop.
if (noun != '') && (precede != '') then
add_noun_precede(db, precede, noun)
printf("%s(%s)\n", precede, noun) if VERB
end
end
end
# ---------------------------------------
# --- main routine
# ---------------------------------------
# --- open DB
# Main routine: connects to the database, walks every sentence row with
# process=0 and, for each one,
#   1. parses the text with MecabParse and registers each noun via add_word;
#      a newly inserted noun that is found in hatena_keyword is additionally
#      checked through check_hatena_from_web,
#   2. records noun/noun links, noun/follower pairs and preceding-word/noun
#      pairs,
#   3. marks the sentence as processed (process=1).
# Any StandardError is printed and ends the run; the connection is always
# closed.
db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
db.do('SET NAMES utf8')
# --- tweet query
sql = 'SELECT id,text FROM sentence WHERE process=0'
item_num = 0
begin
  sth = db.execute(sql)
  sth.each do |row|
    token = MecabParse.new(row[1])
    nounlist = token.get_noun
    nounlist.each do |item|
      # Each entry is whitespace separated: field 0 is passed as the word,
      # field 1 as the reading.
      chunk = item.split
      printf("%d: %s", item_num, chunk[0]) if VERB
      hit = add_word(db, chunk[0], chunk[1], 1)
      # Only words that were just inserted are checked against hatena.
      unless hit
        r = check_hatena_from_db(db, chunk[0])
        if r["found"] > 0
          check_hatena_from_web(db, chunk[0])
          # wait for hatena
          sleep(0.5)
        end
      end
      puts if VERB
    end
    link_noun_list(db, nounlist)
    parse_noun_pair(db, token)
    parse_noun_pair_precede(db, token)
    # The statement is passed inline so the outer query string in `sql` is
    # not overwritten.
    db.do('UPDATE sentence SET process=1 WHERE id=?', row[0])
    item_num += 1
  end
  # Release the statement handle, as every other query in this file does.
  sth.finish
rescue => e
  puts e
  puts "error terminate."
ensure
  # --- end
  db.disconnect
end