source code

  require 'rubygems'
  require 'dbi'
  require 'mecab_util.rb'
  require 'hatena_keyword.rb'
  
  # ---------------------------------------
  # --- define
  # ---------------------------------------
  # DBI data source name handed to DBI.connect by the main routine.
  DBNAME = 'DBI:mysql:koruri:localhost'
  # Database credentials handed to DBI.connect.
  # NOTE(review): these look like placeholders - presumably to be replaced
  # before running; confirm, and avoid committing real values.
  DBLOGIN = 'yourid'
  DBPASS = 'yourpass'
  # When true, progress is echoed to stdout through printf/puts.
  VERB = false
  
   # ---------------------------------------
   # --- subroutine
   # ---------------------------------------
   
   # Register one occurrence of +word+ in the noun_bank table.
   #
   # The word is looked up first. When no row exists, a new row is inserted
   # with +ref+ as its appear value. When a row exists, its stored appear value
   # is incremented by one (the caller-supplied +ref+ is ignored in that case)
   # and lastupdate is set to the current epoch time.
   #
   # db   :: handle responding to execute(sql, *binds) and do(sql, *binds)
   # word :: the word to register
   # yomi :: value stored alongside a newly inserted word
   # ref  :: appear value used for a newly inserted word
   #
   # Returns true when the word already existed, false when it was inserted.
   def add_word(db, word, yomi, ref)
     appear = nil
     sth = db.execute('SELECT appear FROM noun_bank WHERE word=?', word)
     begin
       # If several rows match, the last one wins.
       sth.each { |row| appear = row[0].to_i + 1 }
     ensure
       # Release the statement handle even when fetching raises.
       sth.finish
     end

     now = Time.now.to_i
     if appear.nil?
       db.do('INSERT INTO noun_bank VALUES (NULL,?,?,0,?,0,0,?,-1,NULL,NULL)', word, yomi, ref, now)
       false
     else
       db.do('UPDATE noun_bank SET appear=?,lastupdate=? WHERE word=?', appear, now, word)
       true
     end
   end
   
   
   # Look +word+ up in the hatena_keyword table.
   #
   # Returns a Hash with two string keys:
   # "found" :: 1 when at least one row matched, otherwise 0
   # "yomi"  :: first column of the first matching row, otherwise ''
   def check_hatena_from_db(db, word)
     result = { "found" => 0, "yomi" => '' }
     handle = db.execute('SELECT yomi FROM hatena_keyword WHERE word=?', word)
     handle.each do |record|
       # Only the first matching row is consulted.
       result["found"] = 1
       result["yomi"] = record[0]
       break
     end
     handle.finish
     result
   end
   
   # Query HatenaKeywordCheck for +word+ and record the outcome in noun_bank.
   #
   # Nothing happens unless the check reports arrival?. Otherwise the row for
   # +word+ is flagged hatena=1. When similar words are returned they are also
   # stored comma-joined in the similar column, and each of them is
   # registered through add_word, marked with super=+word+ and hatena=1, and
   # linked to +word+ through add_noun_link.
   def check_hatena_from_web(db, word)
     lookup = HatenaKeywordCheck.new(word)
     return unless lookup.arrival?

     related = lookup.get_similar
     if related.size == 0
       db.do('UPDATE noun_bank SET hatena=? WHERE word=?', 1, word)
     else
       db.do('UPDATE noun_bank SET hatena=?,similar=? WHERE word=?', 1, related.join(','), word)
       # - add similar words
       related.each do |candidate|
         printf(",%s", candidate) if VERB == true
         known = check_hatena_from_db(db, candidate)
         add_word(db, candidate, known["yomi"], 0)
         db.do('UPDATE noun_bank SET super=?,hatena=1 WHERE word=?', word, candidate)
         # noun link
         add_noun_link(db, word, candidate, 1)
       end
     end
   end
   
   
   # --------------------------------
   # --- noun link
   
   # Record a link from +noun+ to +follow+ in the noun_link table.
   #
   # Both words are resolved to noun_bank ids. Nothing is written unless both
   # ids are positive and different. An existing link gets its appear value
   # incremented; otherwise a new row is inserted carrying +hatena+ and the
   # current epoch time.
   def add_noun_link(db, noun, follow, hatena)
     # Resolve a word to its noun_bank id (0 when absent, last row wins).
     resolve = lambda do |term|
       found = 0
       handle = db.execute('SELECT id FROM noun_bank WHERE word=?', term)
       handle.each { |record| found = record[0].to_i }
       handle.finish
       found
     end

     noun_id = resolve.call(noun)
     follow_id = resolve.call(follow)
     return unless noun_id > 0 && follow_id > 0 && noun_id != follow_id

     link_id = 0
     seen = 0
     handle = db.execute('SELECT id,nounid,appear FROM noun_link WHERE nounid=? AND link=?', noun_id, follow_id)
     handle.each do |record|
       link_id = record[0].to_i
       seen = record[2].to_i
     end
     handle.finish

     if link_id == 0
       db.do('INSERT INTO noun_link VALUES (NULL,?,?,?,1,?)', noun_id, follow_id, hatena, Time.now.to_i)
     else
       db.do('UPDATE noun_link SET appear=? WHERE id=?', seen + 1, link_id)
     end
   end
   
   # Link every entry of +nounlist+ with every entry that comes after it.
   #
   # Each entry is whitespace-split and only its first field is used as the
   # word. Every pair is handed to add_noun_link with 0 as the hatena
   # argument. Lists with fewer than two entries are ignored.
   def link_noun_list(db, nounlist)
     total = nounlist.size
     return unless total > 1

     (0...(total - 1)).each do |head|
       first_word = nounlist[head].split[0]
       printf("%s: ", first_word) if VERB
       ((head + 1)...total).each do |tail|
         second_word = nounlist[tail].split[0]
         add_noun_link(db, first_word, second_word, 0)
         printf("%s ", second_word) if VERB
       end
       puts if VERB
     end
   end
   
   
   # --------------------------------
   # --- noun follow
   
   # Record that the text +follow+ appeared after +noun+.
   #
   # Three tables are involved:
   # * noun_bank        - read only, to resolve +noun+ to an id (0 when absent)
   # * noun_follow      - +follow+ is inserted, or its appear/lastupdate updated
   # * noun_follow_link - the (noun id, follow id) pair is inserted, or its
   #                      appear/lastupdate updated
   #
   # appear and lastupdate are written in a single UPDATE per table, and all
   # writes of one call share one timestamp.
   #
   # NOTE(review): unlike add_noun_precede, the link row is written even when
   # +noun+ is not in noun_bank (noun id 0) - confirm that this is intended.
   def add_noun_follow(db, noun, follow)
     # Run a query and return the requested columns of its last row as
     # integers (nil when nothing matched). The handle is always released.
     last_ints = lambda do |sql, columns, *binds|
       values = nil
       sth = db.execute(sql, *binds)
       begin
         sth.each { |row| values = columns.map { |c| row[c].to_i } }
       ensure
         sth.finish
       end
       values
     end

     now = Time.now.to_i
     noun_id = (last_ints.call('SELECT id FROM noun_bank WHERE word=?', [0], noun) || [0])[0]

     # -- noun follow
     follow_id, follow_seen = last_ints.call('SELECT id,appear FROM noun_follow WHERE word=?', [0, 1], follow) || [0, 0]
     if follow_id == 0
       db.do('INSERT INTO noun_follow VALUES (NULL,?,0,1,?)', follow, now)
       # Read back the id of the row that was just inserted.
       follow_id = (last_ints.call('SELECT id FROM noun_follow WHERE word=?', [0], follow) || [0])[0]
     else
       db.do('UPDATE noun_follow SET appear=?,lastupdate=? WHERE id=?', follow_seen + 1, now, follow_id)
     end

     # -- noun follow link
     link_id, link_seen = last_ints.call('SELECT id,noun,follow,appear FROM noun_follow_link WHERE noun=? AND follow=?', [0, 3], noun_id, follow_id) || [0, 0]
     if link_id == 0
       db.do('INSERT INTO noun_follow_link VALUES (NULL,?,?,1,?)', noun_id, follow_id, now)
     else
       db.do('UPDATE noun_follow_link SET appear=?,lastupdate=? WHERE id=?', link_seen + 1, now, link_id)
     end
   end
   
   # Walk the tokens of +mecab+ and hand (noun, follow) text pairs to
   # add_noun_follow.
   #
   # Two strings are accumulated from mecab.get_word: +noun+ in the first
   # branch of the loop body and +follow+ in its `else` branch. A pair is
   # flushed when both are non-empty at the start of the first branch, and once
   # more after the loop. The loop range 0...(token_num-1) stops before the
   # last token index.
   #
   # NOTE(review): the line that opens the conditional inside the loop is
   # missing (the blank line after the get_hinshi call); the `else`/`end`
   # further down have no matching `if`, so the method does not parse as shown.
   # Presumably the lost line tested `node` - restore it from the original file.
   def parse_noun_pair(db, mecab)
     token_num = mecab.size
     noun = ''
     follow = ''
     if token_num > 1 then
       for index in 0...(token_num-1)
         # Not referenced by any surviving line; see the review note above.
         node = mecab.get_hinshi(index)

           # A complete pair is pending: store it and start over.
           if (noun != '') && (follow != '') then
             add_noun_follow(db, noun, follow)
             printf("(%s)%s\n", noun, follow) if VERB
             noun = ''
             follow = ''
           end
           # Extend the current noun, or start a new one and drop any follow text.
           # NOTE(review): `<<` mutates the string that get_word returned
           # earlier - confirm get_word hands out a fresh string each call.
           if noun != '' then
             noun << mecab.get_word(index)
           else
             noun = mecab.get_word(index)
             follow = ''
           end
         else
           # Extend the follow text, or start it.
           if follow != '' then
             follow << mecab.get_word(index)
           else
             follow = mecab.get_word(index)
           end
         end
       end
       # Flush the pair left over after the last examined token.
       if (noun != '') && (follow != '') then
         add_noun_follow(db, noun, follow)
         printf("(%s)%s\n", noun, follow) if VERB
       end
     end
   end
   
   # Record that the text +precede+ appeared before +noun+.
   #
   # +noun+ is resolved through noun_bank and +precede+ through noun_follow.
   # Nothing is written unless both ids are positive. An existing
   # noun_precede_link row gets appear incremented and lastupdate refreshed in
   # a single UPDATE; otherwise a new row is inserted.
   def add_noun_precede(db, precede, noun)
     # Run a query and return the requested columns of its last row as
     # integers (nil when nothing matched). The handle is always released.
     last_ints = lambda do |sql, columns, *binds|
       values = nil
       sth = db.execute(sql, *binds)
       begin
         sth.each { |row| values = columns.map { |c| row[c].to_i } }
       ensure
         sth.finish
       end
       values
     end

     noun_id = (last_ints.call('SELECT id FROM noun_bank WHERE word=?', [0], noun) || [0])[0]
     # -- noun precede link
     precede_id = (last_ints.call('SELECT id FROM noun_follow WHERE word=?', [0], precede) || [0])[0]
     # Both sides must already be known; this method never creates them.
     return unless noun_id > 0 && precede_id > 0

     now = Time.now.to_i
     link_id, link_seen = last_ints.call('SELECT id,noun,precede,appear FROM noun_precede_link WHERE noun=? AND precede=?', [0, 3], noun_id, precede_id) || [0, 0]
     if link_id == 0
       db.do('INSERT INTO noun_precede_link VALUES (NULL,?,?,1,?)', noun_id, precede_id, now)
     else
       db.do('UPDATE noun_precede_link SET appear=?,lastupdate=? WHERE id=?', link_seen + 1, now, link_id)
     end
   end
   
   # Walk the tokens of +mecab+ and hand (precede, noun) text pairs to
   # add_noun_precede.
   #
   # +noun+ is accumulated in the first branch of the loop body and +precede+
   # in its `else` branch, both from mecab.get_word. +mode+ remembers which
   # branch handled the previous token (0 = first branch, 1 = `else` branch);
   # a pending pair is flushed whenever the branch changes, and once more after
   # the loop. The loop range 0...(token_num-1) stops before the last token
   # index.
   #
   # NOTE(review): the line that opens the conditional inside the loop is
   # missing (the blank line after the get_hinshi call); the `else`/`end`
   # further down have no matching `if`, so the method does not parse as shown.
   # Presumably the lost line tested `node` - restore it from the original file.
   def parse_noun_pair_precede(db, mecab)
     token_num = mecab.size
     noun = ''
     precede = ''
     mode = 0
     if token_num > 1 then
       for index in 0...(token_num-1)
         # Not referenced by any surviving line; see the review note above.
         node = mecab.get_hinshi(index)

           # Previous token went through the `else` branch: flush any pending pair.
           if mode == 1 then
             mode = 0
             if (noun != '') && (precede != '') then
               add_noun_precede(db, precede, noun)
               printf("%s(%s)\n", precede, noun) if VERB
               noun = ''
               precede = ''
             end
           end
           # Extend the current noun, or start a new one.
           if noun != '' then
             noun << mecab.get_word(index)
           else
             noun = mecab.get_word(index)
           end
         else
           # Previous token went through the first branch: flush any pending
           # pair, then discard the noun so the next one starts fresh.
           if mode == 0 then
             mode = 1
             if (noun != '') && (precede != '') then
               add_noun_precede(db, precede, noun)
               printf("%s(%s)\n", precede, noun) if VERB
               noun = ''
               precede = ''
             end
             noun = ''
           end
           # Extend the preceding text, or start it.
           if precede != '' then
             precede << mecab.get_word(index)
           else
             precede = mecab.get_word(index)
           end
         end
       end
       # Flush the pair left over after the last examined token.
       if (noun != '') && (precede != '') then
         add_noun_precede(db, precede, noun)
         printf("%s(%s)\n", precede, noun) if VERB
       end
     end
   end
   
   
   
   # ---------------------------------------
   # --- main routine
   # ---------------------------------------
   
   # --- open DB
   # Connect with the constants defined at the top of the file and set the
   # connection character set.
   db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
   db.do('SET NAMES utf8')


   # --- tweet query
   # Every sentence row with process=0 is tokenized, its nouns are registered
   # and linked, and the row is then flagged process=1.
   item_num = 0
   sth = nil
   begin
     sth = db.execute('SELECT id,text FROM sentence WHERE process=0')
     sth.each do |row|
       token = MecabParse.new(row[1])
       nounlist = token.get_noun
       nounlist.each do |item|
         # Each entry is whitespace-separated: first field word, second field yomi.
         word, yomi = item.split
         printf("%d: %s", item_num, word) if VERB
         # add_word returns false when the word was newly inserted.
         unless add_word(db, word, yomi, 1)
           # Only words present in the local hatena_keyword table are looked
           # up on the web.
           if check_hatena_from_db(db, word)["found"] > 0
             check_hatena_from_web(db, word)
             # wait for hatena
             sleep(0.5)
           end
         end
         #
         puts if VERB
       end
       link_noun_list(db, nounlist)
       parse_noun_pair(db, token)
       parse_noun_pair_precede(db, token)
       db.do('UPDATE sentence SET process=1 WHERE id=?', row[0])
       #
       item_num += 1
     end
   rescue => e
     puts e
     puts "error terminate."
   ensure
     # --- end
     # Release the SELECT handle first; a failure there is reported but must
     # not prevent the disconnect.
     begin
       sth.finish if sth
     rescue => e
       puts e
     ensure
       db.disconnect
     end
   end

トップ   差分 バックアップ リロード   一覧 単語検索 最終更新   ヘルプ   最終更新のRSS
Last-modified: 2017-11-06 (月) 01:22:22 (2361d)