source code

	require 'rubygems'
	require 'dbi'
	require 'mecab_util.rb'
	require 'hatena_keyword.rb'
	
	# ---------------------------------------
	# --- define
	# ---------------------------------------
	# Data source string handed to DBI.connect. It names the "mysql" driver followed
	# by "koruri" and "localhost" (presumably database name and host -- confirm
	# against the DBI driver URL format).
	DBNAME = "DBI:mysql:koruri:localhost"
	# Login and password passed to DBI.connect; the values look like placeholders
	# that must be replaced before running.
	DBLOGIN = "yourid"
	DBPASS = "yourpass"
	# Verbose switch: progress output is printed only where the script finds
	# VERB == true.
	VERB = false
	
	# ---------------------------------------
	# --- subroutine
	# ---------------------------------------
	
	# Registers one occurrence of +word+ in the noun_bank table.
	#
	# The word is looked up first. When a row exists, its +appear+ column is
	# rewritten to the stored value plus one (the incoming +ref+ is ignored in
	# that case). When no row exists, a new row is inserted.
	#
	# db   - database handle responding to +execute+ and +do+.
	# word - noun to register.
	# yomi - value stored next to the word on insert (the reading, judging by the
	#        name; column meaning not visible here -- confirm against the schema).
	# ref  - value written into the new row on insert (presumably the initial
	#        +appear+ count -- confirm against the schema).
	#
	# Returns true when the word was already present (UPDATE issued), false when
	# it had to be inserted.
	def add_word(db, word, yomi, ref)
	  hit = false
	  sth = db.execute('SELECT appear FROM noun_bank WHERE word=?', word)
	  begin
	    sth.each do |row|
	      ref = row[0].to_i + 1
	      hit = true
	    end
	  ensure
	    # Release the statement handle even when fetching a row raises.
	    sth.finish
	  end

	  if hit
	    db.do('UPDATE noun_bank SET appear=? WHERE word=?', ref, word)
	  else
	    db.do('INSERT INTO noun_bank VALUES (NULL,?,?,0,?,0,0,0,-1,NULL,NULL)', word, yomi, ref)
	  end
	  hit
	end
	
	
	# Looks +word+ up in the local hatena_keyword table.
	#
	# db   - database handle responding to +execute+.
	# word - keyword to search for.
	#
	# Returns a Hash with string keys: "found" is 1 when a row matched and 0
	# otherwise; "yomi" is the first matching row's yomi column, or '' when
	# nothing matched. Only the first row is read.
	def check_hatena_from_db(db, word)
	  found = 0
	  yomi = ''
	  sth = db.execute('SELECT yomi FROM hatena_keyword WHERE word=?', word)
	  begin
	    sth.each do |row|
	      found = 1
	      yomi = row[0]
	      break
	    end
	  ensure
	    # Release the statement handle even when fetching a row raises.
	    sth.finish
	  end
	  { "found" => found, "yomi" => yomi }
	end
	
	# Checks +word+ through HatenaKeywordCheck and records the outcome in noun_bank.
	#
	# Nothing is written unless the lookup reports arrival? (presumably "the
	# keyword was found / the request succeeded" -- confirm in hatena_keyword.rb).
	# Otherwise the word's row gets hatena=1. When similar keywords are returned
	# they are also stored comma-joined in the word's +similar+ column, and each
	# of them is registered via add_word (reading taken from the local
	# hatena_keyword table) and then marked with super=<word>, hatena=1.
	#
	# db   - database handle responding to +do+ (and whatever add_word /
	#        check_hatena_from_db require).
	# word - noun to look up.
	def check_hatena_from_web(db, word)
	  lookup = HatenaKeywordCheck.new(word)
	  return unless lookup.arrival?

	  related = lookup.get_similar
	  if related.size.zero?
	    db.do('UPDATE noun_bank SET hatena=? WHERE word=?', 1, word)
	  else
	    db.do('UPDATE noun_bank SET hatena=?,similar=? WHERE word=?', 1, related.join(','), word)
	    # Register every similar keyword and link it back to the word it came from.
	    related.each do |candidate|
	      printf(",%s", candidate) if VERB == true
	      local = check_hatena_from_db(db, candidate)
	      add_word(db, candidate, local["yomi"], 0)
	      db.do('UPDATE noun_bank SET super=?,hatena=1 WHERE word=?', word, candidate)
	    end
	  end
	end
	
	# ---------------------------------------
	# --- main routine
	# ---------------------------------------
	
	# --- open DB
	# Connect, then walk every sentence row with process=0: split its text into
	# nouns, register each noun in noun_bank, and for nouns that were newly
	# inserted and exist in the local hatena_keyword table, look them up through
	# HatenaKeywordCheck. Each sentence is marked process=1 once its nouns are done.
	db = DBI.connect(DBNAME, DBLOGIN, DBPASS)

	# --- tweet query
	item_num = 0
	sth = nil
	begin
	  sth = db.execute('SELECT id,text FROM sentence WHERE process=0')
	  sth.each do |row|
	    token = MecabParse.new(row[1])
	    token.get_noun.each do |item|
	      # NOTE(review): item appears to be "<word> <yomi>" separated by
	      # whitespace -- confirm against MecabParse#get_noun.
	      chunk = item.split
	      printf("%d: %s", item_num, chunk[0]) if VERB == true
	      hit = add_word(db, chunk[0], chunk[1], 1)
	      unless hit
	        # Only newly inserted words are checked.
	        r = check_hatena_from_db(db, chunk[0])
	        if r["found"] > 0
	          check_hatena_from_web(db, chunk[0])
	          # wait for hatena
	          sleep(0.5)
	        end
	      end
	      puts if VERB == true
	    end
	    db.do('UPDATE sentence SET process=1 WHERE id=?', row[0])
	    item_num += 1
	  end
	rescue => e
	  puts e
	  puts "error terminate."
	ensure
	  # --- end
	  begin
	    # Release the SELECT statement handle; it is nil when execute itself failed.
	    sth.finish if sth
	  ensure
	    db.disconnect
	  end
	end

トップ   一覧 単語検索 最終更新   ヘルプ   最終更新のRSS