source code

   $KCODE = "u"
   require 'rubygems'
   require 'net/http'
   require 'uri'
   require 'rss'
   require 'dbi'
   require 'mecab_util.rb'
   
   # Downloads the "hot" entry list of one Hatena Bookmark genre as an RSS
   # feed and keeps the parsed feed items in memory.
   class HatenaBookmarkRss
     # genre:: path segment appended to "/entrylist/" when requesting the feed.
     def initialize(genre)
       @genre = genre
       @items = []
     end

     # Builds a URL query string ("k1=v1&k2=v2") from key/value pairs.
     #
     # Keys and values are percent-encoded as individual components, so
     # reserved characters such as "&" and "=" inside a value cannot split or
     # corrupt the query string.
     #
     # query_hash:: Hash (or any enumerable of [key, value] pairs).
     # Returns the encoded query String (empty when there are no pairs).
     def build_query(query_hash)
       query_hash.map do |key, value|
         "#{escape_query_component(key)}=#{escape_query_component(value)}"
       end.join("&")
     end

     # Fetches the RSS feed for @genre from b.hatena.ne.jp and stores its items.
     #
     # Returns the stored item list. When the response body cannot be
     # recognised as a feed (RSS::Parser.parse returns nil with validation
     # switched off) the list is empty instead of raising NoMethodError.
     def get_rss
       feed = nil
       # Historical switch to the net/http 1.2 API; kept for old Rubies.
       Net::HTTP.version_1_2
       Net::HTTP.start("b.hatena.ne.jp", 80) do |http|
         query = build_query("sort" => "hot",
                             "threshold" => "5",
                             "mode" => "rss",
                             "page" => "1")
         response = http.get("/entrylist/#{@genre}?#{query}")
         # Second argument false: parse without validating the feed.
         feed = RSS::Parser.parse(response.body, false)
       end
       @items = feed ? feed.items : []
     end

     # Number of feed items currently held.
     def size
       @items.size
     end

     # Returns the feed item at +index+, or nil when out of range.
     def get_at(index)
       @items[index]
     end

     private

     # Percent-encodes a single query key or value.
     #
     # URI.encode (used previously) leaves reserved characters untouched and
     # no longer exists on current Rubies. Prefer the form-component encoder
     # and fall back to URI.escape with an explicit "unsafe" pattern on
     # interpreters that predate it (the fallback writes a space as "%20"
     # rather than "+"; both are accepted in query strings).
     def escape_query_component(text)
       text = text.to_s
       if URI.respond_to?(:encode_www_form_component)
         URI.encode_www_form_component(text)
       else
         URI.escape(text, /[^a-zA-Z0-9_.~-]/n)
       end
     end
   end
   
   
   # Stores the hot entries of one Hatena Bookmark genre in a database.
   #
   # Each feed item's title is reduced to the nouns that have a positive score
   # in the noun_bank table, and the link is inserted into hot_url together
   # with that reduced title unless the URL is already present.
   class NewsStockDB < HatenaBookmarkRss

     # database:: open DBI database handle used for all queries.
     # genre::    genre name, passed on to HatenaBookmarkRss.
     def initialize(database, genre)
       super(genre)
       @db = database
     end

     # Inserts +link+ with +title+ into hot_url unless the title is empty or
     # the URL is already stored.
     #
     # NOTE(review): the INSERT relies on the table's column order
     # (NULL, link, title, genre, epoch seconds, 0); the column names are not
     # visible here - confirm against the schema.
     def insert_db(link, title)
       return if title == ''
       return if url_registered?(link)
       sql = "INSERT INTO hot_url VALUES(NULL,?,?,?,?,0)"
       @db.do(sql, link, title, @genre, Time.now.to_i)
     end

     # Looks up the score of +noun+ in noun_bank.
     #
     # Returns the first positive score found; otherwise the score of the last
     # row read, or 0 when the word has no rows.
     def get_noun_score(noun)
       score = 0
       sql = "SELECT score FROM noun_bank WHERE word=?"
       sth = @db.execute(sql, noun)
       begin
         sth.each do |row|
           score = row[0].to_i
           break if score > 0
         end
       ensure
         # Release the statement handle even when reading a row raises.
         sth.finish
       end
       score
     end

     # Extracts the nouns of +title+ that have a positive noun_bank score and
     # returns them joined by single spaces (empty String when none qualify).
     #
     # NOTE(review): each entry from MecabParse#get_noun is assumed to be a
     # space-separated String whose first field is the noun itself - confirm
     # against mecab_util.rb.
     def noun_brew(title)
       topic = []
       MecabParse.new(title).get_noun.each do |pair|
         word = pair.split(' ')[0]
         # An empty entry yields no word; it could never match a score row.
         next if word.nil?
         topic << word if get_noun_score(word) > 0
       end
       topic.join(' ')
     end

     # Fetches the feed and stores every item via insert_db.
     def process
       get_rss
       @items.each do |item|
         insert_db(item.link, noun_brew(item.title))
       end
     end

     private

     # True when hot_url already contains a row for +link+.
     # Only the first row is fetched; the handle is always finished.
     def url_registered?(link)
       sql = "SELECT * FROM hot_url WHERE url=?"
       sth = @db.execute(sql, link)
       begin
         !sth.fetch.nil?
       ensure
         sth.finish
       end
     end

   end
   
   # ---------------------------------------
   # --- main routine
   # ---------------------------------------

   # ----- login information (placeholders; replace with real credentials)
   DBNAME = 'YOURDB'
   DBLOGIN = 'LOGIN'
   DBPASS = 'PASS'

   # --- open DB
   db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
   begin
     db.do('SET NAMES utf8')

     # Collect the hot entries of each genre in turn.
     ['fun', 'game'].each do |genre|
       hatebu = NewsStockDB.new(db, genre)
       hatebu.process
     end
   ensure
     # Close the connection even when fetching or storing a genre fails.
     db.disconnect
   end

トップ   差分 バックアップ リロード   一覧 単語検索 最終更新   ヘルプ   最終更新のRSS
Last-modified: 2017-11-06 (月) 01:22:22 (2363d)