source code

	#!/usr/bin/ruby
	# -*- coding: utf-8 -*-
	
	require 'rubygems'
	require 'dbi'
	
	# ----- file
	# Connection settings handed to DBI.connect in the main section below.
	# DBNAME is passed as the first argument to DBI.connect.
	DBNAME = 'DBI:mysql:koruri:localhost'
	# Login and password passed to DBI.connect.
	# NOTE(review): 'yourid' / 'yourpass' look like placeholders — replace before running.
	DBLOGIN = 'yourid'
	DBPASS = 'yourpass'
	# When true, the number of inserted sentences is printed at the end of the run.
	VERB = false
	
	# ---------------------------------
	# ----- sub routine
	
	# Splits one tweet text into sentence-like fragments.
	#
	# The text is cleaned up, split on whitespace, and the resulting tokens are
	# glued back together (each preceded by a single space) until a separator
	# token is met:
	#
	# * replies (<tt>@...</tt>), hashtags (<tt>#...</tt>), URLs (<tt>http...</tt>)
	#   and retweet markers (<tt>RT</tt>, <tt>QT</tt>, <tt>RT:...</tt>) are dropped
	#   and end the current fragment;
	# * a token ending in a full stop is kept and ends the current fragment.
	#
	# A fragment is emitted only when it is longer than 3 characters (the
	# leading space counts). A shorter fragment is not discarded: it keeps
	# accumulating the tokens that follow.
	#
	# @param text [String, nil] raw tweet text; it is never modified
	# @return [Array<String>] the fragments, each starting with a space
	def split_line(text)
	  return [] if text.nil?

	  min_length = 3

	  # --- charconv
	  # Non-destructive gsub is used on purpose: the previous version aliased
	  # +text+ and ran gsub! on it, which modified the caller's string and
	  # raised on frozen input.
	  # NOTE(review): the published listing has blank lines at this point where
	  # further character-conversion substitutions appear to have been lost;
	  # they are not reconstructed here.
	  cleaned = text.gsub(/^>/, ' ')                      # leading ">" on a line
	  cleaned = cleaned.gsub(/^\[B\!\]/, ' ')              # leading "[B!]" on a line
	  cleaned = cleaned.gsub(/\([0-9]+\:[0-9]+\)/, ' ')    # "(12:34)"-style groups
	  cleaned = cleaned.gsub(/http/, ' http')              # make a glued URL its own token

	  output = []
	  line = ''
	  cleaned.split.each do |parts|
	    # Tokens never contain whitespace, so the retweet markers are matched
	    # as whole tokens ("RT", "QT") instead of the unreachable /^RT / form.
	    dropped = parts.start_with?('@', '#', 'http', 'RT:') ||
	              parts == 'RT' || parts == 'QT'

	    # period
	    # NOTE(review): the original condition was lost from the listing;
	    # reconstructed as "token ends with U+3002 or U+FF0E" — confirm.
	    sentence_end = parts.end_with?("\u3002", "\uFF0E")

	    # --- add
	    # A sentence-ending token is kept even when it also looks droppable,
	    # exactly as the original flag handling did.
	    line = "#{line} #{parts}" if sentence_end || !dropped

	    next unless dropped || sentence_end
	    # Too-short fragments are left in place so they keep growing.
	    next unless line.length > min_length

	    output << line
	    line = ''
	  end

	  output << line if line.length > min_length
	  output
	end
	
	
	
	# ---------------------------------
	# ----- main work
	
	# --- open DB
	db = DBI.connect(DBNAME, DBLOGIN, DBPASS)
	sth = nil
	item_num = 0

	begin
	  # --- query
	  # Every tweet_log row with process=0 is split into sentences.
	  sth = db.execute('SELECT id,text FROM tweet_log WHERE process=0')
	  sth.each do |row|
	    tweet_id = row[0]
	    sentences = split_line(row[1])
	    # The inserts and the process flag update share one transaction, so a
	    # tweet is only flagged once all of its sentences have been stored.
	    db.transaction do
	      sentences.each do |sentence|
	        db.do("insert into sentence values (NULL, ?, NULL, NULL, ?, 0, 0)", sentence, tweet_id)
	        item_num += 1
	      end
	      db.do("UPDATE tweet_log SET process=1 WHERE id=?", tweet_id)
	    end
	  end

	  printf("processed size = %d\n", item_num) if VERB
	ensure
	  # Release the statement and the connection even when a query fails;
	  # previously an exception skipped both sth.finish and db.disconnect.
	  begin
	    sth.finish if sth
	  ensure
	    #done
	    db.disconnect
	  end
	end

トップ   差分 バックアップ リロード   一覧 単語検索 最終更新   ヘルプ   最終更新のRSS
Last-modified: 2017-11-06 (月) 01:22:22 (2803d)