# 株プロのサイトから企業一覧を得る。
# 
# 2009-09-22 katoy

require 'rubygems'
require 'open-uri'
require 'hpricot'
require 'google_chart'
require 'kconv'
require 'ya2yaml'
require 'pp'
require 'benchmark'

# Ruby 1.8 switch for UTF-8 multibyte string handling. Ruby 1.9 and later
# ignore this assignment (older versions of them print a warning).
$KCODE = "utf8"

# Category code => Japanese category name.
# The code is interpolated into the listing URL (".../list/t<code>.htm") and
# the name becomes the top-level key of the collected result.
# Frozen so the lookup table cannot be modified by accident at runtime.
CATEGORY_LIST = {
  "0001" => "水産・農林業",
  "0002" => "鉱業",
  "0003" => "建設業",
  "0004" => "食料品",
  "0005" => "繊維製品",
  "0006" => "パルプ・紙",
  "0007" => "化学",
  "0008" => "医薬品",
  "0009" => "石油・石炭製品",
  "0010" => "ゴム製品",
  "0011" => "ガラス・土石製品",
  "0012" => "鉄鋼",
  "0013" => "非鉄金属",
  "0014" => "金属製品",
  "0015" => "機械",
  "0016" => "電気機器",
  "0017" => "輸送用機器",
  "0018" => "精密機器",
  "0019" => "その他製品",
  "0020" => "電気・ガス業",
  "0021" => "陸運業",
  "0022" => "海運業",
  "0023" => "空運業",
  "0024" => "倉庫・運輸関連",
  "0025" => "情報・通信業",
  "0026" => "卸売業",
  "0027" => "小売業",
  "0028" => "銀行業",
  "0029" => "証券商品先物取引業",
  "0030" => "保険業",
  "0031" => "その他金融業",
  "0032" => "不動産業",
  "0033" => "サービス業"
}.freeze

# Scrapes the per-category listing pages on ke.kabupro.jp and collects the
# companies shown for every category in CATEGORY_LIST.
#
# Returns a Hash keyed by category name:
#   { name => { :cid => category code, :list => { id text => company name } } }
# Both the id and the company name are the inner text of table cells.
#
# Performs one HTTP GET per category. Network and HTTP errors raised by
# open-uri are not rescued here and propagate to the caller.
def get_list
  companylist = {}

  CATEGORY_LIST.each do |cid, category_name|
    sub_list = {}

    url = "http://ke.kabupro.jp/list/t#{cid}.htm"
    # URI#read (added by open-uri) fetches the body and closes the underlying
    # handle itself. The former Kernel#open(url).read never closed it, and
    # Kernel#open no longer accepts URLs as of Ruby 3.0.
    html = URI.parse(url).read.toutf8
    doc = Hpricot(html)
    doc.search("table.Quote").each do |table|
      table.search("td.CellName").each do |name_cell|
        name = name_cell.inner_text
        # The cell preceding the name cell is used as the company id
        # (presumably the securities code -- confirm against the page markup).
        id = name_cell.previous_sibling.inner_text
        sub_list[id] = name
      end
    end
    companylist[category_name] = { :cid => cid, :list => sub_list }
  end
  companylist
end

# Writes +info+ as YAML (via #ya2yaml) to cprofile/categorylist.yaml,
# relative to the current working directory, overwriting any existing file.
#
# info - object responding to #ya2yaml; nil makes the call a no-op.
#
# Returns nil when info is nil, otherwise the value returned by IO#write.
def save_info(info)
  return if info.nil?

  path = "cprofile/categorylist.yaml"
  # Create the output directory when it is missing; otherwise the write fails
  # with Errno::ENOENT and the data handed in by the caller is lost.
  dir = File.dirname(path)
  Dir.mkdir(dir) unless File.directory?(dir)

  # The block form closes the file when the block exits.
  File.open(path, "w") { |f| f.write(info.ya2yaml) }
end

# Script entry point: fetch every category listing, save the result, and
# print the Benchmark timing for the whole run.
timing = Benchmark.measure { save_info(get_list) }
puts timing

# Debug aid (disabled): assign the result of get_list to a variable inside
# the measured block and inspect it, e.g.
#   pp clist.size
#   pp clist.values.inject(0) { |sum, v| sum + v[:list].size }
