
#  Fetches the financial statements pages of the MSN Money (Japan) site
#   http://jp.moneycentral.msn.com/investor/common/find.aspx?Company=JP:&NextPage=/investor/invsub/results/statemnt.aspx
#  and extracts the item names and values in CSV form.
#      Income statement, balance sheet, cash flow (annual)
#                Amounts are in yen, shown in millions (except per-share items)
#
#  Company codes can be looked up at
#   http://jp.moneycentral.msn.com/investor/invsub/results/statemnt.aspx
#    e.g. JP:7974  Nintendo
#         JP:7203  Toyota Motor
#
# usage:  ruby msn-finance.rb code ...
#  example:  ruby msn-finance.rb "JP:7974"
#        The data is written to ./data/JP-7974.csv.
#
#

require 'rubygems'
require 'hpricot'
require 'fastercsv'
require 'open-uri'
require 'kconv'
require 'pp'

# Ruby 1.8 global that selects UTF-8 for multibyte string handling.
# NOTE(review): presumably ignored (with a warning) on Ruby 1.9+ -- confirm
# against the interpreter this script is expected to run on.
$KCODE="utf8"

# Directory that output_csv creates (if missing) and writes the CSV files into.
OUTDIR = "data"

# Converts an HTML table (an Hpricot node or node set) into CSV rows.
#
# table:: object answering "/" with the "/tr" rows of the table
# nest::  nesting depth; the row label is indented by two spaces per level
#
# Each row is handled as follows:
# * a row that contains a nested table ("/td/table") is replaced by the rows
#   of that nested table, converted with nest + 1;
# * otherwise the cells are scanned in order: a cell holding a <span>
#   supplies the row label, any other cell supplies a value ("&nbsp;" becomes
#   an empty string and commas are stripped);
# * a row is emitted only when it has at least one value.
#
# Returns an Array of rows, each of the form [label, value, ...].
def table2csvdata(table, nest)
  rows = []

  (table / "/tr").each do |tr|
    nested = tr / "/td/table"
    unless nested.empty?
      rows.concat(table2csvdata(nested, nest + 1))
      next
    end

    label = nil
    values = []

    (tr / "/td").each do |cell|
      spans = cell / "span"
      unless spans.empty?
        label = spans.inner_html
        next
      end

      text = cell.inner_html
      text = "" if text == "&nbsp;"
      text = text.gsub(",", "")

      # Rows without a <span> (e.g. the period/date header row) use their
      # first plain cell as the label.
      if label.nil?
        label = text
      else
        values << text
      end
    end

    next if values.empty?

    rows << ["#{"  " * nest}#{label}", *values]
  end

  rows
end

# Builds the combined CSV rows for a list of statement pages.
#
# urls:: Array of locations to read; each one is passed to open, so with
#        open-uri loaded both http URLs and local file paths work.
#
# For every location a one-cell row holding the location itself is emitted
# first, followed by the rows table2csvdata extracts (at nesting level 0)
# from each table matched by the XPath below.
#
# Returns an Array of rows, each row being an Array.
def get3table(urls)
  csvdata = []
  urls.each do |url|
    # Emit the source marker as a proper one-cell row: the CSV writer expects
    # every row to be an Array, and a bare String only happened to work on
    # Ruby 1.8 where String still responded to map.
    csvdata << [url]

    # Block form so the file/HTTP handle is closed once parsing is done.
    # NOTE(review): relies on Hpricot reading the whole input eagerly.
    doc = open(url) { |io| Hpricot(io) }

    doc.search('/html/body/div/div/div/div/div//div/table[2]').each do |table|
      csvdata.concat(table2csvdata(table, 0))
    end
  end
  csvdata
end

# Returns the URLs of the annual income statement, balance sheet and cash
# flow pages (in that order) for the given company code, e.g. "JP:7974".
def get_urls(code)
  page = "http://jp.moneycentral.msn.com/investor/invsub/results/statemnt.aspx"
  %w[Income Balance CashFlow].map do |statement|
    "#{page}?Symbol=#{code}&lstStatement=#{statement}&stmtView=Ann"
  end
end

# Writes the three annual statements of one company into a single CSV file.
#
# code:: company code as used by the site, e.g. "JP:7974". The special value
#        "test" reads the local HTML fixtures through get3table_test instead
#        of fetching the pages for a company.
#
# The file is written to OUTDIR/<code with ":" replaced by "-">.csv (OUTDIR
# is created when missing) and every field is quoted.
def output_csv(code)
  # Fetch the pages for the requested company. The fixture loader used to be
  # hard-wired here, which made every code produce the same fixture data
  # under a company-specific file name.
  csvdata = code == "test" ? get3table_test : get3table(get_urls(code))

  Dir.mkdir(OUTDIR) unless File.exist?(OUTDIR)
  out_file = File.join(OUTDIR, "#{code.tr(':', '-')}.csv") # keep ":" out of file names

  # All three statements go into one CSV file.
  FasterCSV.open(out_file, "w", :force_quotes => true) do |csv|
    csvdata.each { |row| csv << row }
  end
end

# Test helper: builds the CSV rows from locally saved copies of the three
# statement pages (income, balance, cash flow) by handing their paths to
# get3table. The paths are relative, so they resolve against the current
# working directory.
#
# Returns whatever get3table returns for those three paths.
def get3table_test
  get3table([
    "../data/msn-finance/Income.html",
    "../data/msn-finance/Balance.html",
    "../data/msn-finance/CashFlow.html"
  ])
end

# code = "test"
# code = "JP:7974" # 任天堂

# Generate one CSV file for every company code given on the command line.
ARGV.each do |company_code|
  output_csv(company_code)
end
# --- End of File ---