# Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#

require "open3"
require "shellwords"
require File.join(File.dirname(__FILE__), "read-buckets-to-file")

# Filter for application/pdf documents.  The text of the document is
# extracted with the external pdftotext command and passed downstream
# page by page as text/plain; the title, subject and author reported by
# the external pdfinfo command are stored as filter properties.
class ApplicationPDF
  SUPPORTED_VERSION = 1
  MIME_TYPE = "application/pdf"

  include ReadBucketsToFile

  private

  # Converts the PDF file at +path+ to text and feeds it to +filter+.
  #
  # filter::    object that supplies +db_encoding+ and receives the
  #             converted text (+pass+) and the document properties
  #             (+set_property+).
  # mime_type:: not used by this method.
  # path::      path of the PDF file handed to pdftotext and pdfinfo.
  #
  # Raises Rast::RastError when pdftotext exits unsuccessfully and no
  # text at all has been passed downstream.
  def process_file(filter, mime_type, path)
    db_encoding = filter.db_encoding
    # Quote the path so that spaces or shell metacharacters in it cannot
    # split or alter the command lines built below.
    quoted_path = Shellwords.escape(path.to_s)
    input_encoding = nil
    passed_nbytes = 0

    # Converts one page to the database encoding and passes it
    # downstream.  Empty pages are skipped.  The input encoding is
    # guessed from the first non-empty page and reused afterwards.
    pass_page = lambda do |page|
      next if page.empty?

      input_encoding ||=
        Rast::EncodingConverter.guess(page, Rast::JAPANESE_ENCODINGS)
      text = Rast::EncodingConverter.convert_encoding(input_encoding,
                                                      db_encoding, page)
      brigade = Rast::Brigade.new
      brigade.insert_tail(Rast::TransientBucket.new(text))
      filter.pass(brigade, "text/plain")
      passed_nbytes += text.length
    end

    page = ""
    IO.popen("pdftotext #{quoted_path} -") do |io|
      io.each_line do |line|
        unless line.include?("\f")
          # Lines inside a page are joined with their line break and
          # surrounding whitespace removed.
          page.concat(line.strip)
          next
        end

        # A form feed ends the current page.  Every piece before a form
        # feed completes a page; the piece after the last one starts the
        # next page, and is kept even when the page before it was empty.
        pieces = line.split("\f", -1)
        rest = pieces.pop
        pieces.each do |piece|
          pass_page.call(page.concat(piece))
          page = ""
        end
        page = rest
      end
    end
    # Capture the exit status right away; later child processes would
    # overwrite $?.
    status = $?

    # Text after the last form feed is only passed on when the command
    # succeeded, so a failed run that never completed a page still raises.
    pass_page.call(page) if status.success?

    if !status.success? && passed_nbytes == 0
      raise Rast::RastError, "failed to convert file"
    end

    Open3.popen3("pdfinfo #{quoted_path}") do |_stdin, stdout, _stderr|
      info = stdout.read
      %w[title subject author].each do |name|
        # [ \t]* instead of \s*: the match must stay on the same line, so
        # an empty field does not capture the following line.
        match_data = /^#{name}:[ \t]*(.*)$/i.match(info)
        next unless match_data

        # No page text was available to guess from; fall back to the
        # pdfinfo output itself.
        # NOTE(review): assumes guess treats this output like page text
        # -- confirm.
        input_encoding ||=
          Rast::EncodingConverter.guess(info, Rast::JAPANESE_ENCODINGS)
        value = Rast::EncodingConverter.convert_encoding(input_encoding,
                                                         db_encoding,
                                                         match_data[1])
        filter.set_property(name, value)
      end
    end

    # Signal the end of the stream.
    eos_brigade = Rast::Brigade.new
    eos_brigade.insert_tail(Rast::EOSBucket.new)
    filter.pass(eos_brigade, "text/plain")
  end
end
