# Copyright (C) 2006 by Aiwota Programmer
# aiwotaprog@tetteke.tk
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

from HTMLParserEx import HTMLParserEx
import htmlentitydefs


class BareHTMLParser(HTMLParserEx):
    """Parse HTML just enough to extract text, bold state and link target.

    Text is collected in an internal buffer and handed to a callback in
    chunks.  A chunk is emitted whenever a <b> or <a> tag opens or closes,
    at every <br>, and when the parser is closed.

    The to_out_func callback has the form:

        def some_func(untied_data, is_bold, href):

    where untied_data is the text with the markup removed, is_bold tells
    whether that text is bold, and href is the URL of the enclosing
    anchor, or None when there is none.

    The tag handlers of this class act only on <b>, <br> and <a>.

    A <br> becomes a newline character at the start of the next chunk.
    Spaces at the end of the pending chunk before a <br>, and spaces that
    directly follow the newline, are stripped.  The head of the very first
    line is never stripped.
    """

    def __init__(self, to_out_func):
        """Create a parser that reports its text chunks to to_out_func."""
        HTMLParserEx.__init__(self)
        self.to_out_func = to_out_func
        # State attached to the text that is currently being buffered.
        self.bold = False
        self.href = None
        # Text collected since the last flush.
        self.buffer = ""

    def reset_func(self, to_out_func):
        """Replace the output callback.

        Pending text is flushed to the old callback first, so it is not
        delivered to the new one.
        """
        self.flush()
        self.to_out_func = to_out_func

    def to_out(self, data):
        """Append data to the buffer.

        Leading spaces of data are dropped while the buffer ends with a
        newline, i.e. at the head of a line started by <br>.
        """
        if self.buffer.endswith("\n"):
            data = data.lstrip(" ")
        self.buffer = self.buffer + data

    def flush(self):
        """Send the buffered text to the callback and empty the buffer.

        Nothing is sent when the buffer is empty.
        """
        if self.buffer:
            self.to_out_func(self.buffer, self.bold, self.href)
            self.buffer = ""

    def newline(self):
        """End the current line.

        Trailing spaces of the pending text are stripped and the text is
        flushed; the newline itself becomes the first character of the
        next chunk.
        """
        self.buffer = self.buffer.rstrip(" ")
        self.flush()
        self.to_out("\n")

    # override
    def close(self):
        """Close the underlying parser, then flush the remaining text."""
        HTMLParserEx.close(self)
        self.flush()

    # override handle_*

    def handle_starttag(self, tag, attr):
        """Track <b> and <a> state and turn <br> into a line break."""
        if tag == "b":
            # Text buffered so far is not bold: emit it before switching.
            self.flush()
            self.bold = True
        elif tag == "br":
            self.newline()
        elif tag == "a":
            # Text buffered so far belongs to the previous href state.
            self.flush()
            # If href is given more than once the last one wins; an anchor
            # without href leaves self.href untouched.
            for item in attr:
                if item[0] == "href":
                    self.href = item[1]

    def handle_endtag(self, tag):
        """Emit the text of a closing <b> or <a> and reset its state."""
        if tag == "b":
            self.flush()
            self.bold = False
        elif tag == "a":
            self.flush()
            self.href = None

    def handle_data(self, data):
        """Buffer plain text found between tags."""
        self.to_out(data)

    def handle_charref(self, ref):
        """Buffer the character named by a numeric character reference.

        ref is the text between "&#" and ";".  Decimal ("65") and
        hexadecimal ("x41" / "X41") forms are decoded.  A reference that
        cannot be converted to a character is output literally.
        """
        try:
            if ref[:1] in ("x", "X"):
                # The standard HTMLParser passes hexadecimal references
                # with their leading "x", which int(ref) cannot parse.
                codepoint = int(ref[1:], 16)
            else:
                codepoint = int(ref)
            data = unichr(codepoint)
        except (ValueError, OverflowError):
            # Not a number, or outside the range unichr() accepts.
            data = "&#" + ref + ";"
        self.to_out(data)

    def handle_entityref(self, name):
        """Buffer the character for a named entity such as "amp".

        An entity that is not in htmlentitydefs is output literally.
        """
        codepoint = htmlentitydefs.name2codepoint.get(name)
        if codepoint is None:
            self.to_out("&" + name + ";")
        else:
            self.to_out(unichr(codepoint))
