/******************************************************************************
 * mod_uploader / TemplateLexer.cpp
 ******************************************************************************
 * Copyright (C) 2005 Tetsuya Kimata <kimata@acapulco.dyndns.org>
 *
 * All rights reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any
 * damages arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any
 * purpose, including commercial applications, and to alter it and
 * redistribute it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must
 *    not claim that you wrote the original software. If you use this
 *    software in a product, an acknowledgment in the product
 *    documentation would be appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must
 *    not be misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source
 *    distribution.
 *
 * $Id: TemplateLexer.cpp 1003 2006-03-14 13:52:47Z svn $
 *****************************************************************************/

#include "TemplateLexer.h"
#include "Auxiliary.h"
#include "Misc.h"

#include "apr_strings.h"
#define APR_WANT_STRFUNC
#define APR_WANT_MEMFUNC
#include "apr_want.h"

#include <stdlib.h>
#include <ctype.h>

#ifdef DEBUG
#include <iostream>
#include <iomanip>
#endif

// DUMP_TOKEN is placed immediately before the lexer throws an error.  When
// DEBUG_TemplateLexer is defined it prints every token collected in tvector_
// so far; otherwise it expands to nothing.
#ifdef DEBUG_TemplateLexer
#define DUMP_TOKEN dump_token(&tvector_)
#else
#define DUMP_TOKEN
#endif

// True when the unread input of the handle `h` (a variable named `h` must be
// in scope where the macro is expanded) begins with the NUL-terminated string
// `str`, and `str` fits completely before h->input_end.
#define input_match(str) \
    (((h->input+strlen(str)) <= h->input_end) && \
     (strncmp(h->input, str, strlen(str)) == 0))

// Markers that open and close an embedded code section in the template text.
const char TemplateLexer::CODE_BEGIN_STR[]         = "<!--@";
const char TemplateLexer::CODE_END_STR[]           = "@-->";

// Keywords recognised inside a code section.
const char TemplateLexer::FOREACH_STR[]            = "foreach";
const char TemplateLexer::WHILE_STR[]              = "while";
const char TemplateLexer::IF_STR[]                 = "if";
const char TemplateLexer::ELSE_STR[]               = "else";
const char TemplateLexer::PRINT_STR[]              = "print";
const char TemplateLexer::IN_STR[]                 = "in";

// Assignment operators.
const char TemplateLexer::ASSIGNMENT_CHAR          = '=';
const char TemplateLexer::PLUS_ASSIGNMENT_STR[]    = "+=";
const char TemplateLexer::MINUS_ASSIGNMENT_STR[]   = "-=";

// Prefix that introduces a variable reference inside literal template text.
const char TemplateLexer::VARIABLE_CHAR            = '$';

// Comparison operators.
const char TemplateLexer::EQUAL_STR[]              = "==";
const char TemplateLexer::NOTEQUAL_STR[]           = "!=";
const char TemplateLexer::GREATER_THAN_CHAR        = '>';
const char TemplateLexer::LESS_THAN_CHAR           = '<';

// Arithmetic operators.
const char TemplateLexer::PLUS_CHAR                = '+';
const char TemplateLexer::MINUS_CHAR               = '-';
const char TemplateLexer::RESIDUE_CHAR             = '%';

// String quoting, escaping and bracketing characters.
const char TemplateLexer::QUOTE_CHAR               = '"';
const char TemplateLexer::ESCAPE_CHAR              = '\\';
const char TemplateLexer::BRACE_LEFT_CHAR          = '{';
const char TemplateLexer::BRACE_RIGHT_CHAR         = '}';
const char TemplateLexer::PAREN_LEFT_CHAR          = '(';
const char TemplateLexer::PAREN_RIGHT_CHAR         = ')';
const char TemplateLexer::BRACKET_LEFT_CHAR        = '[';
const char TemplateLexer::BRACKET_RIGHT_CHAR       = ']';

// Separators.
const char TemplateLexer::DOT_CHAR                 = '.';
const char TemplateLexer::COMMA_CHAR               = ',';
const char TemplateLexer::DELIMITER_CHAR           = ';';

// Number of tokens allocated at once by create_token(); also used as the
// initial capacity reserved for the token vector in the constructor.
const apr_size_t TemplateLexer::TOKEN_POOL_NUM     = TLEXER_TOKEN_POOL_NUM;

#ifdef DEBUG
// Debug-only table pairing each token type with a printable label; it is
// searched linearly by get_token_type().
TemplateLexer::token_type_str TemplateLexer::token_type_list[] = {
    { TemplateLexer::FOREACH,               "foreach"   },
    { TemplateLexer::WHILE,                 "while"     },
    { TemplateLexer::IF,                    "if"        },
    { TemplateLexer::ELSE,                  "else"      },
    { TemplateLexer::PRINT,                 "print"     },
    { TemplateLexer::IN,                    "in"        },
    { TemplateLexer::ASSIGNMENT,            "="         },
    { TemplateLexer::PLUS_ASSIGNMENT,       "+="        },
    { TemplateLexer::MINUS_ASSIGNMENT,      "-="        },
    { TemplateLexer::NOTEQUAL,              "!="        },
    { TemplateLexer::EQUAL,                 "=="        },
    { TemplateLexer::GREATER_THAN,          ">"         },
    { TemplateLexer::LESS_THAN,             "<"         },
    { TemplateLexer::PLUS,                  "+"         },
    { TemplateLexer::MINUS,                 "-"         },
    { TemplateLexer::RESIDUE,               "%"         },
    { TemplateLexer::PLUSPLUS,              "++"        },
    { TemplateLexer::MINUSMINUS,            "--"        },
    { TemplateLexer::BRACE_LEFT,            "{"         },
    { TemplateLexer::BRACE_RIGHT,           "}"         },
    { TemplateLexer::PAREN_LEFT,            "("         },
    { TemplateLexer::PAREN_RIGHT,           ")"         },
    { TemplateLexer::BRACKET_LEFT,          "["         },
    { TemplateLexer::BRACKET_RIGHT,         "]"         },
    { TemplateLexer::DOT,                   "."         },
    { TemplateLexer::COMMA,                 ","         },
    { TemplateLexer::DELIMITER,             ";"         },
    { TemplateLexer::INTEGER,               "integer"   },
    { TemplateLexer::STRING,                "string"    },
    { TemplateLexer::IDENTIFIER,            "identifier"},
};
#endif


/******************************************************************************
 * public methods
 *****************************************************************************/
/**
 * Prepares a lexer for the template text `input` of `length` bytes.
 *
 * No lexing happens here; tokens are produced on the first call to
 * get_token_list() or get_ident_map().  A sub-pool of `pool` is created for
 * the token storage and released again by the destructor.
 *
 * @param pool    parent APR pool; also used directly for string storage
 * @param input   start of the template text
 * @param length  size of the template text in bytes
 * @throws const char*  when the sub-pool cannot be created
 */
TemplateLexer::TemplateLexer(apr_pool_t *pool, const char *input, apr_size_t length)
    : pool_(pool),
      handle_(input, length),
      token_pool_(NULL),
      token_pool_end_(NULL),
      is_done_(false)
{
    const apr_status_t status = apr_pool_create(&tmp_pool_, pool);

    if (status != APR_SUCCESS) {
        throw "γݤ˼Ԥޤ";
    }

    // Avoid early reallocations of the token vector.
    tvector_.reserve(TOKEN_POOL_NUM);
}

/**
 * Destroys the sub-pool created by the constructor.  create_token() takes
 * token storage from that pool, so token pointers handed out through
 * get_token_list() must not be used after the lexer is destroyed.
 */
TemplateLexer::~TemplateLexer()
{
    apr_pool_destroy(tmp_pool_);
}

/**
 * Returns the token sequence of the template, running the lexer first if it
 * has not run yet.
 *
 * @return pointer to the lexer-owned token vector
 */
TemplateLexer::token_vector *TemplateLexer::get_token_list()
{
    exec();     // no-op after the first successful run

    token_vector *tokens = &tvector_;
    return tokens;
}

/**
 * Returns the table of identifier names met while lexing, running the lexer
 * first if it has not run yet.  IDENTIFIER tokens store an index into this
 * table in their i_val field.
 *
 * @return pointer to the lexer-owned identifier table
 */
TemplateLexer::ident_map *TemplateLexer::get_ident_map()
{
    exec();     // no-op after the first successful run

    ident_map *identifiers = &imap_;
    return identifiers;
}

#ifdef DEBUG
/**
 * Debug helper: prints every token in the range [start, end) via
 * print_token().
 */
void TemplateLexer::dump_token(token_vector::const_iterator start,
                               token_vector::const_iterator end)
{
    while (start != end) {
        TemplateLexer::print_token(*start);
        ++start;
    }
}

/**
 * Debug helper: prints every token stored in `tvector`.
 */
void TemplateLexer::dump_token(token_vector *tvector)
{
    const token_vector &tokens = *tvector;

    dump_token(tokens.begin(), tokens.end());
}

/**
 * Debug helper: looks up the printable label of a token's type in
 * token_type_list.
 *
 * @return the label, or NULL when the type has no entry in the table
 */
const char *TemplateLexer::get_token_type(token *token)
{
    const apr_size_t entry_count =
        sizeof(token_type_list)/sizeof(token_type_list[0]);
    const token_type_str *entry = token_type_list;
    const token_type_str * const table_end = token_type_list + entry_count;

    while (entry != table_end) {
        if (entry->type == token->type) {
            return entry->str;
        }
        ++entry;
    }

    return NULL;
}

/**
 * Debug helper: writes one token to cerr as "<type label>\t<value>", followed
 * by a separator line of 70 dashes.
 *
 * - IDENTIFIER: the index stored in i_val; when `imap` is given, the name
 *   from the table followed by the index in parentheses.
 * - STRING: the text with leading whitespace skipped.
 * - INTEGER: the value stored in i_val.
 * - any other type: the label only.
 *
 * @param token  token to print
 * @param imap   identifier table used to resolve names, or NULL
 */
void TemplateLexer::print_token(token *token, ident_map *imap)
{
    // get_token_type() returns NULL for a type missing from the table, and
    // inserting a null char pointer into a stream is undefined behaviour.
    const char *type_name = get_token_type(token);
    cerr << ((type_name != NULL) ? type_name : "(unknown)") << "\t";

    if (token->type == TemplateLexer::IDENTIFIER) {
        if (imap == NULL) {
            cerr << token->i_val;
        } else {
            cerr << imap->at(token->i_val) << '(' << token->i_val << ')';
        }
    } else if (token->type == TemplateLexer::STRING) {
        const char *c = token->s_val;
        // Template text may hold bytes >= 0x80; isspace() is only defined
        // for values representable as unsigned char (or EOF), hence the cast.
        while ((*c != '\0') && isspace(static_cast<unsigned char>(*c))) {
            c++;
        }
        cerr << c;
    } else if (token->type == TemplateLexer::INTEGER) {
        cerr << token->i_val;
    }
    cerr << endl;
    cerr << setw(70) << setfill('-') << '-' << endl;
}
#endif


/******************************************************************************
 * private methods
 *****************************************************************************/
void TemplateLexer::exec()
{
    if (is_done_) {
        return;
    }

    token *token;
    while ((token = get_token(&handle_)) != NULL) {
        tvector_.push_back(token);
    }

    is_done_ = true;
}

/**
 * Reads the next token from `h`.
 *
 * Unless `is_expr_only` is set, a handle that is still positioned at its
 * start is inside literal template text, so the call is forwarded to
 * read_tmpl_string().  Otherwise leading whitespace is skipped and a single
 * code token is produced: an operator, a punctuation character, a quoted
 * string, a keyword, an integer or an identifier.  Meeting CODE_END_STR
 * switches back to literal text through read_tmpl_string().
 *
 * @param h             lexer position; h->input is advanced past the token
 * @param is_expr_only  true when lexing a bare expression (in this file: the
 *                      inside of "${ ... }"), which never starts with
 *                      literal template text
 * @return the token, or NULL once the input is exhausted
 * @throws const char*  when the code section ends prematurely, a string
 *                      literal is not closed, or a character cannot start
 *                      any token
 */
TemplateLexer::token *TemplateLexer::get_token(handle *h,
                                               bool is_expr_only)
{
    token *token;
    char *s_value;
    apr_size_t i_value;
    const char *start;

    // The end-of-input test comes first: for an empty template input,
    // input_start and input_end coincide, and testing for the start position
    // first would hand control to read_tmpl_string() on every call, forever.
    if (h->input == h->input_end) {
        return NULL;
    }

    if ((h->input == h->input_start) && !is_expr_only) {
        return read_tmpl_string(h);
    }

    // <ctype.h> functions are only defined for unsigned char values (or
    // EOF), and templates may contain bytes >= 0x80, hence the casts below.
    while (isspace(static_cast<unsigned char>(*(h->input)))) {
        if (++(h->input) == h->input_end) {
            if (is_expr_only) {
                // Trailing blanks inside "${ ... }" just end the expression.
                return NULL;
            }
            DUMP_TOKEN;
            throw "ƥץ졼ȤǽλƤޤ";
        }
    }

    token = create_token();
    switch (*(h->input)) {
    case ASSIGNMENT_CHAR:
        // "==" or "="
        if (((h->input+1) < h->input_end) &&
            (*(h->input+1) == EQUAL_STR[1])) {
            h->input += 2;
            token->type = EQUAL;
        } else {
            token->type = ASSIGNMENT;
            h->input++;
        }
        break;

    case PLUS_CHAR:
        // "++", "+=" or "+"
        if (((h->input+1) != h->input_end) && (*(h->input+1) == PLUS_CHAR)) {
            h->input += 2;
            token->type = PLUSPLUS;
        } else if (((h->input+1) != h->input_end) &&
                   (*(h->input+1) == PLUS_ASSIGNMENT_STR[1])) {
            h->input += 2;
            token->type = PLUS_ASSIGNMENT;
        } else {
            token->type = PLUS;
            h->input++;
        }
        break;

    case MINUS_CHAR:
        // "--", "-=" or "-"
        if (((h->input+1) != h->input_end) && (*(h->input+1) == MINUS_CHAR)) {
            h->input += 2;
            token->type = MINUSMINUS;
        } else if (((h->input+1) != h->input_end) &&
                   (*(h->input+1) == MINUS_ASSIGNMENT_STR[1])) {
            token->type = MINUS_ASSIGNMENT;
            h->input += 2;
        } else {
            token->type = MINUS;
            h->input++;
        }
        break;

    case RESIDUE_CHAR:
        token->type = RESIDUE;
        h->input++;
        break;

    case QUOTE_CHAR:
        start = ++h->input;
        // Find the closing quote.  An escape character consumes the
        // character that follows it, so \" does not close the literal and
        // \\ is a literal backslash that does not escape a following quote.
        while ((h->input != h->input_end) && (*(h->input) != QUOTE_CHAR)) {
            if ((*(h->input) == ESCAPE_CHAR) &&
                ((h->input+1) != h->input_end)) {
                h->input++;
            }
            h->input++;
        }
        if (h->input == h->input_end) {
            DUMP_TOKEN;
            throw "ʸǽλƤޤ";
        }
        token->type = STRING;

        // The decoded text is never longer than the raw text between the
        // quotes, so raw length + 1 (for the terminator) always suffices.
        APR_PALLOC(token->s_val, char *, pool_, sizeof(char)*(h->input-start+1));

        s_value = const_cast<char *>(token->s_val);
        while (start != h->input) {
            if ((*start == ESCAPE_CHAR) && ((start+1) != h->input)) {
                start++;    // drop the escape, keep the escaped character
            }
            *(s_value++) = *(start++);
        }
        *s_value = '\0';

        h->input++;         // step over the closing quote
        break;
    case GREATER_THAN_CHAR:  token->type = GREATER_THAN;  h->input++; break;
    case LESS_THAN_CHAR:     token->type = LESS_THAN;     h->input++; break;
    case BRACE_LEFT_CHAR:    token->type = BRACE_LEFT;    h->input++; break;
    case BRACE_RIGHT_CHAR:   token->type = BRACE_RIGHT;   h->input++; break;
    case PAREN_LEFT_CHAR:    token->type = PAREN_LEFT;    h->input++; break;
    case PAREN_RIGHT_CHAR:   token->type = PAREN_RIGHT;   h->input++; break;
    case BRACKET_LEFT_CHAR:  token->type = BRACKET_LEFT;  h->input++; break;
    case BRACKET_RIGHT_CHAR: token->type = BRACKET_RIGHT; h->input++; break;
    case DOT_CHAR:           token->type = DOT;           h->input++; break;
    case COMMA_CHAR:         token->type = COMMA;         h->input++; break;
    case DELIMITER_CHAR:     token->type = DELIMITER;     h->input++; break;
    default:
        // NOTE(review): keywords are matched as plain prefixes, so an
        // identifier such as "index" is split into the keyword "in" and the
        // identifier "dex".  Left unchanged because templates written
        // without a separator (e.g. "elseif") may depend on it -- confirm.
        if (input_match(CODE_END_STR)) {
            // End of the code section: continue with literal template text.
            h->input += strlen(CODE_END_STR);
            return read_tmpl_string(h);
        } else if (input_match(FOREACH_STR)) {
            h->input += strlen(FOREACH_STR);
            token->type = FOREACH;
        } else if (input_match(WHILE_STR)) {
            h->input += strlen(WHILE_STR);
            token->type = WHILE;
        } else if (input_match(IF_STR)) {
            h->input += strlen(IF_STR);
            token->type = IF;
        } else if (input_match(ELSE_STR)) {
            h->input += strlen(ELSE_STR);
            token->type = ELSE;
        } else if (input_match(PRINT_STR)) {
            h->input += strlen(PRINT_STR);
            token->type = PRINT;
        } else if (input_match(IN_STR)) {
            h->input += strlen(IN_STR);
            token->type = IN;
        } else if (isdigit(static_cast<unsigned char>(*(h->input)))) {
            i_value = *(h->input++) - '0';
            while ((h->input != h->input_end) &&
                   (isdigit(static_cast<unsigned char>(*(h->input))))) {
                // Convert the digit character to its numeric value before
                // accumulating it.
                i_value = i_value*10 + (*(h->input) - '0');
                h->input++;
            }
            token->type = INTEGER;
            token->i_val = static_cast<int>(i_value);
        } else if (input_match(NOTEQUAL_STR)) {
            h->input += strlen(NOTEQUAL_STR);
            token->type = NOTEQUAL;
        } else if (isalpha(static_cast<unsigned char>(*(h->input)))) {
            // Identifier: a letter followed by letters, digits or '_'.
            s_value = const_cast<char *>(h->input++);
            while ((h->input != h->input_end) &&
                   (isalpha(static_cast<unsigned char>(*(h->input))) ||
                    isdigit(static_cast<unsigned char>(*(h->input))) ||
                    (*(h->input) == '_'))) {
                h->input++;
            }
            token->type = IDENTIFIER;
            token->i_val = static_cast<int>(get_imap_id(s_value, h->input-s_value));
        } else {
            DUMP_TOKEN;
            throw "ʸǤ";
        }
        break;
    }

    return token;
}

/**
 * Turns the literal template text starting at h->input into the tokens of a
 * print statement.
 *
 * The text runs up to the next CODE_BEGIN_STR, or to the end of the input
 * when there is none.  The tokens PRINT, STRING and, for each "$name" or
 * "${ expression }" reference, COMMA-separated identifier/expression tokens
 * are appended directly to tvector_; the closing DELIMITER token is returned
 * to the caller instead of being appended.  On return h->input points just
 * past the CODE_BEGIN_STR marker, or at the end of the input.
 *
 * A '$' directly preceded by ESCAPE_CHAR ends variable expansion: the rest
 * of the text, including the escape character, is emitted as one STRING.
 *
 * @param h  lexer position inside literal template text
 * @return the DELIMITER token that terminates the print statement
 * @throws const char*  when '$' is the last character of the text, a "${"
 *                      has no closing brace, or '$' is followed by something
 *                      that is neither '{' nor a letter
 */
TemplateLexer::token *TemplateLexer::read_tmpl_string(handle *h)
{
    bool is_code_begin;
    const char *str_end;
    const char *var_start;
    const char *var_end;
    token *token;

    str_end = static_cast<char *>(memmem(h->input, h->input_end-h->input,
                                         CODE_BEGIN_STR, strlen(CODE_BEGIN_STR)));

    if (str_end == NULL) {
        is_code_begin = false;
        str_end = h->input_end;
    } else {
        is_code_begin = true;
    }

    token = create_token(PRINT);
    tvector_.push_back(token);

    // The escape test looks at the byte before '$' only when '$' is not the
    // very first byte of the buffer.  The previous "==" comparison did the
    // opposite: it read one byte before the buffer when '$' came first and
    // never honoured the escape anywhere else.
    while ((h->input != str_end) &&
           ((var_start = strnchr(h->input, str_end-h->input,
                                 VARIABLE_CHAR)) != NULL) &&
           !((var_start != h->input_start) &&
             (*(var_start-1) == ESCAPE_CHAR))) {
        if ((var_start+1) == str_end) {
            DUMP_TOKEN;
            throw "ѿǽλƤޤ";
        }

        // Literal text in front of the variable reference.
        token = create_token(STRING);
        token->s_val = apr_pstrndup(pool_, h->input, var_start-h->input);
        tvector_.push_back(token);

        token = create_token(COMMA);
        tvector_.push_back(token);

        h->input = ++var_start;     // both now point just past the '$'

        if (*(h->input) == BRACE_LEFT_CHAR) {   // "${ ... }" form
            if ((var_end = strnchr(h->input, str_end-h->input,
                                   BRACE_RIGHT_CHAR)) == NULL) {
                DUMP_TOKEN;
                throw "ѿŸǤ";
            }

            // Lex the text between the braces as a stand-alone expression.
            handle expr_handle(h->input + 1, var_end-h->input-1);
            while ((token = get_token(&expr_handle, true)) != NULL) {
                tvector_.push_back(token);
            }
            h->input = var_end + 1;
        } else {                                // "$name" form
            // <ctype.h> functions are only defined for unsigned char values
            // (or EOF); template text may hold bytes >= 0x80.
            if (!isalpha(static_cast<unsigned char>(*(h->input)))) {
                DUMP_TOKEN;
                throw "ѿ̾Ǥ";
            }
            h->input++;
            while ((h->input != h->input_end) &&
                   (isalpha(static_cast<unsigned char>(*(h->input))) ||
                    isdigit(static_cast<unsigned char>(*(h->input))) ||
                    (*(h->input) == '_'))) {
                h->input++;
            }

            token = create_token(IDENTIFIER);
            token->i_val = static_cast<int>(get_imap_id(var_start, h->input-var_start));

            tvector_.push_back(token);
        }

        // Nothing follows the variable: no trailing COMMA or STRING needed.
        if (h->input == str_end) {
            goto STR_END;
        }

        token = create_token(COMMA);
        tvector_.push_back(token);
    }

    // Remaining literal text (possibly empty).
    token = create_token(STRING);
    token->s_val = apr_pstrndup(pool_, h->input, str_end-h->input);
    tvector_.push_back(token);

 STR_END:
    token = create_token(DELIMITER);

    if (is_code_begin) {
        h->input = str_end + strlen(CODE_BEGIN_STR);
    } else {
        h->input = str_end;
    }

    return token;
}

/**
 * Returns the index of an identifier name in imap_, registering the name
 * first when it is not known yet.
 *
 * @param start   first character of the name (not NUL-terminated)
 * @param length  number of characters in the name
 * @return index of the name in imap_
 */
apr_size_t TemplateLexer::get_imap_id(const char * const start, apr_size_t length)
{
    apr_size_t id;

    for (id = 0; id < imap_.size(); id++) {
        const char *known = imap_.at(id);

        // The lengths must agree as well: comparing only the first `length`
        // characters would make "foo" match an already stored "foobar".
        if ((strlen(known) == length) &&
            (strncmp(known, start, length) == 0)) {
            break;
        }
    }

    if (id == imap_.size()) {
        // Not found: store a NUL-terminated copy allocated from pool_.
        imap_.push_back(static_cast<const char *>(apr_pstrndup(pool_, start,
                                                                    length)));
    }

    return id;
}

/**
 * Hands out one token with its type set to `type`.
 *
 * Token storage is taken from tmp_pool_ in chunks of TOKEN_POOL_NUM tokens;
 * a new chunk is requested whenever the current one is used up.  Only the
 * type field is assigned here.
 *
 * @param type  type stored in the new token
 * @return the new token
 */
TemplateLexer::token *TemplateLexer::create_token(token_type type)
{
    const bool chunk_used_up = (token_pool_ == token_pool_end_);

    if (chunk_used_up) {
        APR_PALLOC(token_pool_, token *, tmp_pool_, sizeof(token)*TOKEN_POOL_NUM);

        token_pool_end_ = token_pool_ + TOKEN_POOL_NUM;
    }

    token *fresh = token_pool_;
    ++token_pool_;

    fresh->type = type;

    return fresh;
}


/******************************************************************************
 * test
 *****************************************************************************/
#ifdef DEBUG_TemplateLexer
#include "apr_general.h"
#include "apr_file_io.h"
#include "apr_mmap.h"

#include <iomanip>

/**
 * Prints the command-line synopsis of the test program to cerr.
 */
void usage(const char *prog_name)
{
    cerr << "Usage: " << prog_name;
    cerr << " <INPUT>" << endl;
}

int main(int argc, const char * const *argv)
{
    apr_pool_t *pool;
    apr_file_t *fd;
    apr_mmap_t *file_map;
    apr_finfo_t info;
    TemplateLexer::token_vector *tvector;
    TemplateLexer::ident_map *imap;

    apr_app_initialize(&argc, &argv, NULL);
    apr_pool_create(&pool, NULL);

    try {
        if (argc != 2) {
            throw "ѤΥե뤬ꤵƤޤ";
        }

        if (apr_file_open(&fd, argv[1], APR_READ,
                          APR_OS_DEFAULT, pool) != APR_SUCCESS) {
            throw "ե open ˼Ԥޤ";
        }

        if (apr_file_info_get(&info, APR_FINFO_SIZE, fd) != APR_SUCCESS) {
            throw "ե륵ǤޤǤ";
        }

        if (apr_mmap_create(&file_map, fd, 0,
                            static_cast<apr_size_t>(info.size), APR_MMAP_READ,
                            pool) != APR_SUCCESS) {
            throw "ե mmap ˼Ԥޤ";
        }

        TemplateLexer lexer(pool,
                            static_cast<const char *>(file_map->mm),
                            static_cast<apr_size_t>(info.size));

        tvector = lexer.get_token_list();
        imap = lexer.get_ident_map();

        TemplateLexer::token_vector::iterator i;
        for (i = tvector->begin(); i != tvector->end(); ++i) {
            TemplateLexer::print_token(*i, imap);
        }

        if (apr_mmap_delete(file_map) != APR_SUCCESS) {
            throw "ե munmap ˼Ԥޤ";
        }

        if (apr_file_close(fd) != APR_SUCCESS) {
            throw "ե close ˼Ԥޤ";
        }
    } catch(const char *message) {
        cerr << "Error: " << message << endl;
        usage(argv[0]);

        return EXIT_FAILURE;
    }

    apr_terminate();

    return EXIT_SUCCESS;
}

#endif

// Local Variables:
// mode: c++
// buffer-file-coding-system: euc-japan-dos
// End:
