/* 
 * Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
 *
 * This file is part of Rast.
 * See the file COPYING for redistribution information.
 *
 */

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <apr_strings.h>

#include "rast/config.h"
#include "rast/query.h"
#include "rast/string.h"

/*
 * Fallback optimizer for query types that have nothing to rewrite:
 * the query is handed back unchanged as its own optimized form.
 * `pool' is not used.  Always returns RAST_OK.
 */
static rast_error_t *
default_query_optimize(rast_query_t *query, rast_query_t **optimized_query,
                       apr_pool_t *pool)
{
    (void) pool;

    *optimized_query = query;
    return RAST_OK;
}

/*
 * Query object representing "no query".  It carries nothing beyond the
 * common rast_query_t header.
 */
typedef struct {
    rast_query_t base;
} null_query_t;

/*
 * Return the printable form of a null query, "(null)".
 *
 * The text is duplicated into `pool' so that the caller receives writable
 * storage through the non-const return type instead of a pointer to a
 * string literal.  `base' is not used.
 */
static char *
null_query_inspect(rast_query_t *base, apr_pool_t *pool)
{
    (void) base;

    return apr_pstrdup(pool, "(null)");
}

/*
 * Executing a null query never searches anything: it always reports
 * RAST_ERROR_EMPTY_QUERY and leaves `result' untouched.  None of the
 * arguments are consulted.
 */
static rast_error_t *
null_query_exec(rast_query_t *query,
                rast_local_db_t *db, rast_query_option_t *options,
                rast_query_result_t **result, apr_pool_t *pool)
{
    rast_error_t *empty_query_error;

    empty_query_error = rast_error(RAST_ERROR_EMPTY_QUERY, NULL);
    return empty_query_error;
}

/*
 * Callback table of the null query.  Its address also serves as the type
 * tag tested by rast_query_is_null().
 */
static rast_query_type_t null_query_type = {
    null_query_inspect,
    null_query_exec,
    default_query_optimize,
};

/*
 * Allocate a null query from `pool' and tag it with null_query_type.
 */
static rast_query_t *
null_query_create(apr_pool_t *pool)
{
    null_query_t *null_query = apr_palloc(pool, sizeof(*null_query));

    null_query->base.type = &null_query_type;
    return &null_query->base;
}

/*
 * Tell whether `query' is a null query, i.e. whether its type pointer is
 * the null query callback table.  Returns 1 if so, 0 otherwise.
 */
int
rast_query_is_null(rast_query_t *query)
{
    if (query->type == &null_query_type) {
        return 1;
    }
    return 0;
}

/*
 * Query for a single term in the database's main text index.
 */
typedef struct {
    rast_query_t base;
    const char *term;   /* text handed to rast_text_index_search() */
} term_query_t;

/*
 * Printable form of a term query: the term wrapped in double quotes,
 * allocated from `pool'.  The term is copied verbatim (no escaping).
 */
static char *
term_query_inspect(rast_query_t *base, apr_pool_t *pool)
{
    const char *term = ((term_query_t *) base)->term;

    return apr_psprintf(pool, "\"%s\"", term);
}

/*
 * Search the database's text index for the query's term.  Term
 * frequencies are requested unless scoring is disabled
 * (RAST_SCORE_METHOD_NONE).
 */
static rast_error_t *
term_query_exec(rast_query_t *base,
                rast_local_db_t *db, rast_query_option_t *options,
                rast_query_result_t **result, apr_pool_t *pool)
{
    term_query_t *term_query = (term_query_t *) base;
    int need_tf;

    if (options->score_method == RAST_SCORE_METHOD_NONE) {
        need_tf = 0;
    }
    else {
        need_tf = 1;
    }
    return rast_text_index_search(db->text_index, term_query->term,
                                  need_tf, result, pool);
}

/* Callback table of term queries (inspect, exec and optimize callbacks). */
static rast_query_type_t term_query_type = {
    term_query_inspect,
    term_query_exec,
    default_query_optimize,
};

/*
 * Allocate a term query from `pool'.  `term' is stored by pointer, not
 * copied, so it must stay valid for as long as the query is used.
 */
static rast_query_t *
term_query_create(apr_pool_t *pool, const char *term)
{
    term_query_t *term_query = apr_palloc(pool, sizeof(*term_query));

    term_query->term = term;
    term_query->base.type = &term_query_type;
    return &term_query->base;
}

/*
 * Query that matches a value against a named property.
 */
typedef struct {
    rast_query_t base;
    const char *name;   /* property name, resolved with get_property() */
    const char *value;  /* text to search for in that property */
} property_query_t;

/*
 * Shared formatter for property queries: produces `(NAMEOP "VALUE")',
 * where `op' is the operator text supplied by the caller.  The string is
 * allocated from `pool'.
 */
static char *
property_query_inspect(rast_query_t *base, const char *op, apr_pool_t *pool)
{
    property_query_t *property_query = (property_query_t *) base;
    const char *name = property_query->name;
    const char *value = property_query->value;

    return apr_psprintf(pool, "(%s%s \"%s\")", name, op, value);
}

/*
 * Allocate a property query of the given `type' from `pool'.  `name' and
 * `value' are stored by pointer, not copied.
 */
static rast_query_t *
property_query_create(apr_pool_t *pool, rast_query_type_t *type,
                      const char *name, const char *value)
{
    property_query_t *property_query;

    property_query = apr_palloc(pool, sizeof(*property_query));
    property_query->name = name;
    property_query->value = value;
    property_query->base.type = type;
    return &property_query->base;
}

/*
 * Printable form of a `name:value' property query; delegates to
 * property_query_inspect() with ":" as the operator.
 */
static char *
property_pe_query_inspect(rast_query_t *base, apr_pool_t *pool)
{
    static const char colon_operator[] = ":";

    return property_query_inspect(base, colon_operator, pool);
}

/*
 * Look up the property called `name' in `db'.
 *
 * On success stores pointers to the property definition and to the index
 * entry at the same position in `*property' / `*property_index' and
 * returns RAST_OK.  If no property has that name, returns a
 * RAST_ERROR_INVALID_QUERY error and leaves both outputs untouched.
 */
static rast_error_t *
get_property(rast_local_db_t *db, const char *name,
             rast_property_t **property,
             rast_property_index_t **property_index)
{
    int idx = 0;

    while (idx < db->num_properties) {
        if (strcmp(db->properties[idx].name, name) == 0) {
            *property = &db->properties[idx];
            *property_index = &db->property_indices[idx];
            return RAST_OK;
        }
        idx++;
    }
    return rast_error(RAST_ERROR_INVALID_QUERY, "unknown property: %s", name);
}

/*
 * Execute a `name:value' query: search the property's own text index for
 * the value.  Fails with RAST_ERROR_INVALID_QUERY when the property is
 * unknown or was not declared with RAST_PROPERTY_FLAG_TEXT_SEARCH.
 * Term frequencies are never requested (third search argument is 0).
 */
static rast_error_t *
property_pe_query_exec(rast_query_t *base,
                       rast_local_db_t *db, rast_query_option_t *options,
                       rast_query_result_t **result, apr_pool_t *pool)
{
    property_query_t *property_query = (property_query_t *) base;
    rast_property_t *prop;
    rast_property_index_t *prop_index;
    rast_error_t *err;

    err = get_property(db, property_query->name, &prop, &prop_index);
    if (err != RAST_OK) {
        return err;
    }
    if ((prop->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) == 0) {
        return rast_error(RAST_ERROR_INVALID_QUERY,
                          ": not supported for %s", property_query->name);
    }
    return rast_text_index_search(prop_index->text, property_query->value, 0,
                                  result, pool);
}

/* Callback table of `name:value' property queries. */
static rast_query_type_t property_pe_query_type = {
    property_pe_query_inspect,
    property_pe_query_exec,
    default_query_optimize,
};

/*
 * Allocate a query result from `pool' with empty term and candidate
 * rings.  Only the two rings are initialised here.
 */
rast_query_result_t *
rast_query_result_create(apr_pool_t *pool)
{
    rast_query_result_t *fresh = apr_palloc(pool, sizeof(*fresh));

    APR_RING_INIT(&fresh->terms, rast_term_t, link);
    APR_RING_INIT(&fresh->candidates, rast_candidate_t, link);
    return fresh;
}

/*
 * Query selecting documents whose property `name' lies between two
 * bounds.  A NULL bound means that side is open.
 */
typedef struct {
    rast_query_t base;
    const char *name;       /* property name */
    const char *min_value;  /* lower bound as text, or NULL for none */
    const char *max_value;  /* upper bound as text, or NULL for none */
    int include_min;        /* non-zero: keys equal to min_value match */
    int include_max;        /* non-zero: keys equal to max_value match */
} property_range_query_t;

/* Map a truth value to the character used by inspect output: 't' or 'f'. */
static char
format_bool_value(int n)
{
    if (n != 0) {
        return 't';
    }
    return 'f';
}

/*
 * Render an optional string for inspect output: the literal text nil for
 * NULL, otherwise the string in double quotes (allocated from `pool').
 */
static const char *
format_string_value(apr_pool_t *pool, const char *s)
{
    if (s == NULL) {
        return "nil";
    }
    return apr_psprintf(pool, "\"%s\"", s);
}

/*
 * Printable form of a range query:
 *     (range "NAME" MIN INCLUDE_MIN MAX INCLUDE_MAX)
 * where each bound is a quoted string or nil and each include flag is
 * t or f.  The result is allocated from `pool'.
 */
static char *
property_range_query_inspect(rast_query_t *base, apr_pool_t *pool)
{
    property_range_query_t *range = (property_range_query_t *) base;
    const char *min_text = format_string_value(pool, range->min_value);
    const char *max_text = format_string_value(pool, range->max_value);
    char min_flag = format_bool_value(range->include_min);
    char max_flag = format_bool_value(range->include_max);

    return apr_psprintf(pool, "(range \"%s\" %s %c %s %c)", range->name,
                        min_text, min_flag, max_text, max_flag);
}

/*
 * State of one scan over a property's inverted index for a range query.
 */
typedef struct {
    apr_pool_t *pool;               /* pool the cursor was allocated from */
    rast_local_db_t *db;            /* database being searched */
    property_range_query_t *query;  /* range being evaluated */
    rast_type_e property_type;      /* type of the property, for key comparison */
    DB *inv_db;                     /* the property's inverted index */
    DBT db_key;                     /* key at the current cursor position */
    DBT db_value;                   /* value at the current cursor position */
    DBT min_key;                    /* packed lower bound (set only if min_value != NULL) */
    DBT max_key;                    /* packed upper bound (set only if max_value != NULL) */
    DBC *cursor;                    /* Berkeley DB cursor over inv_db */
    int found;                      /* non-zero while the cursor is on a record */
} range_cursor_t;

/*
 * Convert the textual bound `value' into the DBT `key' used to position
 * or compare against the property's inverted index.
 *
 * `key' is always zeroed first, so it is in a defined state even when
 * `value' is NULL (open bound), in which case nothing else is done.
 *
 * - String, date and datetime properties: the key points at `value'
 *   itself (not copied) and its size is strlen(value).
 * - Unsigned integer properties: `value' must be a complete, non-negative
 *   decimal number that fits in rast_uint_t.  It is stored, after
 *   rast_fix_byte_order(), in memory allocated from `pool'.  Empty
 *   strings, negative numbers, trailing garbage and out-of-range values
 *   are rejected with RAST_ERROR_INVALID_QUERY.
 *
 * Any other property type leaves the zeroed key and returns RAST_OK.
 */
static rast_error_t *
pack_property_key(apr_pool_t *pool, DBT *key,
                  rast_type_e type, const char *value, int is_native)
{
    const char *digits;
    char *invalid;
    unsigned long parsed;
    rast_uint_t *fixed_number;

    memset(key, 0, sizeof(DBT));
    if (value == NULL) {
        return RAST_OK;
    }
    switch (type) {
    case RAST_TYPE_STRING:
    case RAST_TYPE_DATE:
    case RAST_TYPE_DATETIME:
        key->data = (void *) value;
        key->size = strlen(value);
        break;
    case RAST_TYPE_UINT:
        /* strtoul() silently negates "-N", so look at the sign ourselves. */
        digits = value;
        while (isspace((unsigned char) *digits)) {
            digits++;
        }
        errno = 0;
        parsed = strtoul(digits, &invalid, 10);
        if (*digits == '-' ||           /* negative number */
            invalid == digits ||        /* no digits at all */
            *invalid != '\0' ||         /* trailing garbage */
            errno == ERANGE ||          /* does not fit unsigned long */
            (unsigned long) (rast_uint_t) parsed != parsed) {
            return rast_error(RAST_ERROR_INVALID_QUERY,
                              "invalid number: %s", value);
        }
        fixed_number = (rast_uint_t *) apr_palloc(pool, sizeof(rast_uint_t));
        *fixed_number = rast_fix_byte_order((rast_uint_t) parsed, is_native);
        key->data = fixed_number;
        key->size = sizeof(rast_uint_t);
        break;
    }
    return RAST_OK;
}

/*
 * Build a range cursor for `query' over `db', allocated from `pool'.
 *
 * Resolves the property, checks that it was declared with
 * RAST_PROPERTY_FLAG_SEARCH, packs both bounds into DB keys and opens a
 * Berkeley DB cursor on the property's inverted index inside the
 * database's transaction.  On success `*result' receives the cursor,
 * which must later be released with range_cursor_destroy().  On any
 * failure an error is returned and `*result' is not written.
 */
static rast_error_t *
range_cursor_create(apr_pool_t *pool,
                    rast_local_db_t *db, property_range_query_t *query,
                    range_cursor_t **result)
{
    range_cursor_t *rc;
    rast_property_t *prop;
    rast_property_index_t *prop_index;
    rast_error_t *err;
    int status;

    rc = (range_cursor_t *) apr_palloc(pool, sizeof(*rc));
    rc->pool = pool;
    rc->db = db;
    rc->query = query;
    rc->found = 0;

    err = get_property(db, query->name, &prop, &prop_index);
    if (err != RAST_OK) {
        return err;
    }
    if ((prop->flags & RAST_PROPERTY_FLAG_SEARCH) == 0) {
        return rast_error(RAST_ERROR_INVALID_QUERY,
                          ": not supported for `%s'", query->name);
    }

    rc->inv_db = prop_index->inv;
    rc->property_type = prop->type;
    memset(&rc->db_key, 0, sizeof(DBT));
    memset(&rc->db_value, 0, sizeof(DBT));

    err = pack_property_key(pool, &rc->min_key,
                            prop->type, query->min_value, db->is_native);
    if (err != RAST_OK) {
        return err;
    }
    err = pack_property_key(pool, &rc->max_key,
                            prop->type, query->max_value, db->is_native);
    if (err != RAST_OK) {
        return err;
    }

    status = prop_index->inv->cursor(prop_index->inv, db->bdb_txn,
                                     &rc->cursor, 0);
    if (status != 0) {
        return db_error_to_rast_error(status);
    }

    *result = rc;
    return RAST_OK;
}

/*
 * Close the Berkeley DB cursor held by `cursor'.  The result of the close
 * is not examined; the structure's memory belongs to its pool.
 */
static void
range_cursor_destroy(range_cursor_t *cursor)
{
    DBC *dbc = cursor->cursor;

    dbc->c_close(dbc);
}

/*
 * Interpret the status of a cursor read.
 *
 *   0            -> *found = 1, RAST_OK
 *   DB_NOTFOUND  -> *found = 0, RAST_OK (running off the end is not an error)
 *   anything else-> converted to a rast error; *found is left unchanged
 */
static rast_error_t *
check_db_result(int dberr, int *found)
{
    if (dberr == 0) {
        *found = 1;
        return RAST_OK;
    }
    if (dberr == DB_NOTFOUND) {
        *found = 0;
        return RAST_OK;
    }
    return db_error_to_rast_error(dberr);
}

/*
 * For a range with an exclusive lower bound, step the cursor past every
 * record whose key equals the lower bound.  Does nothing when there is no
 * lower bound or when it is inclusive.  If stepping fails or hits the end
 * of the index, the outcome is reported through check_db_result().
 */
static rast_error_t *
range_cursor_skip_min_value(range_cursor_t *cursor)
{
    property_range_query_t *range = cursor->query;
    int status;

    if (range->min_value == NULL || range->include_min) {
        return RAST_OK;
    }
    for (;;) {
        if (rast_compare_keys(cursor->property_type, cursor->inv_db,
                              &cursor->db_key, &cursor->min_key) != 0) {
            break;
        }
        status = cursor->cursor->c_get(cursor->cursor,
                                       &cursor->db_key,
                                       &cursor->db_value, DB_NEXT);
        if (status != 0) {
            return check_db_result(status, &cursor->found);
        }
    }
    return RAST_OK;
}

/*
 * Position the cursor on the first record of the range.
 *
 * Without a lower bound the scan starts at the first record of the index
 * (DB_FIRST); otherwise at the first key not less than the lower bound
 * (DB_SET_RANGE), after which records equal to an exclusive lower bound
 * are skipped.  cursor->found tells whether a record is available.
 */
static rast_error_t *
range_cursor_get_first(range_cursor_t *cursor)
{
    rast_error_t *err;
    int status;

    if (cursor->query->min_value != NULL) {
        cursor->db_key.data = cursor->min_key.data;
        cursor->db_key.size = cursor->min_key.size;
        status = cursor->cursor->c_get(cursor->cursor,
                                       &cursor->db_key, &cursor->db_value,
                                       DB_SET_RANGE);
    }
    else {
        status = cursor->cursor->c_get(cursor->cursor,
                                       &cursor->db_key,
                                       &cursor->db_value, DB_FIRST);
    }

    err = check_db_result(status, &cursor->found);
    if (err != RAST_OK) {
        return err;
    }
    if (cursor->found) {
        return range_cursor_skip_min_value(cursor);
    }
    return RAST_OK;
}

/*
 * Advance the cursor by one record (DB_NEXT) and update cursor->found.
 */
static rast_error_t *
range_cursor_get_next(range_cursor_t *cursor)
{
    DBC *dbc = cursor->cursor;
    int status;

    status = dbc->c_get(dbc, &cursor->db_key, &cursor->db_value, DB_NEXT);
    return check_db_result(status, &cursor->found);
}

/*
 * Tell whether the scan is finished: either no record is available, or
 * the current key lies beyond the upper bound (strictly beyond when the
 * bound is inclusive, at-or-beyond when it is exclusive).  A range
 * without an upper bound only ends when the records run out.
 */
static int
range_cursor_is_done(range_cursor_t *cursor)
{
    property_range_query_t *range = cursor->query;
    int order;

    if (!cursor->found) {
        return 1;
    }
    if (range->max_value == NULL) {
        return 0;
    }

    order = rast_compare_keys(cursor->property_type, cursor->inv_db,
                              &cursor->db_key, &cursor->max_key);
    return range->include_max ? (order > 0) : (order >= 0);
}

/*
 * Append a candidate for the record under the cursor to `candidates'.
 *
 * The document id is read from the start of the current DB value as a
 * rast_size_t and passed through rast_fix_byte_order().  The candidate is
 * allocated from `pool' and starts with an empty term ring.
 */
static void
range_query_add_candidate(apr_pool_t *pool, apr_array_header_t *candidates,
                          range_cursor_t *cursor)
{
    rast_candidate_t *entry = apr_palloc(pool, sizeof(*entry));
    rast_candidate_t **slot;

    entry->doc_id =
        rast_fix_byte_order(*(rast_size_t *) cursor->db_value.data,
                            cursor->db->is_native);
    APR_RING_INIT(&entry->terms, rast_term_frequency_t, link);

    slot = (rast_candidate_t **) apr_array_push(candidates);
    *slot = entry;
}

/*
 * qsort() comparator ordering candidate pointers by ascending doc_id.
 *
 * The ids are compared rather than subtracted: a difference narrowed to
 * int can wrap and report the wrong order for ids that are far apart or
 * of an unsigned type.  Returns -1, 0 or 1.
 */
static int
compare_candidates(const void *v1, const void *v2)
{
    const rast_candidate_t *c1 = *(const rast_candidate_t *const *) v1;
    const rast_candidate_t *c2 = *(const rast_candidate_t *const *) v2;

    return (c1->doc_id > c2->doc_id) - (c1->doc_id < c2->doc_id);
}

/*
 * Turn the collected candidate pointers into a query result.
 *
 * The array is sorted in place with compare_candidates(), then every
 * candidate is linked, in that order, onto the candidate ring of a new
 * result allocated from `pool'.  Always returns RAST_OK.
 */
static rast_error_t *
range_query_result_create(apr_pool_t *pool, apr_array_header_t *candidates,
                          rast_query_result_t **result)
{
    rast_candidate_t **sorted = (rast_candidate_t **) candidates->elts;
    rast_query_result_t *fresh;
    int idx;

    qsort(sorted, candidates->nelts, sizeof(rast_candidate_t *),
          compare_candidates);

    fresh = rast_query_result_create(pool);
    *result = fresh;
    for (idx = 0; idx < candidates->nelts; idx++) {
        rast_candidate_t *candidate = sorted[idx];

        APR_RING_INSERT_TAIL(&fresh->candidates, candidate,
                             rast_candidate_t, link);
    }
    return RAST_OK;
}

/*
 * Execute a property range query.
 *
 * Scans the property's inverted index from the lower to the upper bound,
 * collects one candidate per record and returns them, sorted by document
 * id, in `*result'.  Candidates and the result are allocated from `pool';
 * the cursor and the temporary candidate array live in a sub-pool.
 *
 * Once the cursor has been created, every exit path (success or error)
 * closes the DB cursor and destroys the sub-pool.
 */
static rast_error_t *
property_range_query_exec(rast_query_t *base,
                          rast_local_db_t *db, rast_query_option_t *options,
                          rast_query_result_t **result, apr_pool_t *pool)
{
    property_range_query_t *query = (property_range_query_t *) base;
    range_cursor_t *cursor;
    apr_pool_t *sub_pool;
    rast_error_t *error;
    apr_array_header_t *candidates;

    apr_pool_create(&sub_pool, pool);
    error = range_cursor_create(sub_pool, db, query, &cursor);
    if (error != RAST_OK) {
        /* No DB cursor was handed back, only the sub-pool to release. */
        apr_pool_destroy(sub_pool);
        return error;
    }

    error = range_cursor_get_first(cursor);
    if (error != RAST_OK) {
        goto cleanup;
    }

    candidates = apr_array_make(sub_pool, 10, sizeof(rast_candidate_t *));
    while (!range_cursor_is_done(cursor)) {
        range_query_add_candidate(pool, candidates, cursor);
        error = range_cursor_get_next(cursor);
        if (error != RAST_OK) {
            goto cleanup;
        }
    }
    error = range_query_result_create(pool, candidates, result);

cleanup:
    range_cursor_destroy(cursor);
    apr_pool_destroy(sub_pool);
    return error;
}

/*
 * Callback table of property range queries.  Its address is also used as
 * a type tag by and_query_optimize().
 */
static rast_query_type_t property_range_query_type = {
    property_range_query_inspect,
    property_range_query_exec,
    default_query_optimize,
};

/*
 * Allocate a range query on property `name' from `pool'.  A NULL
 * `min_value' or `max_value' leaves that side of the range open.  All
 * strings are stored by pointer, not copied.
 */
static rast_query_t *
property_range_query_create(apr_pool_t *pool, const char *name,
                            const char *min_value, int include_min,
                            const char *max_value, int include_max)
{
    property_range_query_t *range = apr_palloc(pool, sizeof(*range));

    range->name = name;
    range->min_value = min_value;
    range->max_value = max_value;
    range->include_min = include_min;
    range->include_max = include_max;
    range->base.type = &property_range_query_type;
    return &range->base;
}

/*
 * Callback table of a composite (multi-operand) query.  It extends the
 * generic table with the hooks composite_query_exec() uses to combine the
 * candidate lists of two operands.
 */
typedef struct {
    rast_query_type_t base;
    /* operator text printed by composite_query_inspect() */
    const char *operator;
    /*
     * Called for a pair (c1 from the accumulated result, c2 from the next
     * operand).  Returns the next c2 to try against the same c1, or NULL
     * to stop and move on to the next c1.
     */
    rast_candidate_t *(*merge_candidates)(rast_candidate_t *c1,
                                          rast_candidate_t *c2);
    /*
     * Called once after the pairwise pass with the positions at which
     * both lists stopped, to deal with whatever is left over.
     */
    void (*process_rest_candidates)(rast_candidate_ring_t *candidates1,
                                    rast_query_result_t *r1,
                                    rast_candidate_t *c1,
                                    rast_query_result_t *r2,
                                    rast_candidate_t *c2);
} composite_query_type_t;

/* Query built from a ring of operand queries (AND, OR, NOT). */
typedef struct {
    rast_query_t base;
    rast_query_ring_t operands;  /* operand queries, in insertion order */
} composite_query_t;

/*
 * Forward declarations: the composite type tables below refer to these
 * functions before their definitions appear.
 */
static rast_error_t *composite_query_exec(rast_query_t *base,
                                          rast_local_db_t *db,
                                          rast_query_option_t *options,
                                          rast_query_result_t **result,
                                          apr_pool_t *pool);
static char *composite_query_inspect(rast_query_t *base, apr_pool_t *pool);

/*
 * Pairwise merge step for AND.
 *
 *   c2 > c1 : c1 has no partner, so it is unlinked from its ring;
 *             returns NULL (move on to the next c1, keep c2).
 *   c2 == c1: the document matches both operands; c2's term frequencies
 *             are appended to c1's; returns NULL (move on to the next c1).
 *   c2 < c1 : returns the element after c2 so the caller keeps looking
 *             for a partner for the same c1.
 */
static rast_candidate_t *
and_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2)
{
    if (c2->doc_id > c1->doc_id) {
        APR_RING_REMOVE(c1, link);
        return NULL;
    }
    if (c1->doc_id == c2->doc_id) {
        APR_RING_CONCAT(&c1->terms, &c2->terms, rast_term_frequency_t, link);
        return NULL;
    }
    return APR_RING_NEXT(c2, link);
}

/*
 * Finish an AND merge: everything from `c1' to the end of `candidates1'
 * was never matched against the second operand, so that whole tail is
 * unlinked.  Nothing happens when `c1' is already the ring sentinel.
 * `r1', `r2' and `c2' are not used.
 */
static void
and_query_process_rest_candidates(rast_candidate_ring_t *candidates1,
                                  rast_query_result_t *r1,
                                  rast_candidate_t *c1,
                                  rast_query_result_t *r2,
                                  rast_candidate_t *c2)
{
    rast_candidate_t *tail;

    if (c1 == APR_RING_SENTINEL(candidates1, rast_candidate_t, link)) {
        return;
    }
    tail = APR_RING_LAST(candidates1);
    APR_RING_UNSPLICE(c1, tail, link);
}

/*
 * Fill the open sides of `rq' from `rq2'.
 *
 * A bound (and its include flag) is copied from `rq2' only where `rq' has
 * none.  Where `rq' already has a bound it is kept as is and the
 * corresponding bound of `rq2' is ignored.
 */
static void
merge_property_range_queries(property_range_query_t *rq,
                             property_range_query_t *rq2)
{
    int lower_open = (rq->min_value == NULL);
    int upper_open = (rq->max_value == NULL);

    if (lower_open) {
        rq->min_value = rq2->min_value;
        rq->include_min = rq2->include_min;
    }
    if (upper_open) {
        rq->max_value = rq2->max_value;
        rq->include_max = rq2->include_max;
    }
}

/*
 * Optimize an AND query by folding adjacent range queries on the same
 * property into a single range (e.g. `x >= 1' & `x <= 5').
 *
 * Two neighbours are merged only when their bounds are complementary,
 * i.e. they never both constrain the same side.  Bound values cannot be
 * compared here (the property type is unknown at this stage), so ranges
 * that both carry a lower bound, or both an upper bound, are left as
 * separate operands rather than losing one of the constraints.
 *
 * A pair that cannot be merged does not stop the pass; the scan simply
 * continues with the next operand.  After a merge the same operand is
 * tried again against its new neighbour, so longer runs are folded too.
 *
 * If a single operand remains it is returned as the optimized query,
 * otherwise the AND query itself.  `pool' is not used.
 */
static rast_error_t *
and_query_optimize(rast_query_t *base, rast_query_t **optimized_query,
                   apr_pool_t *pool)
{
    composite_query_t *query = (composite_query_t *) base;
    rast_query_t *q, *q2;
    property_range_query_t *rq, *rq2;

    q = APR_RING_FIRST(&query->operands);
    while (q != APR_RING_SENTINEL(&query->operands, rast_query_t, link)) {
        q2 = APR_RING_NEXT(q, link);
        if (q->type == &property_range_query_type &&
            q2 != APR_RING_SENTINEL(&query->operands, rast_query_t, link) &&
            q2->type == &property_range_query_type) {
            rq = (property_range_query_t *) q;
            rq2 = (property_range_query_t *) q2;
            if (strcmp(rq->name, rq2->name) == 0 &&
                (rq->min_value == NULL || rq2->min_value == NULL) &&
                (rq->max_value == NULL || rq2->max_value == NULL)) {
                merge_property_range_queries(rq, rq2);
                APR_RING_REMOVE(q2, link);
                /* Stay on q: its new neighbour may be mergeable as well. */
                continue;
            }
        }
        q = q2;
    }

    q = APR_RING_FIRST(&query->operands);
    if (APR_RING_NEXT(q, link) ==
        APR_RING_SENTINEL(&query->operands, rast_query_t, link)) {
        *optimized_query = q;
    }
    else {
        *optimized_query = base;
    }
    return RAST_OK;
}

/* Type table of AND queries; printed with the "&" operator. */
static composite_query_type_t and_query_type = {
    {
        composite_query_inspect,
        composite_query_exec,
        and_query_optimize,
    },
    "&",
    and_query_merge_candidates,
    and_query_process_rest_candidates,
};

/*
 * Pairwise merge step for OR.
 *
 *   c2 < c1 : c2 is linked in front of c1; returns the element that
 *             followed c2 (read before c2's links are overwritten).
 *   c2 > c1 : returns NULL (move on to the next c1, keep c2).
 *   c2 == c1: c2's term frequencies are appended to c1's; returns the
 *             element after c2.
 */
static rast_candidate_t *
or_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2)
{
    rast_candidate_t *following;

    if (c2->doc_id < c1->doc_id) {
        following = APR_RING_NEXT(c2, link);
        APR_RING_INSERT_BEFORE(c1, c2, link);
        return following;
    }
    if (c2->doc_id > c1->doc_id) {
        return NULL;
    }
    APR_RING_CONCAT(&c1->terms, &c2->terms, rast_term_frequency_t, link);
    return APR_RING_NEXT(c2, link);
}

/*
 * Finish an OR merge: candidates of the second operand that were not
 * consumed (from `c2' to the last element of r2) are spliced onto the end
 * of r1's candidate ring.  Nothing happens when `c2' is r2's sentinel.
 * `candidates1' and `c1' are not used.
 */
static void
or_query_process_rest_candidates(rast_candidate_ring_t *candidates1,
                                 rast_query_result_t *r1, rast_candidate_t *c1,
                                 rast_query_result_t *r2, rast_candidate_t *c2)
{
    rast_candidate_t *rest_last;

    if (c2 == APR_RING_SENTINEL(&r2->candidates, rast_candidate_t, link)) {
        return;
    }
    rest_last = APR_RING_LAST(&r2->candidates);
    APR_RING_SPLICE_BEFORE(APR_RING_SENTINEL(&r1->candidates,
                                             rast_candidate_t, link),
                           c2, rest_last, link);
}

/* Type table of OR queries; printed with the "|" operator. */
static composite_query_type_t or_query_type = {
    {
        composite_query_inspect,
        composite_query_exec,
        default_query_optimize,
    },
    "|",
    or_query_merge_candidates,
    or_query_process_rest_candidates
};

/*
 * Pairwise merge step for NOT (set difference).
 *
 *   c2 < c1 : returns the element after c2 (keep scanning for c1).
 *   c2 == c1: c1 is excluded, so it is unlinked; returns NULL.
 *   c2 > c1 : c1 is kept; returns NULL.
 */
static rast_candidate_t *
not_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2)
{
    if (c2->doc_id < c1->doc_id) {
        return APR_RING_NEXT(c2, link);
    }
    if (c2->doc_id == c1->doc_id) {
        APR_RING_REMOVE(c1, link);
    }
    return NULL;
}

/*
 * Finish a NOT merge.  Nothing is left to do: the remaining candidates of
 * the first operand are kept and those of the second are ignored.
 */
static void
not_query_process_rest_candidates(rast_candidate_ring_t *candidates1,
                                  rast_query_result_t *r1,
                                  rast_candidate_t *c1,
                                  rast_query_result_t *r2,
                                  rast_candidate_t *c2)
{
    (void) candidates1;
    (void) r1;
    (void) c1;
    (void) r2;
    (void) c2;
}

/*
 * Type table of NOT queries; printed with the "!" operator.
 * composite_query_exec() compares against its address to switch scoring
 * off for the excluded operands.
 */
static composite_query_type_t not_query_type = {
    {
        composite_query_inspect,
        composite_query_exec,
        default_query_optimize,
    },
    "!",
    not_query_merge_candidates,
    not_query_process_rest_candidates
};

/*
 * Printable form of a composite query: `(OP operand operand ...)', where
 * OP is the type's operator text and each operand is rendered with
 * rast_query_inspect().  All strings are allocated from `pool'.
 */
static char *
composite_query_inspect(rast_query_t *base, apr_pool_t *pool)
{
    composite_query_t *composite = (composite_query_t *) base;
    composite_query_type_t *composite_type =
        (composite_query_type_t *) base->type;
    rast_query_t *operand;
    char *text;

    text = apr_pstrcat(pool, "(", composite_type->operator, NULL);
    operand = APR_RING_FIRST(&composite->operands);
    while (operand != APR_RING_SENTINEL(&composite->operands,
                                        rast_query_t, link)) {
        char *operand_text = rast_query_inspect(operand, pool);

        text = apr_pstrcat(pool, text, " ", operand_text, NULL);
        operand = APR_RING_NEXT(operand, link);
    }
    return apr_pstrcat(pool, text, ")", NULL);
}

/*
 * Execute a composite (AND / OR / NOT) query.
 *
 * The first operand is executed to obtain the accumulated result; every
 * further operand is executed in turn and folded into it:
 *   - the operand's terms are appended to the accumulated terms;
 *   - both candidate rings are walked in parallel, the type's
 *     merge_candidates() hook deciding per pair what to do;
 *   - process_rest_candidates() handles whatever is left over.
 *
 * For NOT queries the excluded operands are executed with scoring
 * switched off (RAST_SCORE_METHOD_NONE).  The caller's score method is
 * put back before returning, on error paths as well as on success.
 *
 * A query with a single operand simply yields that operand's result.
 * On error nothing is stored in `*result'.
 */
static rast_error_t *
composite_query_exec(rast_query_t *base,
                     rast_local_db_t *db, rast_query_option_t *options,
                     rast_query_result_t **result, apr_pool_t *pool)
{
    composite_query_type_t *query_type = (composite_query_type_t *) base->type;
    composite_query_t *query = (composite_query_t *) base;
    rast_query_t *q;
    rast_query_result_t *r, *r2;
    rast_candidate_t *c, *c2;
    rast_score_method_e save = options->score_method;
    rast_error_t *error;

    q = APR_RING_FIRST(&query->operands);
    if ((error = q->type->exec(q, db, options, &r, pool)) != RAST_OK) {
        return error;
    }

    if (query_type == &not_query_type) {
        options->score_method = RAST_SCORE_METHOD_NONE;
    }
    for (q = APR_RING_NEXT(q, link);
         q != APR_RING_SENTINEL(&query->operands, rast_query_t, link);
         q = APR_RING_NEXT(q, link)) {
        error = q->type->exec(q, db, options, &r2, pool);
        if (error != RAST_OK) {
            break;
        }
        APR_RING_CONCAT(&r->terms, &r2->terms, rast_term_t, link);
        c = APR_RING_FIRST(&r->candidates);
        c2 = APR_RING_FIRST(&r2->candidates);
        while (c != APR_RING_SENTINEL(&r->candidates,
                                      rast_candidate_t, link)) {
            /* Saved first: the merge hook may unlink c from the ring. */
            rast_candidate_t *next_c = APR_RING_NEXT(c, link);
            while (c2 != APR_RING_SENTINEL(&r2->candidates, rast_candidate_t,
                                           link)) {
                rast_candidate_t *next_c2;
                next_c2 = query_type->merge_candidates(c, c2);
                if (next_c2 == NULL) {
                    break;
                }
                c2 = next_c2;
            }
            if (c2 == APR_RING_SENTINEL(&r2->candidates, rast_candidate_t,
                                        link)) {
                break;
            }
            c = next_c;
        }
        query_type->process_rest_candidates(&r->candidates, r, c, r2, c2);
    }

    /* Undo the temporary scoring override whether or not we succeeded. */
    if (query_type == &not_query_type) {
        options->score_method = save;
    }
    if (error != RAST_OK) {
        return error;
    }

    *result = r;
    return RAST_OK;
}

/* Append `operand' at the end of the composite query's operand ring. */
static void
composite_query_add_operand(composite_query_t *query, rast_query_t *operand)
{
    rast_query_ring_t *operands = &query->operands;

    APR_RING_INSERT_TAIL(operands, operand, rast_query_t, link);
}

/*
 * Allocate a composite query of the given `type' from `pool' with the two
 * operands `q1' and `q2', in that order.
 */
static rast_query_t *
composite_query_create(apr_pool_t *pool, composite_query_type_t *type,
                       rast_query_t *q1, rast_query_t *q2)
{
    composite_query_t *composite = apr_palloc(pool, sizeof(*composite));

    composite->base.type = (rast_query_type_t *) type;
    APR_RING_INIT(&composite->operands, rast_query_t, link);
    composite_query_add_operand(composite, q1);
    composite_query_add_operand(composite, q2);
    return &composite->base;
}

/*
 * Return a printable representation of `query' by dispatching to the
 * inspect callback of its type.
 */
char *
rast_query_inspect(rast_query_t *query, apr_pool_t *pool)
{
    rast_query_type_t *type = query->type;

    return type->inspect(query, pool);
}

/*
 * Execute `query' against `db' by dispatching to the exec callback of its
 * type; the callback's error (or RAST_OK) is returned unchanged.
 */
rast_error_t *
rast_query_exec(rast_query_t *query,
                rast_local_db_t *db, rast_query_option_t *options,
                rast_query_result_t **result,
                apr_pool_t *pool)
{
    rast_query_type_t *type = query->type;

    return type->exec(query, db, options, result, pool);
}

/*
 * Optimize `query' by dispatching to the optimize callback of its type.
 * The callback stores the query to use from now on in `*optimized_query'.
 */
rast_error_t *
rast_query_optimize(rast_query_t *query, rast_query_t **optimized_query,
                    apr_pool_t *pool)
{
    rast_query_type_t *type = query->type;

    return type->optimize(query, optimized_query, pool);
}

/* Token kinds produced by the query lexer. */
typedef enum {
    TOKEN_NONE,
    TOKEN_ERROR,
    TOKEN_EOF,
    TOKEN_TERM,
    TOKEN_AND,
    TOKEN_OR,
    TOKEN_NOT,
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_COLON,
    TOKEN_EQ,
    TOKEN_LT,
    TOKEN_GT,
    TOKEN_LE,
    TOKEN_GE,
} token_e;

/*
 * Name of a token kind for use in error messages; "UNKNOWN" for any value
 * that is not one of the enumerators above.
 */
static const char *
token_name(token_e token)
{
    /* Indexed by token value; must stay in the same order as token_e. */
    static const char *const names[] = {
        "NONE",
        "ERROR",
        "EOF",
        "TERM",
        "AND",
        "OR",
        "NOT",
        "LPAREN",
        "RPAREN",
        "COLON",
        "EQ",
        "LT",
        "GT",
        "LE",
        "GE",
    };

    /* The unsigned cast makes negative values fall out of range too. */
    if ((unsigned int) token < sizeof(names) / sizeof(names[0])) {
        return names[token];
    }
    return "UNKNOWN";
}

/* State of the query lexer. */
typedef struct {
    apr_pool_t *pool;             /* pool that token values are allocated from */
    rast_tokenizer_t *tokenizer;  /* character tokenizer over the query string */
    const char *value;            /* text of the most recent TOKEN_TERM */
    rast_error_t *error;          /* error behind the most recent TOKEN_ERROR */
} lexer_t;

/*
 * Create a lexer over the NUL-terminated query string `s'.
 *
 * The lexer and its character tokenizer are allocated from `pool'; token
 * values produced later are allocated from `result_pool'.
 */
static lexer_t *
lexer_create(apr_pool_t *pool, apr_pool_t *result_pool,
             rast_encoding_module_t *encoding_module,
             const char *s)
{
    lexer_t *fresh = apr_palloc(pool, sizeof(*fresh));

    fresh->pool = result_pool;
    fresh->value = NULL;
    fresh->error = RAST_OK;
    fresh->tokenizer =
        rast_char_tokenizer_create(pool, encoding_module, s, strlen(s));
    return fresh;
}

/*
 * Lex the remainder of a double-quoted term; the opening quote has
 * already been consumed by the caller.
 *
 * Characters are accumulated up to the closing quote, which is consumed.
 * A backslash makes the following character literal (the backslash itself
 * is dropped).  On success lexer->value points at the accumulated text
 * and TOKEN_TERM is returned.  If the input ends first, lexer->error is
 * set to an "unterminated string" error and TOKEN_ERROR is returned.
 * `ch' is scratch storage for the current character.
 */
static token_e
get_quoted_term(lexer_t *lexer, rast_char_t *ch)
{
    rast_string_t *buffer = rast_string_create(lexer->pool, "", 0, 8);

    for (;;) {
        if (rast_char_tokenizer_is_done(lexer->tokenizer)) {
            goto unterminated;
        }
        rast_char_tokenizer_get_current(lexer->tokenizer, ch);

        if (*ch->ptr == '"') {
            rast_char_tokenizer_next(lexer->tokenizer);
            lexer->value = buffer->ptr;
            return TOKEN_TERM;
        }
        if (*ch->ptr == '\\') {
            /* Skip the backslash and take the next character literally. */
            rast_char_tokenizer_next(lexer->tokenizer);
            if (rast_char_tokenizer_is_done(lexer->tokenizer)) {
                goto unterminated;
            }
            rast_char_tokenizer_get_current(lexer->tokenizer, ch);
        }
        rast_string_append(buffer, ch->ptr, ch->nbytes);
        rast_char_tokenizer_next(lexer->tokenizer);
    }

unterminated:
    lexer->error = rast_error(RAST_ERROR_INVALID_QUERY,
                              "unterminated string");
    return TOKEN_ERROR;
}

/*
 * Lex an unquoted term starting at the character described by `ch'.
 *
 * The term extends up to, but not including, the next whitespace
 * character or ')', or to the end of the input.  A copy of the text is
 * stored in lexer->value (allocated from lexer->pool) and TOKEN_TERM is
 * returned.  `ch' is updated while scanning.
 */
static token_e
get_term(lexer_t *lexer, rast_char_t *ch)
{
    const char *begin = ch->ptr;
    const char *finish;

    for (;;) {
        if (rast_char_tokenizer_is_done(lexer->tokenizer)) {
            /* End of input: the term ends after the last character read. */
            ch->ptr += ch->nbytes;
            break;
        }
        rast_char_tokenizer_get_current(lexer->tokenizer, ch);
        if (rast_char_is_space(ch) || *ch->ptr == ')') {
            break;
        }
        rast_char_tokenizer_next(lexer->tokenizer);
    }

    finish = ch->ptr;
    lexer->value = apr_pstrndup(lexer->pool, begin, finish - begin);
    return TOKEN_TERM;
}

/*
 * Helper for `<' / `<=' and `>' / `>=': consumes the operator character
 * the tokenizer is on and, if a '=' follows directly, consumes that too
 * and returns `inclusive'; otherwise returns `strict'.
 */
static token_e
lexer_get_comparison_token(lexer_t *lexer, token_e strict, token_e inclusive)
{
    rast_char_t following;

    rast_char_tokenizer_next(lexer->tokenizer);
    if (!rast_char_tokenizer_is_done(lexer->tokenizer)) {
        rast_char_tokenizer_get_current(lexer->tokenizer, &following);
        if (*following.ptr == '=') {
            rast_char_tokenizer_next(lexer->tokenizer);
            return inclusive;
        }
    }
    return strict;
}

/*
 * Return the next token of the query string.
 *
 * Leading whitespace is skipped.  TOKEN_EOF is returned at the end of the
 * input.  Single-character operators are consumed and mapped directly
 * (both '!' and '-' mean NOT); '<' and '>' may be followed by '='.  A '"'
 * starts a quoted term; anything else starts an unquoted term.  For
 * TOKEN_TERM the text is left in lexer->value; for TOKEN_ERROR the cause
 * is left in lexer->error.
 */
static token_e
lexer_get_token(lexer_t *lexer)
{
    rast_char_t ch;
    token_e single;

    while (!rast_char_tokenizer_is_done(lexer->tokenizer)) {
        rast_char_tokenizer_get_current(lexer->tokenizer, &ch);

        if (!rast_char_is_space(&ch)) {
            break;
        }
        rast_char_tokenizer_next(lexer->tokenizer);
    }
    if (rast_char_tokenizer_is_done(lexer->tokenizer)) {
        return TOKEN_EOF;
    }
    rast_char_tokenizer_get_current(lexer->tokenizer, &ch);

    switch (*ch.ptr) {
    case '&':
        single = TOKEN_AND;
        break;
    case '|':
        single = TOKEN_OR;
        break;
    case '!':
    case '-':
        single = TOKEN_NOT;
        break;
    case '(':
        single = TOKEN_LPAREN;
        break;
    case ')':
        single = TOKEN_RPAREN;
        break;
    case ':':
        single = TOKEN_COLON;
        break;
    case '=':
        single = TOKEN_EQ;
        break;
    case '<':
        return lexer_get_comparison_token(lexer, TOKEN_LT, TOKEN_LE);
    case '>':
        return lexer_get_comparison_token(lexer, TOKEN_GT, TOKEN_GE);
    case '"':
        rast_char_tokenizer_next(lexer->tokenizer);
        return get_quoted_term(lexer, &ch);
    default:
        return get_term(lexer, &ch);
    }

    /* Single-character operator: consume it and report its token. */
    rast_char_tokenizer_next(lexer->tokenizer);
    return single;
}

/* State of the query parser. */
typedef struct {
    rast_encoding_module_t *encoding_module;  /* used to normalize term text */
    const char *query_string;                 /* original query, quoted in error messages */
    lexer_t *lexer;                           /* token source */
    token_e token;                            /* buffered lookahead token, TOKEN_NONE if empty */
} parser_t;

/*
 * Return the next token without consuming it.  The token is read from the
 * lexer only if none is buffered (parser->token == TOKEN_NONE).
 */
static token_e
lookahead(parser_t *parser)
{
    if (parser->token != TOKEN_NONE) {
        return parser->token;
    }
    parser->token = lexer_get_token(parser->lexer);
    return parser->token;
}

/*
 * Consume and return the next token: takes the lookahead token (reading
 * one if necessary) and empties the lookahead buffer.
 */
static token_e
shift_token(parser_t *parser)
{
    token_e consumed = lookahead(parser);

    parser->token = TOKEN_NONE;
    return consumed;
}

/*
 * Build the error for an unexpected token.
 *
 * For TOKEN_ERROR the lexer's own error is passed on.  Otherwise a
 * RAST_ERROR_INVALID_QUERY error is created naming the token, the
 * expected token (unless `expected' is TOKEN_NONE) and the query string.
 */
static rast_error_t *
token_error(parser_t *parser, token_e token, token_e expected)
{
    const char *actual_name;

    if (token == TOKEN_ERROR) {
        return parser->lexer->error;
    }

    actual_name = token_name(token);
    if (expected != TOKEN_NONE) {
        return rast_error(RAST_ERROR_INVALID_QUERY,
                          "unexpected token %s (expected %s): `%s'",
                          actual_name,
                          token_name(expected),
                          parser->query_string);
    }
    return rast_error(RAST_ERROR_INVALID_QUERY,
                      "unexpected token %s: `%s'",
                      actual_name,
                      parser->query_string);
}

/*
 * Consume the next token and require it to be `expected'.  Returns
 * RAST_OK on a match, otherwise the error built by token_error().  The
 * token is consumed in either case.  `pool' is not used.
 */
static rast_error_t *
match_token(parser_t *parser, token_e expected, apr_pool_t *pool)
{
    token_e actual = shift_token(parser);

    if (actual == expected) {
        return RAST_OK;
    }
    return token_error(parser, actual, expected);
}

/*
 * Parse the remainder of a `name:value' property query.
 *
 * The pending token (presumably the ':' the caller saw via lookahead --
 * confirm against the caller) is consumed without being checked, then a
 * TERM is required.  Its text is normalized with the parser's encoding
 * module and wrapped in a property query for `name', stored in `*result'.
 *
 * NOTE(review): the query is always created with property_pe_query_type;
 * the `type' parameter is not consulted.
 */
static rast_error_t *
parse_property_pe_query(parser_t *parser, const char *name,
                        rast_query_t **result,
                        rast_query_type_t *type, apr_pool_t *pool)
{
    rast_error_t *err;
    const char *raw;
    const char *normalized;

    shift_token(parser);
    err = match_token(parser, TOKEN_TERM, pool);
    if (err != RAST_OK) {
        return err;
    }

    raw = parser->lexer->value;
    normalized = rast_normalize_text(parser->encoding_module,
                                     raw, strlen(raw), NULL, pool);
    *result = property_query_create(pool, &property_pe_query_type,
                                    name, normalized);
    return RAST_OK;
}

/*
 * Parse the optional second half of a chained range comparison and
 * validate the property name.
 *
 * `query' must be a property_range_query_t built from a first comparison
 * "A op B", where A was stored as the name and B as a bound.  If a second
 * operator in the same direction follows (e.g. "10 < size <= 20"), the
 * query is rearranged in place:
 *
 *   - "<" / "<=" while the lower bound is still open: A becomes the lower
 *     bound (inheriting the first operator's inclusiveness), B becomes the
 *     property name and the new term becomes the upper bound.
 *   - ">" / ">=" while the upper bound is still open: the mirror image.
 *
 * Any other lookahead token is left untouched.
 *
 * Afterwards the property name must consist only of ASCII letters,
 * digits, '_' and '-'; otherwise a RAST_ERROR_GENERAL error is returned.
 */
static rast_error_t *
parse_optional_range(parser_t *parser, rast_query_t *query, apr_pool_t *pool)
{
    /* The repeated 'l'/'L' in the alphabet runs is redundant but harmless. */
    static const char name_chars[] =
        "abcdefghijklmnlopqrstuvwxyzABCDEFGHIJKLMNLOPQRSTUVWXYZ"
        "0123456789_-";
    property_range_query_t *range = (property_range_query_t *) query;
    token_e op = lookahead(parser);
    rast_error_t *error;

    switch (op) {
    case TOKEN_LT:
    case TOKEN_LE:
        if (range->min_value != NULL) {
            break;
        }
        shift_token(parser);
        error = match_token(parser, TOKEN_TERM, pool);
        if (error != RAST_OK) {
            return error;
        }
        /* "A < B < C": A is the lower bound, B the name, C the upper. */
        range->min_value = range->name;
        range->include_min = range->include_max;
        range->name = range->max_value;
        range->max_value = parser->lexer->value;
        range->include_max = (op == TOKEN_LE);
        break;
    case TOKEN_GT:
    case TOKEN_GE:
        if (range->max_value != NULL) {
            break;
        }
        shift_token(parser);
        error = match_token(parser, TOKEN_TERM, pool);
        if (error != RAST_OK) {
            return error;
        }
        /* "A > B > C": A is the upper bound, B the name, C the lower. */
        range->max_value = range->name;
        range->include_max = range->include_min;
        range->name = range->min_value;
        range->min_value = parser->lexer->value;
        range->include_min = (op == TOKEN_GE);
        break;
    default:
        break;
    }

    /* strspn() stops at the first character outside the allowed set. */
    if (range->name[strspn(range->name, name_chars)] != '\0') {
        return rast_error(RAST_ERROR_GENERAL, "invalid property name: %s",
                          range->name);
    }

    return RAST_OK;
}

/*
 * Parse "name <op> term" where <op> is one of = < > <= >= and build a
 * property range query.
 *
 *   token  - the comparison operator the caller saw via lookahead()
 *   name   - the term that preceded the operator
 *   result - receives the created query
 *
 * The pending operator token is discarded and a TOKEN_TERM is required.
 * The operator decides which bound(s) the term fills in and whether each
 * bound is inclusive:
 *
 *   =   lower = upper = term, both inclusive
 *   <   upper = term, exclusive      <=  upper = term, inclusive
 *   >   lower = term, exclusive      >=  lower = term, inclusive
 *
 * Any other operator yields a token_error().  On success the query is
 * handed to parse_optional_range(), whose result is returned.
 */
static rast_error_t *
parse_property_range_query(parser_t *parser, token_e token,
                           const char *name, rast_query_t **result,
                           apr_pool_t *pool)
{
    const char *value;
    const char *lower = NULL;
    const char *upper = NULL;
    int include_lower = 0;
    int include_upper = 0;
    rast_error_t *error;

    shift_token(parser);
    error = match_token(parser, TOKEN_TERM, pool);
    if (error != RAST_OK) {
        return error;
    }
    value = parser->lexer->value;

    switch (token) {
    case TOKEN_EQ:
        lower = value;
        upper = value;
        include_lower = 1;
        include_upper = 1;
        break;
    case TOKEN_LT:
        upper = value;
        break;
    case TOKEN_GT:
        lower = value;
        break;
    case TOKEN_LE:
        upper = value;
        include_upper = 1;
        break;
    case TOKEN_GE:
        lower = value;
        include_lower = 1;
        break;
    default:
        return token_error(parser, token, TOKEN_NONE);
    }

    *result = property_range_query_create(pool, name,
                                          lower, include_lower,
                                          upper, include_upper);
    return parse_optional_range(parser, *result, pool);
}

/*
 * Forward declaration: parse_paren_query() below calls parse_query() for
 * the contents of a parenthesized group, before parse_query() is defined.
 */
static rast_error_t *parse_query(parser_t *parser, rast_query_t **result,
                                 apr_pool_t *pool);

/*
 * Parse a query that starts with a term.
 *
 * The term text is captured from the lexer before the token is consumed,
 * because fetching the following token may replace the lexer's value.
 * The token after the term selects the query form:
 *
 *   ":"              -> parse_property_pe_query()
 *   = < > <= >=      -> parse_property_range_query()
 *   anything else    -> a plain term query on the normalized term text;
 *                       the following token is left unconsumed
 */
static rast_error_t *
parse_term_or_property_query(parser_t *parser, rast_query_t **result,
                             apr_pool_t *pool)
{
    const char *text = parser->lexer->value;
    token_e next;

    shift_token(parser);
    next = lookahead(parser);

    if (next == TOKEN_COLON) {
        return parse_property_pe_query(parser, text, result,
                                       &property_pe_query_type, pool);
    }
    if (next == TOKEN_EQ || next == TOKEN_LT || next == TOKEN_GT ||
        next == TOKEN_LE || next == TOKEN_GE) {
        return parse_property_range_query(parser, next, text, result, pool);
    }

    text = rast_normalize_text(parser->encoding_module, text, strlen(text),
                               NULL, pool);
    *result = term_query_create(pool, text);
    return RAST_OK;
}

/*
 * Parse a parenthesized sub-query.
 *
 * The pending token (the opening parenthesis the caller dispatched on) is
 * discarded, the contents are parsed with parse_query(), and a
 * TOKEN_RPAREN is then required.
 */
static rast_error_t *
parse_paren_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool)
{
    rast_error_t *error;

    shift_token(parser);
    error = parse_query(parser, result, pool);
    if (error != RAST_OK) {
        return error;
    }
    return match_token(parser, TOKEN_RPAREN, pool);
}

/*
 * Parse a primary query: either a term-led query or a parenthesized
 * group.  Any other leading token is reported through token_error().
 */
static rast_error_t *
parse_primary_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool)
{
    token_e next = lookahead(parser);

    if (next == TOKEN_TERM) {
        return parse_term_or_property_query(parser, result, pool);
    }
    if (next == TOKEN_LPAREN) {
        return parse_paren_query(parser, result, pool);
    }
    return token_error(parser, next, TOKEN_NONE);
}

/*
 * Combine q1 and q2 under the composite operator `type'.
 *
 *   - q1 == NULL: nothing to combine yet, q2 is returned as-is.
 *   - q1 is already a composite of this very type: q2 is appended to it
 *     as another operand and q1 is returned.
 *   - otherwise: a new composite of `type' holding q1 and q2 is created.
 */
static rast_query_t *
merge_queries(apr_pool_t *pool, rast_query_t *q1, rast_query_t *q2,
              composite_query_type_t *type)
{
    if (q1 == NULL) {
        return q2;
    }
    if (q1->type != (rast_query_type_t *) type) {
        return composite_query_create(pool, type, q1, q2);
    }
    composite_query_add_operand((composite_query_t *) q1, q2);
    return q1;
}

/*
 * Map an operator token to its composite query type.
 *
 * Returns the AND, OR or NOT query type for TOKEN_AND, TOKEN_OR and
 * TOKEN_NOT respectively, and NULL for every other token.
 */
static composite_query_type_t *
get_composite_query_type(token_e token)
{
    if (token == TOKEN_AND) {
        return &and_query_type;
    }
    if (token == TOKEN_OR) {
        return &or_query_type;
    }
    if (token == TOKEN_NOT) {
        return &not_query_type;
    }
    return NULL;
}

/*
 * Parse a primary query followed by any number of
 * "<operator> <primary query>" pairs.
 *
 * Operators are those recognized by get_composite_query_type().  They are
 * applied strictly left to right via merge_queries(); no precedence is
 * implemented here.  *result is only written on success.
 */
static rast_error_t *
parse_composite_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool)
{
    rast_query_t *combined;
    rast_error_t *error;

    error = parse_primary_query(parser, &combined, pool);
    if (error != RAST_OK) {
        return error;
    }

    for (;;) {
        composite_query_type_t *op_type;
        rast_query_t *operand;

        op_type = get_composite_query_type(lookahead(parser));
        if (op_type == NULL) {
            break;
        }
        shift_token(parser);
        error = parse_primary_query(parser, &operand, pool);
        if (error != RAST_OK) {
            return error;
        }
        combined = merge_queries(pool, combined, operand, op_type);
    }

    *result = combined;
    return RAST_OK;
}

/*
 * Parse a sequence of composite queries up to end of input or a closing
 * parenthesis (which is left unconsumed).
 *
 * Adjacent queries without an explicit operator are combined with the AND
 * query type.  An empty sequence produces the null query.  *result is
 * only written on success.
 */
static rast_error_t *
parse_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool)
{
    rast_query_t *combined = NULL;

    for (;;) {
        token_e next = lookahead(parser);
        rast_query_t *sub_query;
        rast_error_t *error;

        if (next == TOKEN_EOF || next == TOKEN_RPAREN) {
            break;
        }
        error = parse_composite_query(parser, &sub_query, pool);
        if (error != RAST_OK) {
            return error;
        }
        combined = merge_queries(pool, combined, sub_query, &and_query_type);
    }

    *result = (combined != NULL) ? combined : null_query_create(pool);
    return RAST_OK;
}

/*
 * Parse the query string `s' into a query tree.
 *
 *   pool            - pool the resulting query is allocated from
 *   encoding_module - encoding module used by the lexer and for
 *                     normalizing terms
 *   s               - query string
 *   result          - receives the parsed query; written only when
 *                     RAST_OK is returned
 *
 * The parser state lives in a temporary sub-pool that is destroyed before
 * returning.
 *
 * The whole input must be consumed: parse_query() stops at a ")" as well
 * as at end of input, so a ")" without a matching "(" at top level is
 * reported as an invalid query rather than silently discarding the rest
 * of the string.
 */
rast_error_t *
rast_parse_query(apr_pool_t *pool, rast_encoding_module_t *encoding_module,
                 const char *s, rast_query_t **result)
{
    apr_pool_t *sub_pool;
    rast_error_t *error;
    rast_query_t *query;
    parser_t *parser;

    apr_pool_create(&sub_pool, pool);
    parser = (parser_t *) apr_palloc(sub_pool, sizeof(parser_t));
    parser->encoding_module = encoding_module;
    parser->query_string = s;
    parser->lexer = lexer_create(sub_pool, pool, encoding_module, s);
    parser->token = TOKEN_NONE;

    error = parse_query(parser, &query, pool);
    if (error == RAST_OK) {
        /* Reject trailing input such as an unbalanced ")". */
        error = match_token(parser, TOKEN_EOF, pool);
    }
    if (error == RAST_OK) {
        *result = query;
    }

    apr_pool_destroy(sub_pool);
    return error;
}

/* vim: set filetype=c sw=4 expandtab : */
