/******************************************************************************
 * mod_uploader / RFC1867Parser.cpp
 ******************************************************************************
 * Copyright (C) 2004 Tetsuya Kimata <kimata@acapulco.dyndns.org>
 *
 * All rights reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any
 * damages arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any
 * purpose, including commercial applications, and to alter it and
 * redistribute it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must
 *    not claim that you wrote the original software. If you use this
 *    software in a product, an acknowledgment in the product
 *    documentation would be appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must
 *    not be misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source
 *    distribution.
 *
 * $Id: RFC1867Parser.cpp 1063 2006-05-13 03:10:51Z svn $
 *****************************************************************************/

#include "RFC1867Parser.h"
#include "MessageDigest5.h"
#include "DirectoryCleaner.h"
#include "Auxiliary.h"
#include "Misc.h"

#include "apr_file_io.h"
#include "apr_strings.h"
#define APR_WANT_MEMFUNC
#define APR_WANT_STRFUNC
#include "apr_want.h"

#include <stdlib.h>

#ifdef DEBUG
#include <iostream>
#include <iomanip>
#endif

// DUMP_INPUT(input): debugging aid. When DEBUG_RFC1867Parser is defined it
// streams `input` to cerr followed by endl; otherwise it expands to nothing,
// so call sites cost nothing in normal builds.
#ifdef DEBUG_RFC1867Parser
#define DUMP_INPUT(input) cerr << input << endl;
#else
#define DUMP_INPUT(input)
#endif

// start_with(pos, pattern): non-zero when the first strlen(pattern) characters
// at `pos` equal `pattern`. Note that `pattern` is evaluated twice, so it must
// not have side effects.
#define start_with(pos, pattern) (strncmp(pos, pattern, strlen(pattern)) == 0)

// Maximum number of bytes requested from the reader per call (see read()),
// and the fill level fill_buffer() tries to reach.
template<class FileWriterClass, class PostReaderClass> const apr_size_t
RFC1867Parser<FileWriterClass, PostReaderClass>::READ_BLOCK_SIZE    = RFC1867_READ_BLOCK_SIZE;

// Passed to DirectoryCleaner::cleanOldFiles() at the start of parse().
template<class FileWriterClass, class PostReaderClass> const apr_size_t
RFC1867Parser<FileWriterClass, PostReaderClass>::READ_TIMEOUT_SEC   = RFC1867_READ_TIMEOUT_SEC;

// Line terminator searched for by skip_line().
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::CR_LF[]            = "\r\n";

// Prefix that the request Content-Type must start with (see get_boundary()).
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::MULTIPART_FORM_DATA[]= "multipart/form-data; ";

// Header prefix expected in front of a file part's MIME type.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::CONTENT_TYPE[]     = "Content-Type: ";

// Header prefix expected at the start of every part.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::CONTENT_DISP[]     = "Content-Disposition: ";

// Disposition type expected right after CONTENT_DISP.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::FORM_DATA[]        = "form-data; ";

// Name of the Content-Type parameter that carries the boundary token.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::BOUNDARY_PARAM[]   = "boundary";

// Characters that precede the boundary token on a delimiter line.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::BOUNDARY_PREFIX[]  = "--";

// Separator between a parameter name and its value.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::ASSIGN             = '=';

// Character that opens and closes a quoted parameter value.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::QUOTE              = '"';

// Separator between header parameters.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::DELIMITER          = ';';

// Content-Disposition parameter holding the form field name.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::NAME_PARAM[]       = "name";

// Content-Disposition parameter holding the uploaded file's name.
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::FILENAME_PARAM[]   = "filename";

// File name template merged with the upload directory and handed to
// apr_file_mktemp() in create_tmp_file().
template<class FileWriterClass, class PostReaderClass> const char
RFC1867Parser<FileWriterClass, PostReaderClass>::FILE_NAME_TEMPLATE[]= "tmp.XXXXXX";

// Retry threshold used by create_tmp_file() before it gives up.
template<class FileWriterClass, class PostReaderClass> const apr_size_t
RFC1867Parser<FileWriterClass, PostReaderClass>::MKTEMP_RETRY_NUMBER = 10;

/******************************************************************************
 * public methods
 *****************************************************************************/
// Stores the parser configuration; no work is done at construction time.
//
//   file_dir      - directory used for temporary upload files
//   max_text_size - size limit applied to a text field (see get_text_content)
//   max_file_size - size limit applied to an uploaded file (see get_file_content)
//   max_item_num  - upper bound on the number of parts read by parse()
//   file_offset   - forwarded to the FileWriterClass constructor
template<class FileWriterClass, class PostReaderClass>
RFC1867Parser<FileWriterClass, PostReaderClass>::RFC1867Parser(
        const char *file_dir,
        apr_size_t max_text_size,
        apr_uint64_t max_file_size,
        apr_size_t max_item_num,
        apr_size_t file_offset)
    : file_dir_(file_dir), max_text_size_(max_text_size),
      max_file_size_(max_file_size), max_item_num_(max_item_num),
      file_offset_(file_offset)
{
}

// Parses a multipart/form-data request body into a newly allocated query_map.
//
//   pool         - APR pool used for all string allocations
//   reader       - source of the request body
//   content_type - value of the request's Content-Type header
//   content_size - declared size of the request body
//
// Returns a heap-allocated map owned by the caller (release with delete).
// Throws a C string (const char * / char *) describing the problem; when the
// failure happens while parts are being read, the temporary files recorded in
// the map so far are removed and the map is deleted before rethrowing.
template<class FileWriterClass, class PostReaderClass>
RFC1867Data::query_map
*RFC1867Parser<FileWriterClass, PostReaderClass>::parse(apr_pool_t *pool,
                                                        PostReaderClass& reader,
                                                        const char *content_type,
                                                        apr_uint64_t content_size)
{
    const char *start;
    apr_size_t item_num;
    string name;
    rfc1867_content content;

    // Clean up old files in the upload directory before accepting new data.
    DirectoryCleaner::cleanOldFiles(pool, file_dir_, READ_TIMEOUT_SEC);

    // Reject bodies that are larger than the configured limits could ever allow.
    if (content_size > (max_text_size_+max_file_size_)*max_item_num_) {
        throw "åץɥǡ礭ޤ";
    }

    // Resolve the boundary BEFORE allocating the map: get_boundary() throws on
    // a malformed header, and at this point there is nothing to release yet.
    const char *boundary = get_boundary(pool, content_type);

    query_map *qmap = new query_map();
    try {
        handle h(pool, reader, boundary, qmap);
        if (fill_buffer(h) == 0) {
            DUMP_INPUT(h.buffer.data());
            throw "Ϥޤ";
        }

        // Drop the first delimiter line but keep its trailing CRLF, because
        // get_content() expects every part to start with CRLF.
        start = skip_line(h.pool, h.buffer.data()) - strlen(CR_LF);
        h.buffer.erase(start-h.buffer.data());

        item_num = 0;
        while ((item_num++ < max_item_num_) && get_content(h, name, content)) {
            qmap->insert(query_pair(name, content));
        }
    } catch(const char *) {
        // Undo the partial work: remove temporary files already registered in
        // the map, then release the map itself.
        clean_tmp_file(pool, qmap);
        delete qmap;

        throw;
    }

    return qmap;
}

// Removes the temporary file behind every FILE entry of the map.
//
//   pool      - APR pool handed to apr_file_remove()
//   qmap      - map whose FILE entries are visited; TEXT entries are ignored
//   is_strict - when true, the first removal failure aborts the walk by
//               throwing a message; when false, failures are ignored
template<class FileWriterClass, class PostReaderClass>
void RFC1867Parser<FileWriterClass, PostReaderClass>::clean_tmp_file(apr_pool_t *pool,
                                                                     RFC1867Data::query_map *qmap,
                                                                     bool is_strict)
{
    for (typename query_map::const_iterator entry = qmap->begin();
         entry != qmap->end();
         ++entry) {
        if (entry->second.type == RFC1867Data::FILE) {
            const bool removed =
                (apr_file_remove(entry->second.file.tmp_path.c_str(), pool) == APR_SUCCESS);

            if (!removed && is_strict) {
                throw "եǤޤǤ";
            }
        }
    }
}

#ifdef DEBUG
// Debug helper: prints every entry of the map to cerr, each one preceded by a
// 70-character dashed rule, and closes the listing with one more rule.
template<class FileWriterClass, class PostReaderClass>
void RFC1867Parser<FileWriterClass, PostReaderClass>::dump_map(RFC1867Data::query_map *qmap)
{
    for (typename query_map::const_iterator entry = qmap->begin();
         entry != qmap->end();
         ++entry) {
        const bool is_text = (entry->second.type == RFC1867Data::TEXT);

        cerr << setw(70) << setfill('-') << '-' << endl;
        cerr << "name: " << entry->first << endl;
        cerr << "type: " << (is_text ? "TEXT" : "FILE") << endl;

        if (is_text) {
            cerr << "value: " << entry->second.text << endl;
            continue;
        }

        // Everything that is not TEXT is reported with the file fields.
        cerr << "file name: " << entry->second.file.name << endl;
        cerr << "file tmp_path: " << entry->second.file.tmp_path << endl;
        cerr << "file type: " << entry->second.file.mime << endl;
        cerr << "file size: " << static_cast<unsigned int>(entry->second.file.size) << endl;
    }

    cerr << setw(70) << setfill('-') << '-' << endl;
}
#endif


/******************************************************************************
 * private methods
 *****************************************************************************/
// Reads one part (headers plus body) from the front of the buffer.
//
//   h       - parsing state (pool, reader, buffer, boundary)
//   name    - receives the value of the part's "name" parameter
//   content - receives the part body: TEXT with the value, or FILE with the
//             metadata of the temporary file the body was written to
//
// Returns true when a part was read, false when the buffer does not start with
// CRLF (after a boundary token that is the case for the closing "--" marker).
// Throws a C string message on malformed or truncated input.
template<class FileWriterClass, class PostReaderClass>
bool RFC1867Parser<FileWriterClass, PostReaderClass>::get_content(handle& h,
                                                                  string& name,
                                                                  rfc1867_content& content)
{
    const char *param_name = NULL;
    const char *file_name;
    const char *file_mime;
    const char *start;
    const char *end;

    if ((fill_buffer(h) == 0) && (h.buffer.size() == 0)) {
        DUMP_INPUT(h.buffer.data());
        throw "ϤǽλƤޤ(1)";
    }

    start = h.buffer.data();
    if (start_with(start, CR_LF)) {
        start += strlen(CR_LF);
    } else {
        return false;
    }
    end = skip_line(h.pool, start);

    start = skip(h.pool, start, CONTENT_DISP);
    start = skip(h.pool, start, FORM_DATA);

    start = get_param(h.pool, start, end, NAME_PARAM, &param_name);

    // The check must come before the assignment: get_param() reports a missing
    // parameter with a NULL value, and assigning NULL to a string is undefined.
    if (start == NULL) {
        DUMP_INPUT(h.buffer.data());
        throw "name ĤޤǤ";
    }
    name = param_name;

    start = get_param(h.pool, start, end, FILENAME_PARAM, &file_name);

    if (start == NULL) {
        // No filename parameter: plain text field. Drop the header line and
        // the blank line that follows it, then read the value.
        start = skip_line(h.pool, end);
        h.buffer.erase(start-h.buffer.data());

        content.type = RFC1867Data::TEXT;
        content.text = get_text_content(h);
    } else {
        const char *tmp_path;
        const char *file_digest;
        apr_size_t file_size = 0;

        file_name = basename_ex(file_name);

        // Move to the line after Content-Disposition; it must carry the
        // Content-Type header of the uploaded file.
        start = skip_line(h.pool, start);
        end = skip_line(h.pool, start);

        start = skip(h.pool, start, CONTENT_TYPE);
        file_mime = static_cast<const char *>(apr_pstrndup(h.pool,
                                                                start,
                                                                end-start-strlen(CR_LF)));

        // Drop the headers and the blank line so the buffer starts at the body.
        start = skip_line(h.pool, end);
        h.buffer.erase(start-h.buffer.data());

        tmp_path = get_file_content(h, &file_digest, &file_size);

        content.type = RFC1867Data::FILE;
        content.file = file_content(file_name, tmp_path, file_mime, file_digest, file_size);
    }

    return true;
}

// Extracts the value of a text field, i.e. everything in front of the next
// boundary delimiter (CRLF "--" boundary), and consumes the input up to and
// including the boundary token.
//
//   h - parsing state; on return h.buffer starts right after the boundary token
//
// Returns the value as a pool-allocated, NUL-terminated string.
// Throws a C string message when memory cannot be obtained, when the value
// grows beyond max_text_size_, when the input ends before a boundary is seen,
// or when the boundary is not preceded by CRLF "--".
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::get_text_content(handle& h)
{
    // Bytes between the end of the value and the boundary token: CRLF + "--".
    const apr_size_t prefix_size = strlen(CR_LF) + strlen(BOUNDARY_PREFIX);
    const char *start;
    const char *end;

    fill_buffer(h);
    start = h.buffer.data();
    end = static_cast<const char *>(memmem(start, h.buffer.size(), h.boundary, h.boundary_len));

    if (end != NULL) {
        // Common case: the whole value is already in the buffer.
        if (static_cast<apr_size_t>(end - start) < prefix_size) {
            // Without this guard the length below would wrap around.
            DUMP_INPUT(h.buffer.data());
            throw "malformed multipart body: boundary delimiter prefix is missing";
        }

        char *text = apr_pstrndup(h.pool, start, end-start - prefix_size);

        end += h.boundary_len;
        h.buffer.erase(end-h.buffer.data());

        return text;
    }

    // Slow path: the value spans several reads, so it is accumulated in a
    // malloc'ed work area that grows by one read block per iteration.
    char *work = NULL;

    try {
        const apr_size_t barrier_size = h.boundary_len + prefix_size;
        apr_size_t store_size = h.buffer.size();  // bytes of input held in work
        apr_size_t buf_size = store_size;         // logical capacity of work
        apr_size_t read_size = READ_BLOCK_SIZE;   // growth step / last read size

        // Ask for at least one byte: malloc(0) may legitimately return NULL.
        work = static_cast<char *>(malloc((store_size > 0) ? store_size : 1));
        if (work == NULL) {
            DUMP_INPUT(h.buffer.data());
            throw "γݤ˼Ԥޤ";
        }
        memcpy(work, start, store_size);

        while (true) {
            if (store_size > max_text_size_) {
                DUMP_INPUT(h.buffer.data());
                throw "ƥȤĹޤ";
            }

            // Growing by the size of the previous read keeps exactly
            // READ_BLOCK_SIZE bytes free behind the stored data.
            char *grown = static_cast<char *>(realloc(work, buf_size+read_size));
            if (grown == NULL) {
                // work is still valid here; the handler below releases it.
                DUMP_INPUT(h.buffer.data());
                throw "γݤ˼Ԥޤ";
            }
            work = grown;
            buf_size += read_size;

            char *chunk = work + store_size;
            read_size = read(h, chunk);

            // Re-scan the tail of the old data as well, so a boundary split
            // across two reads is still found.
            const apr_size_t offset = (store_size < barrier_size) ? store_size : barrier_size;
            store_size += read_size;

            end = static_cast<const char *>(memmem(chunk-offset, read_size+offset,
                                                   h.boundary, h.boundary_len));
            if (end != NULL) {
                break;
            } else if (read_size == 0) {
                DUMP_INPUT(h.buffer.data());
                throw "ϤǽλƤޤ(2)";
            }
        }

        if (static_cast<apr_size_t>(end - work) < prefix_size) {
            DUMP_INPUT(h.buffer.data());
            throw "malformed multipart body: boundary delimiter prefix is missing";
        }

        char *text = apr_pstrndup(h.pool, work, end-work - prefix_size);

        end += h.boundary_len;
        const apr_size_t consumed = end - work;

        // Hand the unread remainder back to the shared buffer while the work
        // area is still alive; it may only be freed after this copy.
        memcpy(h.buffer.data(), end, store_size-consumed);
        h.buffer.size(store_size-consumed);

        free(work);
        work = NULL;

        return text;
    } catch(const char *) {
        free(work);  // free(NULL) is a no-op

        throw;
    }
}

// Streams the body of a file part into a freshly created temporary file and
// consumes the input up to and including the next boundary token.
//
//   h           - parsing state; on return h.buffer starts after the boundary
//   file_digest - receives the pool-allocated digest string of the file data
//   file_size   - receives the number of bytes written
//
// Returns the path of the temporary file (pool-allocated).
// Throws a C string message when the file exceeds max_file_size_, when the
// input ends before a boundary is seen, or when the boundary is not preceded
// by CRLF "--".
//
// NOTE(review): when an exception escapes, the temporary file stays on disk;
// it is not yet registered in the query map, so parse()'s cleanup does not see
// it.
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::get_file_content(handle& h,
                                                                              const char **file_digest,
                                                                              apr_size_t *file_size)
{
    // Bytes between the end of the data and the boundary token: CRLF + "--".
    const apr_size_t prefix_size = strlen(CR_LF) + strlen(BOUNDARY_PREFIX);
    // Longest run at the end of the buffer that could be a partial delimiter.
    const apr_size_t barrier_size = h.boundary_len + prefix_size;

    char *file_path = NULL;
    const char *start;
    const char *end;
    apr_size_t read_size;
    apr_size_t write_size;

    *file_size = 0;

    FileWriterClass writer(h.pool, create_tmp_file(h.pool, file_dir_, &file_path),
                           file_offset_);
    MessageDigest5 digest;

    fill_buffer(h);
    start = h.buffer.data();
    end = static_cast<const char *>(memmem(start, h.buffer.size(), h.boundary, h.boundary_len));

    if (end == NULL) {
        while (true) {
            if (UNLIKELY(*file_size > max_file_size_)) {
                DUMP_INPUT(h.buffer.data());
                throw "ե뤬礭ޤ";
            }

            // Flush everything except the last barrier_size bytes, which may
            // be the beginning of a delimiter. A buffer shorter than that
            // (truncated input) flushes nothing instead of wrapping around.
            write_size = (h.buffer.size() > barrier_size)
                ? (h.buffer.size() - barrier_size)
                : 0;

            writer.write(start, write_size);
            digest.update(reinterpret_cast<const apr_byte_t *>(start), write_size);
            *file_size += write_size;
            h.buffer.erase(write_size);

            read_size = fill_buffer(h);

            start = h.buffer.data();

            end = static_cast<const char *>(memmem(start, h.buffer.size(),
                                                   h.boundary, h.boundary_len));
            if (UNLIKELY(end != NULL)) {
                break;
            } else if (UNLIKELY(read_size == 0)) {
                DUMP_INPUT(h.buffer.data());
                throw "ϤǽλƤޤ(3)";
            }
        }
    }

    if (static_cast<apr_size_t>(end - start) < prefix_size) {
        // Without this guard the final write size below would wrap around.
        DUMP_INPUT(h.buffer.data());
        throw "malformed multipart body: boundary delimiter prefix is missing";
    }

    // Final chunk: the data in front of CRLF "--" boundary.
    write_size = end-start-prefix_size;

    writer.write(start, write_size);
    digest.update(reinterpret_cast<const apr_byte_t *>(start), write_size);
    *file_size += write_size;

    end += h.boundary_len;
    h.buffer.erase(end-start);

    digest.finish();

    *file_digest = apr_pstrdup(h.pool, digest.c_str());

    return file_path;
}

// Tops up h.buffer from the reader until it holds at least READ_BLOCK_SIZE
// bytes or the reader delivers nothing more.
//
// Returns the number of bytes appended by this call (0 when the buffer was
// already full enough or the reader had no data).
template<class FileWriterClass, class PostReaderClass>
apr_size_t RFC1867Parser<FileWriterClass, PostReaderClass>::fill_buffer(handle& h)
{
    apr_size_t appended = 0;

    for (;;) {
        if (h.buffer.size() >= READ_BLOCK_SIZE) {
            break;
        }

        // New data is placed directly behind what the buffer already holds.
        const apr_size_t got = read(h, h.buffer.data()+h.buffer.size());
        if (got == 0) {
            break;
        }

        h.buffer.size(h.buffer.size()+got);
        appended += got;
    }

    return appended;
}

// Requests up to READ_BLOCK_SIZE bytes from the reader into `buffer` and
// returns how many bytes the reader reported. The reader's own return value
// is not inspected.
template<class FileWriterClass, class PostReaderClass>
apr_size_t RFC1867Parser<FileWriterClass, PostReaderClass>::read(handle& h,
                                                                 char *buffer)
{
    apr_size_t received = 0;
    h.reader.read(buffer, READ_BLOCK_SIZE, &received);
    return received;
}

// Extracts the boundary token from a request Content-Type header value.
//
//   pool         - APR pool the token is allocated from
//   content_type - header value; must begin with MULTIPART_FORM_DATA
//
// Returns the pool-allocated boundary token.
// Throws a C string message when the parameter is missing, when the token is
// not longer than BOUNDARY_PREFIX, or when it is longer than half a read block.
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::get_boundary(apr_pool_t *pool,
                                                                          const char *content_type)
{
    const char *boundary = NULL;
    const char * const params = skip(pool, content_type, MULTIPART_FORM_DATA);
    const char * const tail = content_type+strlen(content_type);

    if (get_param(pool, params, tail, BOUNDARY_PARAM, &boundary) == NULL) {
        DUMP_INPUT(content_type);
        throw "boundary ɤ߼ޤǤ";
    }

    const apr_size_t boundary_len = strlen(boundary);

    if (boundary_len <= strlen(BOUNDARY_PREFIX)) {
        DUMP_INPUT(content_type);
        throw "boundary ûޤ";
    }
    if (boundary_len > (READ_BLOCK_SIZE/2)) {
        DUMP_INPUT(content_type);
        throw "boundary Ĺޤ";
    }

    return boundary;
}

// Reads one `name=value` header parameter from [input_start, input_end).
//
//   pool        - APR pool the value is copied into
//   input_start - where to start looking; leading whitespace and ';' are skipped
//   input_end   - one past the last character that may be examined
//   name        - parameter name expected at that position
//   value       - receives a pool-allocated copy of the value, or NULL when the
//                 parameter is not present
//
// Returns the position just behind the value, or NULL when the text at the
// current position is not `name=`.
// Throws a C string message when a quoted value has no closing quote.
//
// The scan never leaves the given range. CR and LF count as whitespace, so an
// unbounded skip would walk from the end of a header line into the part body.
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::get_param(apr_pool_t *pool,
                                                                       const char *input_start,
                                                                       const char *input_end,
                                                                       const char *name,
                                                                       const char **value)
{
    const apr_size_t name_len = strlen(name);
    const char *end;

    *value = NULL;

    while ((input_start < input_end) &&
           (isspace(*input_start & 0xff) || (*input_start == DELIMITER))) {
        input_start++;
    }

    // There must be room for the name plus the '=' that follows it.
    if ((input_start >= input_end) ||
        (static_cast<apr_size_t>(input_end - input_start) <= name_len) ||
        (strncmp(input_start, name, name_len) != 0)) {
        return NULL;
    }

    input_start += name_len;
    if (*(input_start++) != ASSIGN) {
        return NULL;
    }

    if ((input_start < input_end) && (*input_start == QUOTE)) {
        input_start++;
        end = strnchr(input_start, input_end-input_start, QUOTE);
        if (end == NULL) {
            DUMP_INPUT(input_start);
            throw "б QUOTE ޤ";
        }

        *value = static_cast<const char *>(apr_pstrndup(pool, input_start,
                                                        end-input_start));
        end++;  // step over the closing quote
    } else {
        // Unquoted token: runs up to whitespace, the parameter delimiter, or
        // the end of the range (an empty value is possible).
        end = input_start;
        while ((end < input_end) && !isspace(*end & 0xff) && (*end != DELIMITER)) {
            end++;
        }
        *value = static_cast<const char *>(apr_pstrndup(pool, input_start,
                                                        end-input_start));
    }

    return end;
}

// Steps over a fixed token at the current position.
//
//   pool        - APR pool used to build the error message
//   input_start - current position
//   pattern     - token expected at input_start
//   is_must     - when true, a missing token is an error
//
// Returns input_start advanced by strlen(pattern).
// Throws a pool-allocated message (pattern followed by a fixed suffix) when
// is_must is set and the token is not present.
//
// NOTE(review): with is_must == false the position is advanced even when the
// token is absent - confirm that callers relying on that mode expect this.
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::skip(apr_pool_t *pool,
                                                                  const char * const input_start,
                                                                  const char * const pattern,
                                                                  bool is_must)
{
    const apr_size_t pattern_len = strlen(pattern);

    if (is_must && (strncmp(input_start, pattern, pattern_len) != 0)) {
        DUMP_INPUT(input_start);
        throw apr_pstrcat(pool, pattern, " ޤ", NULL);
    }

    return input_start + pattern_len;
}

// Returns the position right after the first CRLF found at or behind
// input_start (i.e. the start of the next line).
// Throws a C string message when no CRLF is found.
// The search uses strstr(), so the input has to be NUL-terminated.
template<class FileWriterClass, class PostReaderClass>
const char *RFC1867Parser<FileWriterClass, PostReaderClass>::skip_line(apr_pool_t *pool,
                                                                       const char * const input_start)
{
    const char * const line_break = strstr(input_start, CR_LF);

    if (line_break != NULL) {
        return line_break + strlen(CR_LF);
    }

    DUMP_INPUT(input_start);
    throw "CR_LF ޤ";
}

// Creates a uniquely named temporary file inside `dir`.
//
//   pool      - APR pool used for the path and the file handle
//   dir       - directory the file is created in
//   file_path - receives the path built from dir and FILE_NAME_TEMPLATE, which
//               is then handed to apr_file_mktemp()
//
// Returns the open file handle.
// Throws a C string message when the path cannot be built or when creation
// keeps failing after the retry threshold has been reached.
template<class FileWriterClass, class PostReaderClass>
apr_file_t *RFC1867Parser<FileWriterClass, PostReaderClass>::create_tmp_file(apr_pool_t *pool,
                                                                             const char *dir,
                                                                             char **file_path)
{
    const apr_status_t merge_status = apr_filepath_merge(file_path, dir, FILE_NAME_TEMPLATE,
                                                         APR_FILEPATH_NOTABOVEROOT, pool);
    if (merge_status != APR_SUCCESS) {
        throw "ե̾ǤޤǤ";
    }

    // Buffered I/O is only requested for the basic writer type.
    apr_int32_t open_flags = APR_READ|APR_WRITE|APR_CREATE|APR_EXCL|APR_BINARY;
#ifdef WRITER_TYPE_BASIC
    open_flags |= APR_BUFFERED;
#endif

    apr_file_t *tmp_file;
    apr_size_t failures = 0;

    while (apr_file_mktemp(&tmp_file, *file_path, open_flags, pool) != APR_SUCCESS) {
        if (failures == MKTEMP_RETRY_NUMBER) {
            throw "եǤޤǤ";
        }
        ++failures;
    }

    return tmp_file;
}


/******************************************************************************
 * test
 *****************************************************************************/
#ifdef DEBUG_RFC1867Parser
#include "apr_general.h"

#include "RFC1867ParserImpl.h"

// Size of the buffer that receives the first line of the test input file.
static const apr_size_t BUFSIZE         = 256;
// Directory handed to the parser for its temporary files.
static const char FILE_DIR[]            = "/tmp";
// Format used to build a Content-Type header value from a boundary token.
static const char CONTENT_TYPE_TMPL[]   = "multipart/form-data; boundary=%s";

// Prints a one-line usage message for the test driver to cerr.
void usage(const char *prog_name)
{
    cerr << "usage: ";
    cerr << prog_name;
    cerr << " <INPUT>" << endl;
}

// Test driver: parses the multipart body stored in the file named by argv[1],
// removes the temporary files the parser created and dumps the parsed map.
//
// The boundary is taken from the first line of the file (its first two
// characters are skipped), after which the file is rewound so the parser sees
// the complete body.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE after printing the error and the usage
// line. APR is shut down on both paths.
int main(int argc, const char * const *argv)
{
    char content_type[BUFSIZE];
    apr_pool_t *pool;
    apr_file_t *fd;
    apr_off_t offset;

    apr_app_initialize(&argc, &argv, NULL);
    apr_pool_create(&pool, NULL);

    try {
        if (argc != 2) {
            throw "ѤΥե뤬ꤵƤޤ";
        }

        if (apr_file_open(&fd, argv[1], APR_READ|APR_BINARY,
                          APR_OS_DEFAULT, pool) != APR_SUCCESS) {
            throw "ѤΥե򳫤ޤǤ";
        }

        // The first line of the input is the opening delimiter line.
        if (apr_file_gets(content_type, sizeof(content_type), fd) != APR_SUCCESS) {
            throw "ѤΥե read ˼Ԥޤ";
        }

        // Rewind so the parser reads the body from the very beginning.
        offset = 0;
        if (apr_file_seek(fd, APR_SET, &offset) != APR_SUCCESS) {
            throw "ѤΥե seek ˼Ԥޤ";
        }

        UploadParser parser(FILE_DIR, 10*1024*1024, 32*1024*1024, 5);

        auto_ptr<UploadParser::query_map>
            qmap(parser.parse(pool, fd,
                              apr_psprintf(pool, CONTENT_TYPE_TMPL, content_type+2)));

        UploadParser::clean_tmp_file(pool, qmap.get());
        UploadParser::dump_map(qmap.get());
    } catch(const char *message) {
        cerr << "Error: " << message << endl;
        usage(argv[0]);

        // Shut APR down on the failure path as well.
        apr_terminate();

        return EXIT_FAILURE;
    }

    apr_terminate();

    return EXIT_SUCCESS;
}
#endif

// Local Variables:
// mode: c++
// buffer-file-coding-system: euc-japan-dos
// End:
