/*
 * Conversion functions for libssiconv
 *
 * SPDX-FileType: SOURCE
 * SPDX-FileCopyrightText: Michael Bäuerle
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <assert.h>
#include <stddef.h>
#include <string.h>

#include "libssiconv-0/iconv.h"  /* Always include main header file first */
#include "libssiconv-0/iconv_bool.h"
#include "libssiconv-0/iconv_errno.h"
#include "libssiconv-0/iconv_name.h"
#include "libssiconv-0/iconv_table.h"


/*
 * Check macros must not depend on C execution character set and/or locale.
 *
 * Both macros compare against fixed US-ASCII code values:
 * - SSIC0_I_PRINTABLE_ASCII: true for 0x21..0x7E (space 0x20 is excluded)
 * - SSIC0_I_LOWERCASE_ASCII: true for 0x61..0x7A
 *
 * The argument is parenthesized so that compound expressions are evaluated
 * as a unit. Note that the argument is still evaluated twice, therefore it
 * must not have side effects.
 */
#define SSIC0_I_PRINTABLE_ASCII(c)  ((0x20U < (c)) && (0x7FU > (c)))
#define SSIC0_I_LOWERCASE_ASCII(c)  ((0x60U < (c)) && (0x7BU > (c)))


/* ========================================================================== */
/*
 * Look up encoding name 'enc' in the encoding definition list 'def'.
 *
 * The list is scanned in order up to its terminating entry (the one with a
 * NULL name). Names are compared with strcmp(), so the match is exact and
 * case-sensitive.
 *
 * Returns the mapping table of the first matching entry or NULL if there is
 * no match (encoding not supported).
 */
static const char **ssic0_i_check_def(const char *enc,
                                      const ssic0_i_name_def *def)
{
    const ssic0_i_name_def *entry;

    for (entry = def; NULL != entry->name; ++entry)
    {
        if (0 == strcmp(enc, entry->name))
            return entry->table;
    }

    return NULL;
}


/* ========================================================================== */
/*
 * Search the encoding definitions that belong to direction 'dir' for the
 * encoding name 'enc'.
 *
 * For SSIC0_I_NAME_TARGET only the list of target names is searched.
 * For SSIC0_I_NAME_SOURCE the list of source names is searched first, then
 * (if enabled at compile time) the aliases and the nonstandard names.
 * The first match wins. Any other value of 'dir' matches nothing.
 *
 * Returns a pointer to the mapping table or NULL (encoding not supported).
 */
static const char **ssic0_i_check_dir(const char *enc, int dir)
{
    const char **table = NULL;

    /* Target encodings */
    if (SSIC0_I_NAME_TARGET == dir)
        table = ssic0_i_check_def(enc, ssic0_i_name_tgt);

    /* Source encodings (every further list is a fallback for the former) */
    if ((NULL == table) && (SSIC0_I_NAME_SOURCE == dir))
    {
        table = ssic0_i_check_def(enc, ssic0_i_name_src);

#if SSIC0_I_NAME_ENABLE_ALIASES
        if (NULL == table)
            table = ssic0_i_check_def(enc, ssic0_i_name_src_alias);
#endif  /* SSIC0_I_NAME_ENABLE_ALIASES */

#if SSIC0_I_NAME_ENABLE_NONSTANDARD
        if (NULL == table)
            table = ssic0_i_check_def(enc, ssic0_i_name_src_nonstd);
#endif  /* SSIC0_I_NAME_ENABLE_NONSTANDARD */
    }

    return table;
}


/* ========================================================================== */
/*
 * Check whether conversion from or to an encoding is supported.
 *
 * The parameter 'enc' is the name of the encoding (treated case-insensitive).
 * The parameter 'dir' is used to specify source or target (use constants
 * "SSIC0_I_NAME_SOURCE" and "SSIC0_I_NAME_TARGET").
 *
 * The name is rejected without a table lookup if it is a NULL pointer, if it
 * is longer than SSIC0_I_NAME_MAX octets or if it contains an octet outside
 * of the range 0x21..0x7E. Lowercase US-ASCII letters are converted to
 * uppercase before the lookup.
 *
 * Returns a pointer to the mapping table or NULL (not supported).
 */
static const char **ssic0_i_check_encoding(const char *enc, int dir)
{
    char   buf[SSIC0_I_NAME_MAX + 2U];  /* +2 to silence warning from GCC 14 */
    size_t i;

    /* Without a name there is nothing to look up (strncpy() requires one) */
    if (NULL == enc)
        return NULL;

    /*
     * Copy name and ensure that it is NUL-terminated (not too long).
     * strncpy() pads short names with NUL up to the requested size. The octet
     * at index SSIC0_I_NAME_MAX is nonzero only if the name is too long.
     */
    (void)strncpy(buf, enc, SSIC0_I_NAME_MAX + 1U);
    if (0 != buf[SSIC0_I_NAME_MAX])
        return NULL;

    /* Check name for printable US-ASCII and convert it to uppercase */
    for (i = 0; SSIC0_I_NAME_MAX > i; ++i)
    {
        unsigned char c = (unsigned char)buf[i];

        if (0 == c)
            break;

        if (!SSIC0_I_PRINTABLE_ASCII(c))
            return NULL;
        if (SSIC0_I_LOWERCASE_ASCII(c))
            buf[i] = (char)(c - 0x20U);  /* Distance between 'a' and 'A' */
    }

    /* Check whether encoding is supported */
    return ssic0_i_check_dir(buf, dir);
}


/* ========================================================================== */
/*
 * Decode UTF-8 sequence at position 'p' with length 'len'.
 *
 * Only sequences with a length of one or two octets are accepted (this covers
 * the codepoints up to U+07FF). The length 'len' must not be zero.
 *
 * A sequence is rejected if:
 * - A single octet is not in the US-ASCII range (greater than 0x7F)
 * - The first of two octets is not a two-octet lead octet (110xxxxx)
 * - The first of two octets is 0xC0 or 0xC1 (overlong form of a codepoint
 *   that must be encoded with a single octet)
 * - The second of two octets is not a continuation octet (10xxxxxx)
 * - The length is greater than two
 *
 * On success the Unicode codepoint is written at location 'ucp'.
 * On failure the location 'ucp' is not modified.
 *
 * Returns zero (false) on success.
 */
static ssic0_i_bool ssic0_i_decode_utf8(const char *p, size_t len,
                                        unsigned int *ucp)
{
    assert(0 < len);
    switch (len)
    {
        case 1:
        {
            /* Read octet as unsigned value ('char' may be a signed type) */
            unsigned int c = (unsigned char)p[0];

            if (0x7FU < c)
                return 1;

            *ucp = c;
            break;
        }
        case 2:
        {
            unsigned int c1 = (unsigned char)p[0];
            unsigned int c2 = (unsigned char)p[1];

            /* Lead octet must have the form 110xxxxx */
            if (0xC0U != (c1 & 0xE0U))
                return 1;

            /* Lead octets 0xC0 and 0xC1 can only start overlong forms */
            if (0xC2U > c1)
                return 1;

            /* Continuation octet must have the form 10xxxxxx */
            if (0x80U != (c2 & 0xC0U))
                return 1;

            *ucp = ((c1 & 0x1FU) << 6) | (c2 & 0x3FU);
            break;
        }
        /* Longer sequences are beyond limits for supported target encodings */
        default:
            return 1;
    }

    return 0;
}


/* ========================================================================== */
/*
 * Get codepoint for UTF-8 sequence at position 'p' with length 'len'.
 *
 * The sequence is decoded and the result is checked against the codepoint
 * limit of the target encoding: 0x7F if 'target' is the US-ASCII table, 0xFF
 * (the limit for ISO-8859-1) for every other table.
 *
 * If the sequence cannot be decoded or if the codepoint is beyond the limit,
 * the replacement for the target encoding is returned (SSIC0_I_NI_ASCII for
 * US-ASCII, SSIC0_I_NI_ISO otherwise) and the value 1 is written at location
 * 'ni' (non-identical conversion). Otherwise the location 'ni' is not
 * modified.
 *
 * Returns a codepoint for encoding 'target'.
 */
static char ssic0_i_get_codepoint(const char **target,
                                  const char *p, size_t len,
                                  ssic0_i_bool *ni)
{
    unsigned int max_cp  = 0xFF;            /* ISO-8859-1 */
    unsigned int result  = SSIC0_I_NI_ISO;  /* ISO-8859-1 */
    unsigned int decoded = 0x100;           /* Beyond the limit of both */

    if (ssic0_i_table_us_ascii == target)
    {
        max_cp = 0x7F;              /* US-ASCII */
        result = SSIC0_I_NI_ASCII;  /* US-ASCII */
    }

    if (!ssic0_i_decode_utf8(p, len, &decoded) && (max_cp >= decoded))
        result = decoded;  /* Representable in target encoding */
    else
        *ni = 1;           /* Keep the replacement */

    return (unsigned char)result;
}


/* ========================================================================== */
/*
 * Conversion with upper half of mapping table.
 *
 * Parameters 'target' and 'source' are pointers to mapping tables.
 * The source codepoint 'c' must not be smaller than 0x80. The entry with the
 * index 'c' - 0x80 of the table 'source' is the UTF-8 sequence for 'c'.
 *
 * If the target encoding is UTF-8, the location of that sequence is written
 * to 'p' and its length to 'len'.
 *
 * If the target encoding is not UTF-8, the sequence is converted to a single
 * codepoint of the target encoding that is stored at 'cp'. In this case the
 * location 'cp' is written to 'p' and the length one to 'len'.
 *
 * The conversion is non-identical if the table entry is equal to SSIC0_I_NI
 * or if the codepoint is not available in the target encoding. In this case
 * the counter at location 'nonident' is incremented by one.
 */
static void ssic0_i_codepage_8bit(const char **target, const char **source,
                                  unsigned char c, const char **p, size_t *len,
                                  char *cp, size_t *nonident)
{
    const char   *seq;       /* Data for the caller */
    size_t        seq_len;   /* Length of data at 'seq' */
    ssic0_i_bool  replaced;  /* Non-identical conversion */

    /* Fetch UTF-8 sequence for codepoint from mapping table */
    assert(0x80U <= c);
    seq     = source[c - 0x80U];
    seq_len = strlen(seq);

    /* Table entry is the replacement for unavailable codepoints? */
    replaced = (0 == strcmp(seq, SSIC0_I_NI)) ? 1 : 0;

    /* Targets other than UTF-8 get a single octet (stored at 'cp') */
    if (ssic0_i_table_utf_8 != target)
    {
        *cp     = ssic0_i_get_codepoint(target, seq, seq_len, &replaced);
        seq     = cp;
        seq_len = 1;
    }

    *p   = seq;
    *len = seq_len;

    if (replaced)
        ++(*nonident);
}


/* ========================================================================== */
/*
 * Conversion with mapping table.
 *
 * Lower half of all tables is US-ASCII (1:1 mapping).
 * See API for parameters other than 'target', 'source' and 'nonident'.
 * Parameters 'target' and 'source' are pointers to mapping tables.
 *
 * The input is processed octet by octet. For every converted octet the value
 * at 'inlen' is decremented by one and the value at 'outlen' is decremented
 * by the number of octets written to 'outarray'. The counter at location
 * 'nonident' is incremented for every non-identical conversion (it is not
 * reset by this function).
 *
 * The conversion stops:
 * - With error SSIC0_I_EILSEQ, if the counter at 'nonident' is nonzero and
 *   the flag SSIC0_ICONV_REPLACE_INVALID is not set
 * - Without error at a NUL control character, if the flag
 *   SSIC0_ICONV_IGNORE_NULL is not set
 * - With error SSIC0_I_E2BIG, if the remaining space in 'outarray' is not
 *   sufficient for the next codepoint
 * - Without error, if all input octets are converted
 *
 * Returns zero (false) on success, 'errno' will be set otherwise.
 */
static ssic0_i_bool ssic0_i_codepage(const char **target, const char **source,
                                     char *inarray, size_t *inlen,
                                     char *outarray, size_t *outlen,
                                     int flag, size_t *nonident)
{
    const ssic0_i_bool  replace = (0 != (flag & SSIC0_ICONV_REPLACE_INVALID));
    const ssic0_i_bool  ign_nul = (0 != (flag & SSIC0_ICONV_IGNORE_NULL));
    const char         *in      = inarray;   /* Next octet to convert */
    char               *out     = outarray;  /* Next free output position */

    while (0 != *inlen)
    {
        /*
         * For US-ASCII 'data' points to the input (1:1 mapping).
         * For ISO-8859-1 'data' points to 'cp' (where the 8-bit codepoint is
         * stored).
         * For UTF-8 'data' points to an octet-sequence in the mapping table.
         */
        const unsigned char  octet = (unsigned char)*in;
        const char          *data  = in;
        size_t               size  = 1;  /* Length of data at 'data' */
        char                 cp    = SSIC0_I_NI_ASCII;

        if (0x80U <= octet)
        {
            ssic0_i_codepage_8bit(target, source, octet, &data, &size, &cp,
                                  nonident);
        }

        /* Non-identical conversion not allowed? */
        if (!replace && (0 != *nonident))
        {
            errno = SSIC0_I_EILSEQ;
            return 1;
        }

        /* Stop conversion on NUL control character? */
        if (!ign_nul && (1U == size) && (0 == *data))
            break;

        /* Enough space in the output array? */
        if (*outlen < size)
        {
            errno = SSIC0_I_E2BIG;
            return 1;
        }

        assert(0 < size);
        memcpy(out, data, size);
        out     += size;
        *outlen -= size;

        ++in;
        --(*inlen);
    }

    return 0;
}


/* ========================================================================== */
/*
 * Convert the octets in 'inarray' from encoding 'fromcode' to encoding
 * 'tocode' and write the result to 'outarray'.
 *
 * Both encoding names are checked before any data is converted. If one of
 * them is not supported, 'errno' is set to SSIC0_I_EBADF.
 * Otherwise the data is converted with the mapping tables of both encodings
 * according to 'flag' (see the API documentation for the contract of
 * 'inlen', 'outlen' and 'flag').
 *
 * Returns the number of non-identical conversions on success.
 * Returns (size_t)-1 on error, 'errno' will be set in this case.
 */
size_t ssic0_iconvstr(const char *tocode, const char *fromcode,
                      char *inarray, size_t *inlen,
                      char *outarray, size_t *outlen,
                      int flag)
{
    size_t       nonident = 0;  /* Number of non-identical conversions */
    const char **target;
    const char **source;

    target = ssic0_i_check_encoding(tocode, SSIC0_I_NAME_TARGET);
    source = ssic0_i_check_encoding(fromcode, SSIC0_I_NAME_SOURCE);
    if (!target || !source)
    {
        /* Requested conversion is not supported */
        errno = SSIC0_I_EBADF;
        return (size_t)-1;
    }

    if (0 != ssic0_i_codepage(target, source, inarray, inlen,
                              outarray, outlen, flag, &nonident))
    {
        return (size_t)-1;
    }

    return nonident;
}
