/*
 * File: misc.c
 *
 * Copyright (C) 2000 Jorge Arellano Cid <jcid@inf.utfsm.cl>,
 *                    Jörgen Viksell <vsksga@hotmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <config.h>
#include <gtk/gtk.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>

#include "intl.h"
#include "misc.h"
#include "prefs.h"
#include "encoding.h"


/*
 * Prepend the users home-dir to 'file' string i.e,
 * pass in .dillo/bookmarks.html and it will return
 * /home/imain/.dillo/bookmarks.html
 *
 * Remember to g_free() returned value!
 */
gchar *a_Misc_prepend_user_home(const char *file)
{
   return ( g_strconcat(g_get_home_dir(), "/", file, NULL) );
}

/*
 * Tell whether 'ch' has to be written as a %XX sequence: ASCII control
 * characters (0x01-0x1F), DEL (0x7F), bytes with the high bit set
 * (0x80-0xFF) and anything listed in 'esc_set'.
 *
 * The byte is examined as 'unsigned char' so the answer doesn't depend
 * on whether plain 'char' is signed on the target platform.
 * 'ch' must not be NUL: strchr() would match the terminator of 'esc_set'.
 */
static int Misc_escape_chars_needed(gchar ch, const gchar *esc_set)
{
   unsigned char c = (unsigned char) ch;

   return (c <= 0x1F || c >= 0x7F || strchr(esc_set, ch) != NULL);
}

/*
 * Escape characters as %XX sequences (uppercase hex digits).
 *
 * str     : NUL-terminated string to escape.
 * esc_set : NUL-terminated set of additional characters to escape.
 *
 * Return value: New string (the caller must g_free() it), or NULL if
 * there's no need to escape.
 */
gchar *a_Misc_escape_chars(const gchar *str, gchar *esc_set)
{
   static const char *hex = "0123456789ABCDEF";
   gchar *p;
   GString *gstr;
   gint i;

   /* First pass: find out whether anything needs escaping at all */
   for (i = 0; str[i]; ++i)
      if (Misc_escape_chars_needed(str[i], esc_set))
         break;

   if (!str[i])
      return NULL;

   /* Second pass: build the escaped copy */
   gstr = g_string_sized_new(64);
   for (i = 0; str[i]; ++i) {
      /* unsigned, so the shift below can't operate on a negative value */
      unsigned char c = (unsigned char) str[i];

      if (Misc_escape_chars_needed(str[i], esc_set)) {
         g_string_append_c(gstr, '%');
         g_string_append_c(gstr, hex[c >> 4]);
         g_string_append_c(gstr, hex[c & 15]);
      } else {
         g_string_append_c(gstr, str[i]);
      }
   }

   /* Keep the character data, release only the GString wrapper */
   p = gstr->str;
   g_string_free(gstr, FALSE);
   return p;
}

/*
 * Character stuffing: every character of 'str' that belongs to
 * 'esc_set' appears twice in the result.
 * Return value: New string, or NULL if there's no need to stuff.
 */
gchar *a_Misc_stuff_chars(const gchar *str, const gchar *esc_set)
{
   GString *buf;
   gchar *first, *result;
   gint idx;

   /* Nothing to do when no character of 'esc_set' occurs in 'str' */
   first = strpbrk(str, esc_set);
   if (first == NULL)
      return NULL;

   /* Work on a copy, starting at the first character to be stuffed */
   buf = g_string_new(str);
   idx = first - str;
   while (buf->str[idx] != '\0') {
      gchar ch = buf->str[idx];

      if (strchr(esc_set, ch) != NULL) {
         /* duplicate it, then step over both copies */
         g_string_insert_c(buf, idx, ch);
         idx += 2;
      } else {
         idx += 1;
      }
   }

   result = buf->str;
   g_string_free(buf, FALSE);
   return result;
}

/*
 * Case insensitive strstr.
 *
 * Return value: pointer to the first occurrence of 'str' inside 'src'
 * (compared with tolower()), or NULL if there is none. An empty 'str'
 * matches at the start of 'src'.
 */
gchar *a_Misc_stristr(char *src, char *str)
{
   int i, j;

   for (i = 0, j = 0; src[i] && str[j]; ++i) {
      /* <ctype.h> functions require a value representable as
       * 'unsigned char'; passing a negative plain 'char' is undefined */
      if (tolower((unsigned char) src[i]) == tolower((unsigned char) str[j])) {
         ++j;
      } else if (j) {
         /* partial match failed: rewind so the loop's ++i resumes one
          * character after the place where this attempt started */
         i -= j;
         j = 0;
      }
   }

   if (!str[j])                 /* Got all */
      return (src + i - j);
   return NULL;
}

/*
 * strsep implementation.
 *
 * Returns the token starting at '*orig' and ending at the first
 * character found in 'delim' (which is overwritten with NUL). '*orig'
 * is advanced past that delimiter, or set to NULL when the token was
 * the last one. Returns NULL when '*orig' is already NULL.
 */
gchar *a_Misc_strsep(char **orig, const char *delim)
{
   gchar *token = *orig;
   gchar *sep;

   if (token == NULL)
      return NULL;

   sep = strpbrk(token, delim);
   if (sep == NULL) {
      /* no delimiter left: this is the final token */
      *orig = NULL;
      return token;
   }

   *sep = 0;
   *orig = sep + 1;
   return token;
}

#define TAB_SIZE 8
/*
 * Return a newly allocated copy of 'str' in which each tab is replaced
 * by the spaces needed to reach the next multiple of TAB_SIZE columns.
 * Columns are counted from the beginning of 'str'.
 */
gchar *a_Misc_expand_tabs(const char *str)
{
   GString *expanded = g_string_new("");
   const char *s;
   int column = 0;
   char *result;

   for (s = str; *s; ++s) {
      if (*s != '\t') {
         g_string_append_c(expanded, *s);
         column++;
      } else {
         /* Pad with spaces up to the next tab stop. */
         int pad = TAB_SIZE - (column % TAB_SIZE);

         column += pad;
         while (pad-- > 0)
            g_string_append_c(expanded, ' ');
      }
   }

   result = expanded->str;
   g_string_free(expanded, FALSE);
   return result;
}

/*
 * Split a string into tokens, at any character contained by delim,
 * and return the starting and ending positions within the string. For
 * n tokens, the returned array has at least 2 * n + 1 elements, and
 * contains the start of token i at 2 * i, the end at 2 * i + 1. The
 * array is terminated by -1.
 */
gint *a_Misc_strsplitpos(const gchar *str, const gchar *delim)
{
   gint array_max = 4;
   gint *array = g_new(gint, array_max);
   gint n = 0;
   gint p1 = 0, p2;

   while (TRUE) {
      while (str[p1] != 0 && strchr(delim, str[p1]) != NULL)
         p1++;
      if (str[p1] == 0)
         break;

      p2 = p1;
      while (str[p2] != 0 && strchr(delim, str[p2]) == NULL)
         p2++;

      if (array_max < 2 * n + 3) {
         array_max <<= 2;
         array = g_realloc(array, array_max * sizeof(gint));
      }

      array[2 * n] = p1;
      array[2 * n + 1] = p2;
      n++;

      if (str[p2] == 0)
         break;
      else {
         p1 = p2;
      }
   }

   array[2 * n] = -1;
   return array;
}

/*
 * Duplicate a position array produced by a_Misc_strsplitpos(): all
 * start/end pairs plus the terminating -1 are copied into a newly
 * allocated array.
 */
gint *a_Misc_strsplitposdup(gint *pos)
{
   gint count;
   gint *copy;

   /* step over the pairs until the terminator shows up at an even index */
   for (count = 0; pos[count] != -1; count += 2)
      ;
   count++;                     /* the terminator is copied as well */

   copy = g_new(gint, count);
   memcpy(copy, pos, count * sizeof(gint));
   return copy;
}

/*
 * Detects 'Content-Type' when the server does not supply one.
 * It uses the magic(5) logic from file(1). Currently, it
 * only checks the few mime types that Dillo supports.
 *
 * 'Data' is a pointer to the first bytes of the raw data and 'Size'
 * the number of bytes available there.
 *
 * Checks, in order:
 *  - HTML:  after leading whitespace the data starts (ignoring case)
 *           with "<html", "<head", "<title" or "<!doctype html".
 *  - Image: the data starts with the exact GIF, PNG or JPEG signature.
 *  - Text:  at least 12 bytes are available and none of the first 256
 *           is above 127.
 * Anything else is reported as "application/octet-stream".
 *
 * Return value: pointer to a static string (do not free it).
 */
const gchar *a_Misc_get_content_type_from_data(void *Data, size_t Size)
{
   static const gchar *Types[] = {
      "application/octet-stream",
      "text/html", "text/plain",
      "image/gif", "image/png", "image/jpeg",
   };
   gint Type = 0;
   gchar *p = Data;
   size_t i;

   /* HTML try */
   /* (the cast keeps isspace() defined for bytes above 127) */
   for (i = 0; i < Size && isspace((guchar) p[i]); ++i)
      ;
   if ((Size - i >= 5 && !g_strncasecmp(p+i, "<html", 5)) ||
       (Size - i >= 5 && !g_strncasecmp(p+i, "<head", 5)) ||
       (Size - i >= 6 && !g_strncasecmp(p+i, "<title", 6)) ||
       (Size - i >= 14 && !g_strncasecmp(p+i, "<!doctype html", 14))) {
      Type = 1;

   /* Images */
   /* Signatures are binary magic numbers: compare them byte for byte,
    * so that e.g. text starting with "gif8" isn't taken for an image. */
   } else if (Size >= 4 && !memcmp(p, "GIF8", 4)) {
      Type = 3;
   } else if (Size >= 4 && !memcmp(p, "\x89PNG", 4)) {
      Type = 4;
   } else if (Size >= 2 && !memcmp(p, "\xff\xd8", 2)) {
      /* JPEG has the first 2 bytes set to 0xffd8 in BigEndian - looking
       * at the character representation should be machine independent. */
      Type = 5;

   /* Text */
   } else {
      /* We'll assume ASCII if chars are below 128 (after all, this
       * is a last resort when the server doesn't send Content-Type) */
      Size = MIN (Size, 256);
      for (i = 0; i < Size; i++)
         if ((guchar) p[i] > 127)
            break;
      /* too little data, or a non-ASCII byte was found: not text */
      Type = (i < 12 || i < Size) ? 0 : 2;
   }

   return (Types[Type]);
}

#ifndef DISABLE_TABS

/*
 * Check 'input' for characters that are invalid in DW_CHARSET and
 * repair it: the text is converted from DW_CHARSET to DW_CHARSET and
 * every "[]" pair is then dropped from the converted text.
 *
 * Return value: newly allocated string; the caller must g_free() it.
 */
gchar *a_Misc_string_check(const gchar *input)
{
   GString *cleaned = g_string_new("");
   gchar *converted, *s, *result;

   /* The conversion is expected to replace each invalid character by a
    * "white square", which is matched below as the two bytes "[]". */
   converted = a_Encoding_Convert(DW_CHARSET, DW_CHARSET, input, strlen(input));

   for (s = converted; *s; s++) {
      if (s[0] == '[' && s[1] == ']')
         s++;                   /* skip the pair: ']' here, '[' by the loop */
      else
         g_string_append_c(cleaned, *s);
   }
   g_free(converted);

   result = cleaned->str;
   g_string_free(cleaned, FALSE);
   return result;
}

/*
 * (adapted from Galeon's misc_string_shorten_name())
 * Try to shorten a string to target_length or less.
 * This is quite language-specific, i.e. it will not work
 * with non indo-european languages.
 *
 * Steps:
 *   1. strip surrounding whitespace and the prefixes listed in
 *      prefs.compress_common_prefixes_vec,
 *   2. while the string is still too long, drop characters found in
 *      prefs.compress_vowels (never the first character of the string
 *      or of a word),
 *   3. pass the result through a_Misc_string_check(),
 *   4. if it is still too long, cut it at a word boundary, or failing
 *      that, hand it to a_Misc_string_shorten().
 *
 * A target_length below 1 is treated as 1. A NULL input is replaced by
 * the translated string "Untitled".
 *
 * Caller has to free the returned string.
 */
gchar *
a_Misc_string_compress(const gchar *input, gint target_length)
{
   gint i, j, length;
   gchar *name, **common_prefixes = prefs.compress_common_prefixes_vec;
   gchar *shortened_name;
   char c;

   /* it does not make sense to compress a string into nothing... */
   if (target_length < 1)
      target_length = 1;

   /* a_Misc_string_shorten() returns a string of its own and doesn't
    * take ownership of its argument, so pass the literal directly:
    * a g_strdup()'ed copy would never be freed. */
   if (input == NULL)
      return a_Misc_string_shorten(_("Untitled"), target_length);

   /* copy and clean name */
   name = g_strdup (input);
   g_strstrip (name);
   length = strlen (name);

   /* remove common prefixes from string */
   if (common_prefixes) {
      for (i = 0; common_prefixes[i]; ++i) {
         gint prefix_len = strlen(common_prefixes[i]);

         if (strncasecmp(name, common_prefixes[i], prefix_len) == 0) {
            length -= prefix_len;
            /* length + 1: move the terminating NUL too */
            memmove(name, name + prefix_len, length + 1);
         }
      }
   }

   /* remove leading whitespace exposed by the prefix removal, and
    * resync 'length': the loop below indexes 'name' with it */
   g_strchug(name);
   length = strlen (name);

   /* remove vowels from string, walking backwards; kept characters are
    * packed towards the end of the buffer (write index j >= read index i) */
   for (i = length - 1, j = length - 1; i >= 0; i--) {
      c = name[i];
      /* keep the letter if the string is short enough
       * already, if the letter is at the beginning of a
       * word, or if it's not a lowercase vowel */
      if (length <= target_length || i == 0 ||
          (i > 0 && name[i - 1] == ' ') ||
          /* prefs.compress_vowels contains those characters
             that can be skipped without making a word totally
             irrecognizable. In most languages, those are the
             vowels (with and without accents) */
          !strchr (prefs.compress_vowels, c)) {
         name[j] = c;
         j--;
      } else {
         length--;
      }
   }
   /* shift the kept characters (and the NUL) back to the start */
   memmove(name, name + j + 1, length + 1);

   /* check string */
   {
      gchar *oldstr = name;
      name = a_Misc_string_check(name);
      g_free(oldstr);
   }
   /* the check returns a different string, possibly of another length;
    * the index used below must stay inside it */
   length = strlen (name);

   /* short enough yet? */
   if (length <= target_length)
      return name;

   /* no - try chopping out whole words
    * (length > target_length here, so name[target_length] is valid) */
   for (i = target_length; i > 0; i--) {
      if (ispunct((unsigned char) name[i]) ||
          isspace((unsigned char) name[i])) {
         /* step back over the whole run of separators, without
          * walking off the start of the string */
         while (i >= 0 && (ispunct((unsigned char) name[i]) ||
                           isspace((unsigned char) name[i])))
            i--;
         if (i < 0)
            break;              /* only separators in front: can't cut here */
         /* this will do */
         name[i + 1] = '\0';
         return name;
      }
   }

   /* still too long - use shortening on the compressed string */
   shortened_name = a_Misc_string_shorten(name, target_length);
   g_free(name);
   return shortened_name;
}

/*
 * Shorten a string to no more than target_length characters by
 * replacing the middle part with an ellipsis of three dots ("...").
 *
 * A target_length below 3 is treated as 3. Strings that already fit
 * are returned as a plain copy; otherwise the result goes through
 * a_Misc_string_check() before it is returned.
 *
 * Return value: newly allocated string the caller must free, or NULL
 * when 'input' is NULL.
 */
gchar *
a_Misc_string_shorten(const gchar *input, gint target_length)
{
   gchar *buf, *checked;
   gint input_len, head_len, tail_len;

   if (input == NULL)
      return NULL;

   /* there must be room for the ellipsis itself */
   if (target_length < 3)
      target_length = 3;

   input_len = strlen (input);

   /* already short enough, or too short to be shortened: just copy */
   if (input_len <= 3 || input_len <= target_length)
      return g_strdup (input);

   /* how much to keep from the end (rounded up to an even count) and
    * from the beginning of the input */
   tail_len = (target_length - 3) / 2;
   if (tail_len % 2 != 0)
      tail_len++;
   head_len = target_length - 3 - tail_len;

   /* assemble: head + "..." + tail */
   buf = g_new (gchar, target_length + 1);
   memcpy (buf, input, head_len);
   memcpy (buf + head_len, "...", 3);
   memcpy (buf + head_len + 3, input + input_len - tail_len, tail_len);
   buf[target_length] = '\0';

   /* check string */
   checked = a_Misc_string_check(buf);
   g_free(buf);

   return checked;
}

#endif /* !DISABLE_TABS */
