# ==============================================================================
# Extract mappings for default case folding from Unicode data file
#
# See Unicode standard section 5.18
#
# Copyright (c) 2020 by the developers. See the LICENSE file for details.


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN {
   # Emit the banner comment first, then open the initializer of the C table.
   # The entries themselves are appended by the record rule and the array is
   # closed by the END rule.
   printf "%s\n", "/* Unicode case folding data created by build1.mk */"
   printf "static const struct uc_cf  uc_cf_table[] =\n{\n"
}

END {
   # Terminate the table with a sentinel entry whose four members are all -1,
   # close the array initializer and finish the output with an EOF marker.
   printf "   /* Range starting with -1 is the end marker */\n"
   printf "   { -1L, -1L, -1L, -1L }\n"
   printf "};\n"
   printf "\n\n/* EOF */\n"
}


# ==============================================================================
# Ignore comment lines

# Records whose first character is '#' carry no data: skip to the next record
/^#/ { next }


# ==============================================================================
# Process lines

{
   # Only records whose 2nd field starts with exactly one blank followed by
   # the status letter "C" or "F" are used; every other record is dropped.
   # NOTE(review): this presumes the fields are split on ';' by the invoker
   # (FS is not set in this file) - confirm against the build rule.
   if($2 !~ /^[ ](C|F)/)
   {
      next
   }

   # The 3rd field holds the case folded codepoint(s). An empty result means
   # that no hexadecimal number was found there, so no entry is printed.
   range = extract_cf_mapping($3)
   if(range != "")
   {
      # Format: { Codepoint, 1 to 3 case folded codepoints }
      src = "0x" $1 "L"
      printf("   { %s, %s },\n", src, range)
   }
   next
}


# ==============================================================================
# Extract codepoint mappings for default case folding

# Build the C initializer fragment for the case folded codepoints found in 's'.
#
# Parameters:
#    s          Text to scan. Every run of characters from [0-9A-F] is taken
#               as one hexadecimal codepoint; anything else is a separator.
#    range      Local variable, not to be passed by callers: result string
#    remainder  Local variable, not to be passed by callers: unscanned rest
#    count      Local variable, not to be passed by callers: codepoints found
#
# Returns a string with exactly three comma separated members, for example
# "0x0073L, 0x0073L, -1L". Missing codepoints are represented by "-1L".
# Codepoints beyond the third one are ignored. If 's' contains no codepoint
# at all, the empty string is returned.
#
# The additional parameters exist only because AWK variables are global
# unless they are function parameters. Without them the function would
# overwrite variables named 'range' and 'remainder' of its caller.
function extract_cf_mapping(s,    range, remainder, count) \
{
   range = ""
   remainder = s
   count = 0

   # Collect up to three hexadecimal numbers, scanning from left to right
   while(count < 3 && match(remainder, /[0-9A-F]+/))
   {
      if(count > 0)
      {
         range = range ", "
      }
      range = range "0x" substr(remainder, RSTART, RLENGTH) "L"
      # Continue the search behind the number that was just consumed
      remainder = substr(remainder, RSTART + RLENGTH)
      count++
   }

   # Pad missing codepoints with -1 so that every entry has three members.
   # Without any codepoint the result stays empty (nothing to pad).
   if(count > 0)
   {
      while(count < 3)
      {
         range = range ", -1L"
         count++
      }
   }
   return(range)
}


# EOF
