# ==============================================================================
# Extract hangul syllable types from Unicode data file
#
# Copyright (c) 2016-2020 by the developers. See the LICENSE file for details.


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN \
{
   # Prefix prepended to every syllable type name written to the output
   # (used by the record rule and by the end marker row)
   hst_prefix = "UC_HST_"

   # Emit the leading C comment followed by the opening of the table
   # definition; the rows are appended while the input is processed
   printf("/* Hangul syllable type data created by build1.mk */\n")
   printf("static const struct uc_hst  uc_hst_table[] =\n")
   printf("{\n")
}

END \
{
   # Terminate the table with a sentinel row: both codepoints are -1 and the
   # syllable type is the prefixed name "NONE"
   printf("   /* Range starting with -1 is the end marker */\n")
   printf("   { -1L, -1L, %s }\n", hst_prefix "NONE")

   # Close the array initializer and write the trailer comment
   printf("};\n\n\n/* EOF */\n")
}


# ==============================================================================
# Process lines

{
   # Lines starting with '#' are comments in the data file: nothing to emit
   if(match($0, /^[#]/))
   {
      next
   }

   # Extract range of codepoints from 1st field
   # (Field zero in terms of Unicode standard counting from zero)
   range = extract_range($1)
   # Extract hangul syllable type from 2nd field
   # (1st field in terms of Unicode standard counting from zero)
   # NOTE(review): this relies on the field separator chosen by the caller
   # (presumably ';' as used by the Unicode data files) -- confirm in the
   # invocation of this script
   hst = extract_syllable_type($2)

   # Records without a codepoint range (e.g. empty lines) produce no row
   if("" != range)
   {
      # Format: { Start codepoint, End codepoint, Prefixed syllable type }
      # The prefix is passed as an argument and not spliced into the format
      # string, so that it can never be interpreted as a conversion
      # specification (same approach as for the end marker row)
      # NOTE(review): if no syllable type was found, only the bare prefix is
      # written -- confirm that such records cannot occur in the input
      printf("   { %s, %s%s },\n", range, hst_prefix, hst)
   }
   next
}


# ==============================================================================
# Extract codepoint range

# Convert a codepoint field into a pair of C long literals.
#
# Parameter:
#    s  Field content, either a single codepoint ("AC00") or a range
#       ("1100..115F"), written with uppercase hexadecimal digits.
#       Surrounding characters that are not hex digits are ignored.
#
# Returns "0x<start>L, 0x<end>L". For a single codepoint start and end are
# identical. Returns the empty string if s is empty or contains no codepoint.
#
# The parameters after s are local variables (awk has no other way to declare
# them); callers must pass only s.

function extract_range(s,    first, second, remainder, result) \
{
   result = ""
   # Check for missing data and for start code point
   if("" != s && match(s, /[0-9A-F]+/))
   {
      first = "0x" substr(s, RSTART, RLENGTH) "L"
      # Look for the end code point only behind the start code point
      remainder = substr(s, RSTART + RLENGTH)
      if(match(remainder, /[.][.][0-9A-F]+/))
      {
         # Skip the two dots that separate start and end
         second = "0x" substr(remainder, RSTART + 2, RLENGTH - 2) "L"
      }
      else
      {
         # If single codepoint insert the value as start and end of range
         second = first
      }
      # Build the result only if a codepoint was found. Otherwise a field
      # without hex digits (e.g. whitespace only) would yield the values
      # left over from a previous record.
      result = first ", " second
   }
   return(result)
}


# ==============================================================================
# Extract syllable type field

# Pick the syllable type name out of a field.
#
# Parameter:
#    s  Field content
#
# Returns the first run of the letters 'L', 'V' and 'T' found in s, or the
# empty string if s is empty or contains none of these letters.

function extract_syllable_type(s) \
{
   # Result for missing data or a field without syllable type
   st = ""
   if("" == s || !match(s, /[LVT]+/))
   {
      return(st)
   }

   # match() has stored position and length of the first run
   st = substr(s, RSTART, RLENGTH)
   return(st)
}


# EOF
