# ==============================================================================
# Extract normalization test data (for NFC) from Unicode data file
#
# Copyright (c) 2016 by the developers. See the LICENSE file for details.


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN \
{
   # Banner comment naming the origin of the generated table
   printf("%s\n", "/* Unicode NFC test data created by build1.mk */")
   # Declaration of the array and its opening brace
   printf("%s\n%s\n", "static const long int  uc_test_nfc_table[] =", "{")
   # Legend for the negative marker values used inside the records
   printf("%s%s\n", "   /* Codepoint value -2 is the field separator, ",
          "-3 terminates a record */")
}

END \
{
   # Final array element: the end marker, preceded by its explanation
   printf("%s\n", "   /* Codepoint value -1 is the end marker */")
   # Last value and the closing brace of the initializer
   printf("%s\n%s\n", "   -1L", "};")
   # Two empty lines separate the array from the trailing EOF comment
   printf("\n\n%s\n", "/* EOF */")
}


# ==============================================================================
# Ignore lines starting with '#' (comments) or '@'

$0 ~ /^[#@]/ \
{
   # Lines whose first character is '#' or '@' carry no record: skip them so
   # that they never reach the record processing rule
   next
}


# ==============================================================================
# Process lines

{
   # Convert the first three fields of the record. They are emitted below in
   # the order: source sequence, NFC form, NFD form.
   # NOTE(review): nothing in this script sets FS, so the invoking command is
   # presumably expected to select the field separator -- confirm in the
   # makefile
   src = extract_sequence($1)
   nfc = extract_sequence($2)
   nfd = extract_sequence($3)

   if("" == src || "" == nfc || "" == nfd)
   {
      # At least one field contained no codepoint: abort with exit status 1.
      # The diagnostic must go to stderr because stdout carries the generated
      # C source.
      # Note that awk still executes the END rule after exit, so the array is
      # closed even on error; callers must check the exit status.
      printf("%s:%d: Error: no codepoint found in one of the first 3 fields\n",
             FILENAME, FNR) | "cat 1>&2"
      exit(1)
   }

   # One record per line: fields separated by -2, record terminated by -3
   printf("   %s, -2L, %s, -2L, %s, -3L,\n", src, nfc, nfd)
   next
}


# ==============================================================================
# Extract codepoint sequence

# Convert a field with hexadecimal codepoints into a list of C long constants
#
# Parameter:
#    s   Text to scan for codepoints (runs of the characters 0-9 and A-F)
#
# Returns every run found in 's', in order of appearance, formatted as
# "0x<run>L" and joined with ", ". An empty string is returned if 's' contains
# no such run (this includes an empty 's').
#
# The additional parameters 'seq', 'remainder' and 'found' are local variables
# (awk has no other way to declare them). Callers pass only 's'.
function extract_sequence(s,    seq, remainder, found) \
{
   seq = ""
   remainder = s
   # match() returns 0 for an empty or exhausted remainder, this ends the loop
   while(match(remainder, /[0-9A-F]+/))
   {
      found = substr(remainder, RSTART, RLENGTH)
      # Separator is required in front of every element except the first
      if("" != seq)
      {
         seq = seq ", "
      }
      seq = seq "0x" found "L"
      # Continue the scan behind the codepoint just consumed
      remainder = substr(remainder, RSTART + RLENGTH)
   }
   return(seq)
}


# EOF
