Update Files

2025-01-22 16:18:30 +01:00
parent ed4603cf95
commit a36294b518
16718 changed files with 2960346 additions and 0 deletions
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateCommon.py
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateCommon.py
@ -0,0 +1,355 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This file is a Python module containing common lists and functions for the
+# GenerateXXX scripts that create various .c and .h files from Unicode data
+# files. It was created as part of a re-organization of these scripts in
+# December 2021.
+
+
+import re
+
+
+# ---------------------------------------------------------------------------
+#                             DATA LISTS
+# ---------------------------------------------------------------------------
+
+# BIDI classes in the DerivedBidiClass.txt file, with comments.
+
+# The list is flat: entries alternate between a class abbreviation and its
+# descriptive comment, so the abbreviations are at the even indexes.
+bidi_classes = [
+  'AL',  'Arabic letter',
+  'AN',  'Arabic number',
+  'B',   'Paragraph separator',
+  'BN',  'Boundary neutral',
+  'CS',  'Common separator',
+  'EN',  'European number',
+  'ES',  'European separator',
+  'ET',  'European terminator',
+  'FSI', 'First strong isolate',
+  'L',   'Left to right',
+  'LRE', 'Left to right embedding',
+  'LRI', 'Left to right isolate',
+  'LRO', 'Left to right override',
+  'NSM', 'Non-spacing mark',
+  'ON',  'Other neutral',
+  'PDF', 'Pop directional format',
+  'PDI', 'Pop directional isolate',
+  'R',   'Right to left',
+  'RLE', 'Right to left embedding',
+  'RLI', 'Right to left isolate',
+  'RLO', 'Right to left override',
+  'S',   'Segment separator',
+  'WS',  'White space'
+  ]
+
+# Particular category property names, with comments. NOTE: If ever this list
+# is changed, the table called "catposstab" in the pcre2_auto_possess.c file
+# must be edited to keep in step.
+
+# The list is flat: entries alternate between a two-letter category
+# abbreviation and its descriptive comment. The first letters of the
+# abbreviations (the even-indexed entries) are what the general category
+# names are derived from further down in this module.
+category_names = [
+  'Cc', 'Control',
+  'Cf', 'Format',
+  'Cn', 'Unassigned',
+  'Co', 'Private use',
+  'Cs', 'Surrogate',
+  'Ll', 'Lower case letter',
+  'Lm', 'Modifier letter',
+  'Lo', 'Other letter',
+  'Lt', 'Title case letter',
+  'Lu', 'Upper case letter',
+  'Mc', 'Spacing mark',
+  'Me', 'Enclosing mark',
+  'Mn', 'Non-spacing mark',
+  'Nd', 'Decimal number',
+  'Nl', 'Letter number',
+  'No', 'Other number',
+  'Pc', 'Connector punctuation',
+  'Pd', 'Dash punctuation',
+  'Pe', 'Close punctuation',
+  'Pf', 'Final punctuation',
+  'Pi', 'Initial punctuation',
+  'Po', 'Other punctuation',
+  'Ps', 'Open punctuation',
+  'Sc', 'Currency symbol',
+  'Sk', 'Modifier symbol',
+  'Sm', 'Mathematical symbol',
+  'So', 'Other symbol',
+  'Zl', 'Line separator',
+  'Zp', 'Paragraph separator',
+  'Zs', 'Space separator'
+  ]
+
+# The Extended_Pictographic property is not found in the file where all the
+# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt
+# file, but we list it here so that the name has the correct index value.
+
+# The list is flat: entries alternate between a property name and a comment
+# string. Each comment string starts with the position of its name among the
+# names (0 for the first name, 14 for the last).
+break_properties = [
+  'CR',                    ' 0',
+  'LF',                    ' 1',
+  'Control',               ' 2',
+  'Extend',                ' 3',
+  'Prepend',               ' 4',
+  'SpacingMark',           ' 5',
+  'L',                     ' 6 Hangul syllable type L',
+  'V',                     ' 7 Hangul syllable type V',
+  'T',                     ' 8 Hangul syllable type T',
+  'LV',                    ' 9 Hangul syllable type LV',
+  'LVT',                   '10 Hangul syllable type LVT',
+  'Regional_Indicator',    '11',
+  'Other',                 '12',
+  'ZWJ',                   '13',
+  'Extended_Pictographic', '14'
+  ]
+
+# List of files from which the names of Boolean properties are obtained, along
+# with a list of regex patterns for properties to be ignored, and a list of
+# extra pattern names to add.
+
+# Data file names; getbpropslist() opens each one in the Unicode.tables
+# directory.
+bool_propsfiles = ['PropList.txt', 'DerivedCoreProperties.txt', 'emoji-data.txt']
+# Patterns tried with re.match() against each property name that is read; a
+# name that matches any of them is left out of the list.
+bool_propsignore = [r'^Other_', r'^Hyphen$']
+# Names appended to the list after all the files have been scanned.
+bool_propsextras = ['ASCII', 'Bidi_Mirrored']
+
+
+# ---------------------------------------------------------------------------
+#                   GET BOOLEAN PROPERTY NAMES
+# ---------------------------------------------------------------------------
+
+# Get a list of Boolean property names from a number of files.
+
+def getbpropslist():
+  # Scan each file named in bool_propsfiles (in the Unicode.tables directory)
+  # and collect the property names found in the second semicolon-separated
+  # field of its data lines. Text from '#' to the end of a line is ignored, a
+  # name that repeats the one on the previous data line is collected only
+  # once, and a name matching any pattern in bool_propsignore is left out.
+  # The names in bool_propsextras are then added and the sorted list is
+  # returned. If a file cannot be opened, a message is printed and the script
+  # exits with status 1.
+
+  # Imported here rather than relying on the module-level import, which comes
+  # later in this file than the call to this function and so has not yet been
+  # executed when an open failure has to be reported.
+  import sys
+
+  bplist = []
+  bplast = ""
+
+  for filename in bool_propsfiles:
+    path = 'Unicode.tables/' + filename
+    try:
+      # Read as UTF-8 rather than the locale default, as GenerateUcd.py does
+      # when it reads the Unicode data tables.
+      file = open(path, 'r', encoding='utf-8')
+    except IOError:
+      print(f"** Couldn't open {path}\n")
+      sys.exit(1)
+
+    # The with statement closes the file even if processing a line fails.
+    with file:
+      for line in file:
+        line = re.sub(r'#.*', '', line)
+        data = [field.strip() for field in line.split(';')]
+        if len(data) <= 1 or data[1] == bplast:
+          continue
+        # An ignored name is still remembered, so that its repeats are
+        # skipped by the test above without re-running the patterns.
+        bplast = data[1]
+        if not any(re.match(pat, bplast) for pat in bool_propsignore):
+          bplist.append(bplast)
+
+  bplist.extend(bool_propsextras)
+  bplist.sort()
+  return bplist
+
+# Build the list once, when this module is loaded, so that the scripts which
+# import this module can import the result.
+bool_properties = getbpropslist()
+# The number of properties divided by 32 and rounded up, i.e. how many 32-bit
+# words are needed to hold one bit per Boolean property.
+bool_props_list_item_size = (len(bool_properties) + 31) // 32
+
+
+
+# ---------------------------------------------------------------------------
+#                  COLLECTING PROPERTY NAMES AND ALIASES
+# ---------------------------------------------------------------------------
+
+script_names = ['Unknown']
+abbreviations = {}
+
+def collect_property_names():
+  # Fill in the script_names list and the abbreviations dictionary from three
+  # files in the Unicode.tables directory:
+  #
+  #   Scripts.txt               each script name is appended to script_names
+  #                             the first time a run of lines for it starts
+  #   PropertyValueAliases.txt  for "sc" lines, the third field is mapped to a
+  #                             tuple of the other name(s) on the line
+  #   PropertyAliases.txt       for names present in bool_properties, the
+  #                             second field is mapped in the same way
+  global script_names
+  global abbreviations
+
+  names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_]+) #')
+
+  previous = ""
+  with open("Unicode.tables/Scripts.txt") as scripts_file:
+    for text in scripts_file:
+      found = names_re.match(text)
+      if found is None:
+        continue
+      current = found.group(1)
+      if current != previous:
+        previous = current
+        script_names.append(current)
+
+  # A line may carry a trailing comment, so the fields are picked out with a
+  # pattern instead of by splitting the line at the semicolons.
+  value_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_ ]+))?')
+
+  with open("Unicode.tables/PropertyValueAliases.txt") as aliases_file:
+    for text in aliases_file:
+      found = value_alias_re.match(text)
+      if found is None or found.group(1) != "sc":
+        continue
+
+      second, third, fourth = found.group(2, 3, 4)
+      if second == third:
+        # Both name fields are the same, so there is nothing to record.
+        abbreviations[third] = ()
+      elif fourth is None:
+        abbreviations[third] = (second,)
+      else:
+        abbreviations[third] = (second, fourth)
+
+  # Boolean property abbreviations go into the same dictionary.
+
+  bin_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?')
+  with open("Unicode.tables/PropertyAliases.txt") as aliases_file:
+    for text in aliases_file:
+      found = bin_alias_re.match(text)
+      if found is None:
+        continue
+
+      first, second, third = found.group(1, 2, 3)
+      if second not in bool_properties:
+        continue
+      abbreviations[second] = (first,) if third is None else (first, third)
+
+collect_property_names()
+
+
+
+# ---------------------------------------------------------------------------
+#                      REORDERING SCRIPT NAMES
+# ---------------------------------------------------------------------------
+
+script_abbrevs = []
+
+def reorder_scripts():
+  # Give every script an abbreviation (the first entry of its tuple in the
+  # abbreviations dictionary, or its own name when the tuple is empty), then
+  # reorder script_names and script_abbrevs in step so that the scripts whose
+  # abbreviation occurs in ScriptExtensions.txt come first. Within each of
+  # the two groups the existing order is kept.
+  global script_names
+  global script_abbrevs
+  global abbreviations
+
+  for full_name in script_names:
+    aliases = abbreviations[full_name]
+    script_abbrevs.append(aliases[0] if aliases else full_name)
+
+  # Gather every abbreviation mentioned in a ScriptExtensions.txt data line;
+  # the value field of such a line is a space-separated list.
+  in_extensions = set()
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')
+
+    for text in f:
+      found = names_re.match(text)
+      if found is not None:
+        in_extensions.update(found.group(1).split(" "))
+
+  # One pass that sorts each (name, abbreviation) pair into the leading or
+  # the trailing group.
+  lead_names, lead_abbrevs = [], []
+  tail_names, tail_abbrevs = [], []
+
+  for pos, abbrev in enumerate(script_abbrevs):
+    if abbrev in in_extensions:
+      lead_names.append(script_names[pos])
+      lead_abbrevs.append(abbrev)
+    else:
+      tail_names.append(script_names[pos])
+      tail_abbrevs.append(abbrev)
+
+  script_names = lead_names + tail_names
+  script_abbrevs = lead_abbrevs + tail_abbrevs
+
+reorder_scripts()
+script_list_item_size = (script_names.index('Unknown') + 31) // 32
+
+
+# ---------------------------------------------------------------------------
+#                         DERIVED LISTS
+# ---------------------------------------------------------------------------
+
+# Create general character property names from the first letters of the
+# particular categories.
+
+# The abbreviations are the even-indexed entries of category_names; keep one
+# copy of each distinct first letter and list the letters in sorted order.
+gcn_set = {abbrev[0] for abbrev in category_names[0::2]}
+general_category_names = sorted(gcn_set)
+
+
+# ---------------------------------------------------------------------------
+#                           FUNCTIONS
+# ---------------------------------------------------------------------------
+
+import sys
+
+# Open an output file, using the command's argument or a default. Write common
+# preliminary header information.
+
+def open_output(default):
+  # Open the output file for writing and return it, after writing the
+  # standard header to it. The file name is the single command line
+  # argument if there is one, otherwise "default". The script exits with
+  # status 1 if there is more than one argument or the file cannot be opened.
+  argc = len(sys.argv)
+  if argc > 2:
+    print('** Too many arguments: just give a file name')
+    sys.exit(1)
+  output_name = sys.argv[1] if argc == 2 else default
+
+  try:
+    out = open(output_name, "w")
+  except IOError:
+    print ("** Couldn't open %s" % output_name)
+    sys.exit(1)
+
+  # The header names the running script: keep only what follows the last
+  # slash of the name it was invoked by.
+  script_name = sys.argv[0].rsplit('/', 1)[-1]
+
+  out.write("""\
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+""")
+
+  out.write("Instead, modify the maint/%s script and run it to generate\n"
+            "a new version of this code.\n\n" % script_name)
+
+  out.write("""\
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+\n""")
+  return out
+
+# End of GenerateCommon.py
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateTest26.py
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateTest26.py
@ -0,0 +1,188 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+#
+# This file auto-generates unicode property tests and their expected output.
+# It is recommended to re-run this generator after the unicode files are
+# updated. The names of the generated files are `testinput26` and `testoutput26`
+
+import re
+import sys
+
+from GenerateCommon import \
+  script_names, \
+  script_abbrevs
+
+def write_both(text):
+  # Write the same text to the test input file and then to the expected
+  # output file.
+  for stream in (input_file, output_file):
+    stream.write(text)
+
+def to_string_char(ch_idx):
+  # Return the text used for a code point in the test files: the character
+  # itself for code points 32 to 127, otherwise a \x{...} escape holding the
+  # lower case hexadecimal value. Values below 16 get a leading zero so that
+  # the escape has two digits.
+  if ch_idx < 16:
+    return "\\x{0%x}" % ch_idx
+  if 32 <= ch_idx < 128:
+    return chr(ch_idx)
+  return "\\x{%x}" % ch_idx
+
+# The output files go in the directory given as the only command line
+# argument, or in the current directory when there is no argument.
+output_directory = ""
+
+if len(sys.argv) > 2:
+  print('** Too many arguments: just give a directory name')
+  sys.exit(1)
+elif len(sys.argv) == 2:
+  # The file names are appended directly, so the directory must end in "/".
+  output_directory = sys.argv[1] if sys.argv[1].endswith("/") else sys.argv[1] + "/"
+
+try:
+  input_file = open(output_directory + "testinput26", "w")
+  output_file = open(output_directory + "testoutput26", "w")
+except IOError:
+  print ("** Couldn't open output files")
+  sys.exit(1)
+
+write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")
+
+# ---------------------------------------------------------------------------
+#                      UNICODE SCRIPT EXTENSION TESTS
+# ---------------------------------------------------------------------------
+
+write_both("# Unicode Script Extension tests.\n\n")
+
+def gen_script_tests():
+  # Write the script and script extension tests for every script except
+  # "Unknown". The patterns and subject lines go to both files; the expected
+  # results (" 0: ..." or "No match") go only to the output file. The data is
+  # read from Scripts.txt and ScriptExtensions.txt in the Unicode.tables
+  # directory.
+
+  # script_data[i] is None or a five-element record for script_names[i]:
+  #   [0] first code point of the first Scripts.txt range seen for the script
+  #   [1] last code point of the most recent Scripts.txt range seen for it
+  #   [2] lowest start of any ScriptExtensions.txt range that lists it
+  #   [3] highest end of any such range
+  #   [4] first code point found in such a range whose Scripts.txt script is
+  #       a different one (None if no such code point has been found)
+  # char_data[c] is the Scripts.txt script name of code point c, or None.
+  script_data = [None] * len(script_names)
+  char_data = [None] * 0x110000
+
+  # Groups: 1 = first code point, 2 = optional end of range, 3 = value field.
+  property_re = re.compile("^([0-9A-F]{4,6})(?:\\.\\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+) #")
+  prev_name = ""
+  script_idx = -1
+
+  with open("Unicode.tables/Scripts.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      # Only look the script up again when the name changes from the
+      # previous data line.
+      name = match_obj.group(3)
+      if name != prev_name:
+        script_idx = script_names.index(name)
+        prev_name = name
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      char_data[low] = name
+
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+        for idx in range(low + 1, high + 1):
+           char_data[idx] = name
+
+      if script_data[script_idx] == None:
+        script_data[script_idx] = [low, None, None, None, None]
+      script_data[script_idx][1] = high
+
+  # Maps each abbreviation already met in ScriptExtensions.txt to its index
+  # in script_abbrevs, so that index() is called once per abbreviation.
+  extended_script_indicies = {}
+
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+
+      # The value field is a space-separated list of script abbreviations.
+      for abbrev in match_obj.group(3).split(" "):
+        if abbrev not in extended_script_indicies:
+          idx = script_abbrevs.index(abbrev)
+          extended_script_indicies[abbrev] = idx
+          rec = script_data[idx]
+          rec[2] = low
+          rec[3] = high
+        else:
+          idx = extended_script_indicies[abbrev]
+          rec = script_data[idx]
+          if rec[2] > low:
+            rec[2] = low
+          if rec[3] < high:
+            rec[3] = high
+
+        # Until one has been found, look in this range for a code point whose
+        # Scripts.txt script is not this script. NOTE: the loop below reuses
+        # idx; that is harmless because idx is assigned again from the
+        # abbreviation before its next use.
+        if rec[4] == None:
+          name = script_names[idx]
+          for idx in range(low, high + 1):
+            if char_data[idx] != name:
+              rec[4] = idx
+              break
+
+  # Toggled after each script that has extension data, so that the tests
+  # alternate between the "scx" and "Script_Extensions" property names.
+  long_property_name = False
+
+  for idx, rec in enumerate(script_data):
+    script_name = script_names[idx]
+
+    if script_name == "Unknown":
+      continue
+
+    script_abbrev = script_abbrevs[idx]
+
+    # Test rec[0] against the full script name and rec[1] against the
+    # abbreviation; both are expected to match.
+    write_both("# Base script check\n")
+    write_both("/^\\p{sc=%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(rec[0]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[0]))
+    write_both("\n")
+
+    write_both("/^\\p{Script=%s}/utf\n" % script_abbrev)
+    write_both("  %s\n" % to_string_char(rec[1]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[1]))
+    write_both("\n")
+
+    # rec[2] is set only for scripts listed in ScriptExtensions.txt.
+    if rec[2] != None:
+      property_name = "scx"
+      if long_property_name:
+        property_name = "Script_Extensions"
+
+      write_both("# Script extension check\n")
+      write_both("/^\\p{%s}/utf\n" % script_name)
+      write_both("  %s\n" % to_string_char(rec[2]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[2]))
+      write_both("\n")
+
+      write_both("/^\\p{%s=%s}/utf\n" % (property_name, script_abbrev))
+      write_both("  %s\n" % to_string_char(rec[3]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[3]))
+      write_both("\n")
+
+      long_property_name = not long_property_name
+
+      # rec[4] is expected to match the bare script name but not sc=.
+      if rec[4] != None:
+        write_both("# Script extension only character\n")
+        write_both("/^\\p{%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write(" 0: %s\n" % to_string_char(rec[4]))
+        write_both("\n")
+
+        write_both("/^\\p{sc=%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write("No match\n")
+        write_both("\n")
+      else:
+        print("External character has not found for %s" % script_name)
+
+    # Use the code point just above the higher of rec[1] and rec[3]; the
+    # test expects it not to match the script.
+    high = rec[1]
+    if rec[3] != None and rec[3] > rec[1]:
+      high = rec[3]
+    write_both("# Character not in script\n")
+    write_both("/^\\p{%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(high + 1))
+    output_file.write("No match\n")
+    write_both("\n")
+
+
+gen_script_tests()
+
+write_both("# End of testinput26\n")
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcd.py
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcd.py
@ -0,0 +1,923 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+#
+# This script generates the pcre2_ucd.c file from Unicode data files. This is
+# the compressed Unicode property data used by PCRE2. The script was created in
+# December 2021 as part of the Unicode data generation refactoring. It is
+# basically a re-working of the MultiStage2.py script that was submitted to the
+# PCRE project by Peter Kankowski in 2008 as part of a previous upgrading of
+# Unicode property support. A number of extensions have since been added. The
+# main difference in the 2021 upgrade (apart from comments and layout) is that
+# the data tables (e.g. list of script names) are now listed in or generated by
+# a separate Python module that is shared with the other Generate scripts.
+#
+# This script must be run in the "maint" directory. It requires the following
+# Unicode data tables: BidiMirroring.txt, CaseFolding.txt,
+# DerivedBidiClass.txt, DerivedCoreProperties.txt, DerivedGeneralCategory.txt,
+# GraphemeBreakProperty.txt, PropList.txt, PropertyAliases.txt,
+# PropertyValueAliases.txt, ScriptExtensions.txt, Scripts.txt, and
+# emoji-data.txt. These must be in the Unicode.tables subdirectory.
+#
+# The emoji-data.txt file is found in the "emoji" subdirectory even though it
+# is technically part of a different (but coordinated) standard as shown
+# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"),
+# for example:
+#
+# http://unicode.org/Public/emoji/13.0/ReadMe.txt
+#
+# DerivedBidiClass.txt and DerivedGeneralCategory.txt are in the "extracted"
+# subdirectory of the Unicode database (UCD) on the Unicode web site;
+# GraphemeBreakProperty.txt is in the "auxiliary" subdirectory. The other files
+# are in the top-level UCD directory.
+#
+# -----------------------------------------------------------------------------
+# Minor modifications made to the original script:
+#  Added #! line at start
+#  Removed tabs
+#  Made it work with Python 2.4 by rewriting two statements that needed 2.5
+#  Consequent code tidy
+#  Adjusted data file names to take from the Unicode.tables directory
+#  Adjusted global table names by prefixing _pcre_.
+#  Commented out stuff relating to the casefolding table, which isn't used;
+#    removed completely in 2012.
+#  Corrected size calculation
+#  Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed.
+#  Update for PCRE2: name changes, and SUPPORT_UCP is abolished.
+#
+# Major modifications made to the original script:
+#  Added code to add a grapheme break property field to records.
+#
+#  Added code to search for sets of more than two characters that must match
+#  each other caselessly. A new table is output containing these sets, and
+#  offsets into the table are added to the main output records. This new
+#  code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer
+#  used.
+#
+#  Update for Python3:
+#    . Processed with 2to3, but that didn't fix everything
+#    . Changed string.strip to str.strip
+#    . Added encoding='utf-8' to the open() call
+#    . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is
+#        required and the result of the division is a float
+#
+#  Added code to scan the emoji-data.txt file to find the Extended Pictographic
+#  property, which is used by PCRE2 as a grapheme breaking property. This was
+#  done when updating to Unicode 11.0.0 (July 2018).
+#
+#  Added code to add a Script Extensions field to records. This has increased
+#  their size from 8 to 12 bytes, only 10 of which are currently used.
+#
+#  Added code to add a bidi class field to records by scanning the
+#  DerivedBidiClass.txt and PropList.txt files. This uses one of the two spare
+#  bytes, so now 11 out of 12 are in use.
+#
+# 01-March-2010:     Updated list of scripts for Unicode 5.2.0
+# 30-April-2011:     Updated list of scripts for Unicode 6.0.0
+#     July-2012:     Updated list of scripts for Unicode 6.1.0
+# 20-August-2012:    Added scan of GraphemeBreakProperty.txt and added a new
+#                      field in the record to hold the value. Luckily, the
+#                      structure had a hole in it, so the resulting table is
+#                      not much bigger than before.
+# 18-September-2012: Added code for multiple caseless sets. This uses the
+#                      final hole in the structure.
+# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
+# 13-May-2014:       Updated for PCRE2
+# 03-June-2014:      Updated for Python 3
+# 20-June-2014:      Updated for Unicode 7.0.0
+# 12-August-2014:    Updated to put Unicode version into the file
+# 19-June-2015:      Updated for Unicode 8.0.0
+# 02-July-2017:      Updated for Unicode 10.0.0
+# 03-July-2018:      Updated for Unicode 11.0.0
+# 07-July-2018:      Added code to scan emoji-data.txt for the Extended
+#                      Pictographic property.
+# 01-October-2018:   Added the 'Unknown' script name
+# 03-October-2018:   Added new field for Script Extensions
+# 27-July-2019:      Updated for Unicode 12.1.0
+# 10-March-2020:     Updated for Unicode 13.0.0
+# PCRE2-10.39:       Updated for Unicode 14.0.0
+# 05-December-2021:  Added code to scan DerivedBidiClass.txt for bidi class,
+#                      and also PropList.txt for the Bidi_Control property
+# 19-December-2021:  Reworked script extensions lists to be bit maps instead
+#                      of zero-terminated lists of script numbers.
+# ----------------------------------------------------------------------------
+#
+# Changes to the refactored script:
+#
+# 26-December-2021:  Refactoring completed
+# 10-January-2022:   Addition of general Boolean property support
+# 12-January-2022:   Merge scriptx and bidiclass fields
+# 14-January-2022:   Enlarge Boolean property offset to 12 bits
+#
+# ----------------------------------------------------------------------------
+#
+#
+# The main tables generated by this script are used by macros defined in
+# pcre2_internal.h. They look up Unicode character properties using short
+# sequences of code that contains no branches, which makes for greater speed.
+#
+# Conceptually, there is a table of records (of type ucd_record), one for each
+# Unicode character. Each record contains the script number, script extension
+# value, character type, grapheme break type, offset to caseless matching set,
+# offset to the character's other case, the bidi class, and offset to bitmap of
+# Boolean properties.
+#
+# A real table covering all Unicode characters would be far too big. It can be
+# efficiently compressed by observing that many characters have the same
+# record, and many blocks of characters (taking 128 characters in a block) have
+# the same set of records as other blocks. This leads to a 2-stage lookup
+# process.
+#
+# This script constructs seven tables. The ucd_caseless_sets table contains
+# lists of characters that all match each other caselessly. Each list is
+# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than
+# any valid character. The first list is empty; this is used for characters
+# that are not part of any list.
+#
+# The ucd_digit_sets table contains the code points of the '9' characters in
+# each set of 10 decimal digits in Unicode. This is used to ensure that digits
+# in script runs all come from the same set. The first element in the vector
+# contains the number of subsequent elements, which are in ascending order.
+#
+# Scripts are partitioned into two groups. Scripts that appear in at least one
+# character's script extension list come first, followed by "Unknown" and then
+# all the rest. This sorting is done automatically in the GenerateCommon.py
+# script. A script's number is its index in the script_names list.
+#
+# The ucd_script_sets table contains bitmaps that represent lists of scripts
+# for Script Extensions properties. Each bitmap consists of a fixed number of
+# unsigned 32-bit numbers, enough to allocate a bit for every script that is
+# used in any character's extension list, that is, enough for every script
+# whose number is less than ucp_Unknown. A character's script extension value
+# in its ucd record is an offset into the ucd_script_sets vector. The first
+# bitmap has no bits set; characters that have no script extensions have zero
+# as their script extensions value so that they use this map.
+#
+# The ucd_boolprop_sets table contains bitmaps that represent lists of Boolean
+# properties. Each bitmap consists of a fixed number of unsigned 32-bit
+# numbers, enough to allocate a bit for each supported Boolean property.
+#
+# The ucd_records table contains one instance of every unique character record
+# that is required. The ucd_stage1 table is indexed by a character's block
+# number, which is the character's code point divided by 128, since 128 is the
+# size of each block. The result of a lookup in ucd_stage1 is a "virtual" block
+# number.
+#
+# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by
+# the offset of a character within its own block, and the result is the index
+# number of the required record in the ucd_records vector.
+#
+# The following examples are correct for the Unicode 14.0.0 database. Future
+# updates may change the actual lookup values.
+#
+# Example: lowercase "a" (U+0061) is in block 0
+#          lookup 0 in stage1 table yields 0
+#          lookup 97 (0x61) in the first table in stage2 yields 35
+#          record 35 is { 0, 5, 12, 0, -32, 18432, 44 }
+#             0 = ucp_Latin   => Latin script
+#             5 = ucp_Ll      => Lower case letter
+#            12 = ucp_gbOther => Grapheme break property "Other"
+#             0               => Not part of a caseless set
+#           -32 (-0x20)       => Other case is U+0041
+#         18432 = 0x4800      => Combined Bidi class + script extension values
+#            44               => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#             9 = ucp_bidiL   => Bidi class left-to-right
+#             0               => No special script extension property
+#
+# Almost all lowercase latin characters resolve to the same record. One or two
+# are different because they are part of a multi-character caseless set (for
+# example, k, K and the Kelvin symbol are such a set).
+#
+# Example: hiragana letter A (U+3042) is in block 96 (0x60)
+#          lookup 96 in stage1 table yields 93
+#          lookup 66 (0x42) in table 93 in stage2 yields 819
+#          record 819 is { 20, 7, 12, 0, 0, 18432, 82 }
+#            20 = ucp_Hiragana => Hiragana script
+#             7 = ucp_Lo       => Other letter
+#            12 = ucp_gbOther  => Grapheme break property "Other"
+#             0                => Not part of a caseless set
+#             0                => No other case
+#         18432 = 0x4800       => Combined Bidi class + script extension values
+#            82                => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#             9 = ucp_bidiL   => Bidi class left-to-right
+#             0               => No special script extension property
+#
+# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39)
+#          lookup 57 in stage1 table yields 55
+#          lookup 80 (0x50) in table 55 in stage2 yields 621
+#          record 621 is { 84, 12, 3, 0, 0, 26762, 96 }
+#            84 = ucp_Inherited => Script inherited from predecessor
+#            12 = ucp_Mn        => Non-spacing mark
+#             3 = ucp_gbExtend  => Grapheme break property "Extend"
+#             0                 => Not part of a caseless set
+#             0                 => No other case
+#         26762 = 0x688A        => Combined Bidi class + script extension values
+#            96                 => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#            13 = ucp_bidiNSM   => Bidi class non-spacing mark
+#           138                 => Script Extension list offset = 138
+#
+# At offset 138 in the ucd_script_sets vector we find a bitmap with bits 1, 8,
+# 18, and 47 set. This means that this character is expected to be used with
+# any of those scripts, which are Bengali, Devanagari, Kannada, and Grantha.
+#
+#  Philip Hazel, last updated 14 January 2022.
+##############################################################################
+
+
+# Import standard modules
+
+import re
+import string
+import sys
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  bidi_classes, \
+  bool_properties, \
+  bool_propsfiles, \
+  bool_props_list_item_size, \
+  break_properties, \
+  category_names, \
+  general_category_names, \
+  script_abbrevs, \
+  script_list_item_size, \
+  script_names, \
+  open_output
+
+# Some general parameters
+
+MAX_UNICODE = 0x110000
+NOTACHAR = 0xffffffff
+
+
+# ---------------------------------------------------------------------------
+#                         DEFINE FUNCTIONS
+# ---------------------------------------------------------------------------
+
+
+# Parse a line of Scripts.txt, GraphemeBreakProperty.txt, DerivedBidiClass.txt
+# or DerivedGeneralCategory.txt
+
+def make_get_names(enum):
+  # Build a parser for files whose second field is a name from enum. The
+  # returned function takes the list of fields from one data line and returns
+  # the position of the second field in enum (ValueError if it is not there).
+  def get_name_index(chardata):
+    return enum.index(chardata[1])
+  return get_name_index
+
+
+# Parse a line of CaseFolding.txt
+
+def get_other_case(chardata):
+  # Lines whose status field is anything other than 'C' or 'S' contribute no
+  # other-case offset.
+  if chardata[1] not in ('C', 'S'):
+    return 0
+  # The offset is the signed distance from the character to its folded form.
+  source = int(chardata[0], 16)
+  target = int(chardata[2], 16)
+  return target - source
+
+
+# Parse a line of ScriptExtensions.txt
+
+def get_script_extension(chardata):
+  # Return the offset in script_lists (in units of script_list_item_size) of
+  # the script list named in the second field, adding a new entry unless the
+  # field is the same as on the previously parsed line.
+  global last_script_extension
+
+  names = chardata[1]
+  next_offset = len(script_lists) * script_list_item_size
+
+  # A repeat of the previous line's list reuses the most recently added entry.
+  if names == last_script_extension:
+    return next_offset - script_list_item_size
+
+  last_script_extension = names
+  script_numbers = []
+  for abbrev in names.split(' '):
+    script_numbers.append(script_abbrevs.index(abbrev))
+  script_lists.append(tuple(script_numbers))
+  return next_offset
+
+
+# Read a whole table in memory, setting/checking the Unicode version
+
+def read_table(file_name, get_value, default_value):
+  # Build a list of MAX_UNICODE entries from one Unicode data file.
+  #
+  #   file_name      path of the form "<dir>/<base>.txt"; the first line of the
+  #                  file must be "# <base>-<x.y.z>.txt"
+  #   get_value      called with the stripped, ';'-separated fields of each
+  #                  data line; returns the value for the listed code points
+  #   default_value  value for code points that the file does not set
+  #
+  # The version from the first line is stored in the global unicode_version
+  # on first use; a warning is written to stderr if a later file differs.
+  # Raises ValueError if the file name, the header line or a code point range
+  # cannot be parsed.
+  global unicode_version
+
+  name_match = re.match(r'^[^/]+/([^.]+)\.txt$', file_name)
+  if name_match is None:
+    raise ValueError("Unexpected data file name: %s" % file_name)
+  file_base = name_match.group(1)
+  version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$"
+  range_pat = re.compile(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$')
+
+  # The with statement closes the file even if parsing fails part-way.
+  with open(file_name, 'r', encoding='utf-8') as file:
+    version_match = re.match(version_pat, file.readline())
+    if version_match is None:
+      raise ValueError("No Unicode version header in %s" % file_name)
+    version = version_match.group(1)
+    if unicode_version == "":
+      unicode_version = version
+    elif unicode_version != version:
+      # The format must be applied here; print() does not interpolate.
+      print("WARNING: Unicode version differs in %s" % file_name, file=sys.stderr)
+
+    table = [default_value] * MAX_UNICODE
+    for line in file:
+      line = re.sub(r'#.*', '', line)
+      chardata = list(map(str.strip, line.split(';')))
+      if len(chardata) <= 1:
+        continue
+      value = get_value(chardata)
+      m = range_pat.match(chardata[0])
+      if m is None:
+        raise ValueError("Bad code point range '%s' in %s" % (chardata[0], file_name))
+      char = int(m.group(1), 16)
+      if m.group(3) is None:
+        last = char
+      else:
+        last = int(m.group(3), 16)
+      for i in range(char, last + 1):
+        # It is important not to overwrite a previously set value because in the
+        # CaseFolding file there are lines to be ignored (returning the default
+        # value of 0) which often come after a line which has already set data.
+        if table[i] == default_value:
+          table[i] = value
+  return table
+
+
+# Get the smallest possible C language type for the values in a table
+
+def get_type_size(table):
+  # Return (C type name, size in bytes) for the first candidate type whose
+  # range holds every value in table. Unsigned types are tried before signed
+  # ones, each group from narrowest to widest.
+  lowest = min(table)
+  highest = max(table)
+  candidates = (
+    ("uint8_t", 1, 0, 255),
+    ("uint16_t", 2, 0, 65535),
+    ("uint32_t", 4, 0, 4294967295),
+    ("signed char", 1, -128, 127),
+    ("int16_t", 2, -32768, 32767),
+    ("int32_t", 4, -2147483648, 2147483647),
+  )
+  for type_name, byte_size, minlimit, maxlimit in candidates:
+    if lowest >= minlimit and highest <= maxlimit:
+      return (type_name, byte_size)
+  raise OverflowError("Too large to fit into C types")
+
+
+# Get the total size of a list of tables
+
+def get_tables_size(*tables):
+  # Sum, over all the tables, of element size (from get_type_size) times the
+  # number of elements.
+  return sum(get_type_size(table)[1] * len(table) for table in tables)
+
+
+# Compress a table into the two stages
+
+def compress_table(table, block_size):
+  # Split table into consecutive blocks of block_size entries and store each
+  # distinct block once.
+  #
+  # Returns (stage1, stage2): stage2 is the concatenation of the distinct
+  # blocks in order of first appearance, and stage1 holds, for each block of
+  # the input, the integer number of the matching block within stage2.
+  blocks = {} # Dictionary for finding identical blocks
+  stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table)
+  stage2 = [] # Stage 2 table contains the blocks with property values
+  table = tuple(table)
+  for i in range(0, len(table), block_size):
+    block = table[i:i+block_size]
+    start = blocks.get(block)
+    if start is None:
+      # Allocate a new block. Floor division keeps the block number an int;
+      # "/" would produce a float in Python 3.
+      start = len(stage2) // block_size
+      stage2 += block
+      blocks[block] = start
+    stage1.append(start)
+  return stage1, stage2
+
+
+# Output a table
+
+def write_table(table, table_name, block_size = None):
+  # Write table to the output file f as a C array definition called
+  # table_name, using the element type chosen by get_type_size().
+  #
+  # Without block_size, each output line holds ELEMS_PER_LINE values followed
+  # by a comment giving the code point at which the line starts, scaled from
+  # the table position by MAX_UNICODE / len(table). With block_size, the
+  # values are grouped under a "block N" comment every block_size entries.
+  ctype, size = get_type_size(table)
+  ELEMS_PER_LINE = 16
+
+  s = "const %s %s[] = { /* %d bytes" % (ctype, table_name, size * len(table))
+  if block_size:
+    s += ", block = %d" % block_size
+  f.write(s + " */\n")
+  table = tuple(table)
+  if block_size is None:
+    fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */\n"
+    for i in range(0, len(table), ELEMS_PER_LINE):
+      # Integer arithmetic throughout; no float round trip is needed.
+      f.write(fmt % (table[i:i+ELEMS_PER_LINE] + (i * MAX_UNICODE // len(table),)))
+  else:
+    el = min(block_size, ELEMS_PER_LINE)
+    fmt = "%3d," * el + "\n"
+    if block_size > ELEMS_PER_LINE:
+      fmt = fmt * (block_size // ELEMS_PER_LINE)
+    for i in range(0, len(table), block_size):
+      f.write(("\n/* block %d */\n" + fmt) % ((i // block_size,) + table[i:i+block_size]))
+  f.write("};\n\n")
+
+
+# Extract the unique combinations of properties into records
+
+def combine_tables(*tables):
+  # Treat the tables as parallel columns. Returns (index, records), where
+  # records maps each distinct row (a tuple with one value per table) to a
+  # number assigned in order of first appearance, and index holds the record
+  # number for every position.
+  records = {}
+  index = []
+  for row in zip(*tables):
+    # setdefault() stores the next free number only when the row is new.
+    index.append(records.setdefault(row, len(records)))
+  return index, records
+
+
+# Create a record struct
+
+def get_record_size_struct(records):
+  # Return (size, text) for a list of equal-length record tuples: size is the
+  # byte size of one record when each field uses the type chosen by
+  # get_type_size() and is aligned to its own size; text is a C typedef for
+  # the record. The text ends with "*/" because it is written inside a
+  # comment that the caller has already opened.
+  def round_up(value, alignment):
+    # Next multiple of alignment at or above value (alignment is 1, 2 or 4).
+    return (value + alignment - 1) & -alignment
+
+  size = 0
+  pieces = ['typedef struct {\n']
+  for i in range(len(records[0])):
+    field_type, field_size = get_type_size([record[i] for record in records])
+    size = round_up(size, field_size) + field_size
+    pieces.append('%s property_%d;\n' % (field_type, i))
+
+  # Pad so that the first field of the next record in an array is aligned.
+  first_type, first_size = get_type_size([record[0] for record in records])
+  size = round_up(size, first_size)
+
+  pieces.append('} ucd_record;\n*/\n')
+  return size, ''.join(pieces)
+
+
+# Write records
+
+def write_records(records, record_size):
+  # Write the ucd_records array to the output file f, one initializer per
+  # record, in order of the record numbers held as the values of records.
+  total_bytes = len(records) * record_size
+  f.write('const ucd_record PRIV(ucd_records)[] = { ' +
+    '/* %d bytes, record size %d */\n' % (total_bytes, record_size))
+  ordered = sorted(records.items(), key = lambda item: item[1])
+  for number, (fields, _) in enumerate(ordered):
+    row_format = '  {' + '%6d, ' * len(fields) + '}, /* %3d */\n'
+    f.write(row_format % (fields + (number,)))
+  f.write('};\n\n')
+
+
+# Write a bit set
+
+def write_bitsets(list, item_size):
+  # Write each collection of bit numbers in list as one line of item_size
+  # 32-bit hexadecimal words to the output file f, then close the C array.
+  for members in list:
+    words = [0] * item_size
+    for bit in members:
+      word, position = divmod(bit, 32)
+      words[word] |= 1 << position
+    text = ", ".join("0x%08xu" % word for word in words)
+    if text:
+      text = " " + text
+    f.write(text + ",\n")
+  f.write("};\n\n")
+
+
+# ---------------------------------------------------------------------------
+# This bit of code must have been useful when the original script was being
+# developed. Retain it just in case it is ever needed again.
+
+# def test_record_size():
+#   tests = [ \
+#     ( [(3,), (6,), (6,), (1,)], 1 ), \
+#     ( [(300,), (600,), (600,), (100,)], 2 ), \
+#     ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \
+#     ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \
+#     ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
+#     ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
+#     ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \
+#     ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \
+#   ]
+#   for test in tests:
+#     size, struct = get_record_size_struct(test[0])
+#     assert(size == test[1])
+# test_record_size()
+# ---------------------------------------------------------------------------
+
+
+
+# ---------------------------------------------------------------------------
+#                       MAIN CODE FOR CREATING TABLES
+# ---------------------------------------------------------------------------
+
+# Set from the header line of the first file passed to read_table(); files
+# read later are checked against it.
+
+unicode_version = ""
+
+# Some of the tables imported from GenerateCommon.py have alternate comment
+# strings for use by GenerateUcpHeader. The comments are not wanted here, so
+# remove them.
+
+bidi_classes = bidi_classes[::2]
+break_properties = break_properties[::2]
+category_names = category_names[::2]
+
+# Create the various tables from Unicode data files. The last argument of each
+# call is the value given to code points that the file does not mention.
+
+script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown'))
+category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
+break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_properties), break_properties.index('Other'))
+other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
+bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', make_get_names(bidi_classes), bidi_classes.index('L'))
+
+# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now
+# we need to find the Extended_Pictographic property for emoji characters. This
+# can be set as an additional grapheme break property, because the default for
+# all the emojis is "other". We scan the emoji-data.txt file and modify the
+# break-props table.
+
+# These two indexes do not change while the file is scanned.
+gb_other = break_properties.index('Other')
+gb_extpict = break_properties.index('Extended_Pictographic')
+
+# The with statement closes the file even if a line fails to parse.
+with open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8') as file:
+  for line in file:
+    line = re.sub(r'#.*', '', line)
+    chardata = list(map(str.strip, line.split(';')))
+    if len(chardata) <= 1:
+      continue
+    if chardata[1] != "Extended_Pictographic":
+      continue
+    m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
+    char = int(m.group(1), 16)
+    if m.group(3) is None:
+      last = char
+    else:
+      last = int(m.group(3), 16)
+    for i in range(char, last + 1):
+      if break_props[i] != gb_other:
+        # The format must be applied here; print() does not interpolate.
+        print("WARNING: Emoji 0x%x has break property %s, not 'Other'" %
+          (i, break_properties[break_props[i]]), file=sys.stderr)
+      break_props[i] = gb_extpict
+
+# Handle script extensions. The get_script_extension() function maintains a
+# list of unique bitmaps representing lists of scripts, returning the offset
+# in that list. Initialize the list with an empty set, which is used for
+# characters that have no script extensions.
+
+script_lists = [[]]
+last_script_extension = ""
+scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
+
+# Pack the bidi class into the bits above the script extension offset. The
+# separate bidi class table is not needed after this.
+
+for idx, bidi in enumerate(bidi_class):
+  scriptx_bidi_class[idx] |= bidi << 11
+bidi_class = None
+
+# Find the Boolean properties of each character. This next bit of magic creates
+# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
+# the *same* list, which is not what we want.
+
+bprops = [[] for _ in range(MAX_UNICODE)]
+
+# Collect the properties from the various files. They are opened as UTF-8,
+# like the other Unicode data files, so that reading them does not depend on
+# the locale's default encoding.
+
+for filename in bool_propsfiles:
+  try:
+    file = open('Unicode.tables/' + filename, 'r', encoding='utf-8')
+  except IOError:
+    print(f"** Couldn't open {'Unicode.tables/' + filename}\n")
+    sys.exit(1)
+
+  # The with statement closes the file even if a line fails to parse.
+  with file:
+    for line in file:
+      line = re.sub(r'#.*', '', line)
+      data = list(map(str.strip, line.split(';')))
+      if len(data) <= 1:
+        continue
+
+      # Properties that are not in bool_properties are ignored.
+      try:
+        ix = bool_properties.index(data[1])
+      except ValueError:
+        continue
+
+      m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', data[0])
+      char = int(m.group(1), 16)
+      if m.group(3) is None:
+        last = char
+      else:
+        last = int(m.group(3), 16)
+
+      for i in range(char, last + 1):
+        bprops[i].append(ix)
+
+# The ASCII property isn't listed in any files, but it is easy enough to add
+# it manually.
+
+ix = bool_properties.index("ASCII")
+for i in range(128):
+  bprops[i].append(ix)
+
+# The Bidi_Mirrored property isn't listed in any property files. We have to
+# deduce it from the file that lists the mirrored characters: every character
+# in the first field of a data line gets the property.
+
+ix = bool_properties.index("Bidi_Mirrored")
+
+try:
+  file = open('Unicode.tables/BidiMirroring.txt', 'r', encoding='utf-8')
+except IOError:
+  print(f"** Couldn't open {'Unicode.tables/BidiMirroring.txt'}\n")
+  sys.exit(1)
+
+# The with statement closes the file even if a line fails to parse.
+with file:
+  for line in file:
+    line = re.sub(r'#.*', '', line)
+    data = list(map(str.strip, line.split(';')))
+    if len(data) <= 1:
+      continue
+    c = int(data[0], 16)
+    bprops[c].append(ix)
+
+# Scan each character's boolean property list and create a list of unique
+# lists, at the same time setting the index in that list for each character in
+# the bool_props vector.
+
+bool_props = [0] * MAX_UNICODE
+bool_props_lists = [[]]
+
+# Map each distinct set of properties to the position of the first list in
+# bool_props_lists that holds it. A dictionary lookup replaces comparing every
+# character against every list found so far; the result is the same.
+
+bool_props_index = {frozenset(): 0}
+
+for c in range(MAX_UNICODE):
+  key = frozenset(bprops[c])
+  i = bool_props_index.get(key)
+  if i is None:
+    i = bool_props_index[key] = len(bool_props_lists)
+    bool_props_lists.append(bprops[c])
+
+  bool_props[c] = i * bool_props_list_item_size
+
+# This block of code was added by PH in September 2012. It scans the other_case
+# table to find sets of more than two characters that must all match each other
+# caselessly. Later in this script a table of these sets is written out.
+# However, we have to do this work here in order to compute the offsets in the
+# table that are inserted into the main table.
+
+# The CaseFolding.txt file lists pairs, but the common logic for reading data
+# sets only one value, so first we go through the table and set "return"
+# offsets for those that are not already set.
+
+for c in range(MAX_UNICODE):
+  if other_case[c] != 0 and other_case[c + other_case[c]] == 0:
+    other_case[c + other_case[c]] = -other_case[c]
+
+# Now scan again and create equivalence sets.
+
+caseless_sets = []
+
+for c in range(MAX_UNICODE):
+  o = c + other_case[c]
+
+  # Trigger when this character's other case does not point back here. We
+  # now have three characters that are case-equivalent.
+
+  if other_case[o] != -other_case[c]:
+    t = o + other_case[o]
+
+    # Scan the existing sets to see if any of the three characters are already
+    # part of a set. If so, unite the existing set with the new set.
+
+    appended = 0
+    for s in caseless_sets:
+      found = 0
+      for x in s:
+        if x == c or x == o or x == t:
+          found = 1
+
+      # Add new characters to an existing set
+
+      # NOTE(review): found is cleared once, before the loop over y, and not
+      # for each y. Once one of c, o, t has been seen in s, the later ones are
+      # not appended even if they are absent from s. Confirm whether this is
+      # intended before changing it, because it affects the generated tables.
+
+      if found:
+        found = 0
+        for y in [c, o, t]:
+          for x in s:
+            if x == y:
+              found = 1
+          if not found:
+            s.append(y)
+        appended = 1
+
+    # If we have not added to an existing set, create a new one.
+
+    if not appended:
+      caseless_sets.append([c, o, t])
+
+# End of loop looking for caseless sets.
+
+# Now scan the sets and set appropriate offsets for the characters. Offsets
+# start at 1 and each set advances the offset by its length plus one, which
+# matches the ucd_caseless_sets table written later in this script: that
+# table starts with NOTACHAR and has a NOTACHAR after each set.
+
+caseless_offsets = [0] * MAX_UNICODE
+
+offset = 1;
+for s in caseless_sets:
+  for x in s:
+    caseless_offsets[x] = offset
+  offset += len(s) + 1
+
+# End of block of code for creating offsets for caseless matching sets.
+
+
+# Combine all the tables
+
+table, records = combine_tables(script, category, break_props,
+  caseless_offsets, other_case, scriptx_bidi_class, bool_props)
+
+# Work out the size of one record, and get the text of the structure
+# definition, which is written to the output as a comment.
+
+record_size, record_struct = get_record_size_struct(list(records))
+
+# Try each candidate block size for the two-stage table and keep the first one
+# that gives the smallest total size (records plus both stages).
+
+min_size = sys.maxsize
+for block_size in (32, 64, 128, 256, 512):
+  stage1, stage2 = compress_table(table, block_size)
+  size = len(records) * record_size + get_tables_size(stage1, stage2)
+  if size >= min_size:
+    continue
+  min_size = size
+  min_stage1, min_stage2 = stage1, stage2
+  min_block_size = block_size
+
+
+# ---------------------------------------------------------------------------
+#                   MAIN CODE FOR WRITING THE OUTPUT FILE
+# ---------------------------------------------------------------------------
+
+# Open the output file (no return on failure). This call also writes standard
+# header boilerplate.
+
+f = open_output("pcre2_ucd.c")
+
+# Output this file's heading text
+
+f.write("""\
+/* This file contains tables of Unicode properties that are extracted from
+Unicode data files. See the comments at the start of maint/GenerateUcd.py for
+details.
+
+As well as being part of the PCRE2 library, this file is #included by the
+pcre2test program, which redefines the PRIV macro to change table names from
+_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present,
+just one of these tables is actually needed. When compiling the library, some
+headers are needed. */
+
+#ifndef PCRE2_PCRE2TEST
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+/* The tables herein are needed only when UCP support is built, and in PCRE2
+that happens automatically with UTF support. This module should not be
+referenced otherwise, so it should not matter whether it is compiled or not.
+However a comment was received about space saving - maybe the guy linked all
+the modules rather than using a library - so we include a condition to cut out
+the tables when not needed. But don't leave a totally empty module because some
+compilers barf at that. Instead, just supply some small dummy tables. */
+
+#ifndef SUPPORT_UNICODE
+const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}};
+const uint16_t PRIV(ucd_stage1)[] = {0};
+const uint16_t PRIV(ucd_stage2)[] = {0};
+const uint32_t PRIV(ucd_caseless_sets)[] = {0};
+#else
+\n""")
+
+# --- Output some variable heading stuff ---
+
+f.write("/* Total size: %d bytes, block size: %d. */\n\n" % (min_size, min_block_size))
+f.write('const char *PRIV(unicode_version) = "{}";\n\n'.format(unicode_version))
+
+f.write("""\
+/* When recompiling tables with a new Unicode version, please check the types
+in this structure definition with those in pcre2_internal.h (the actual field
+names will be different).
+\n""")
+
+f.write(record_struct)
+
+f.write("""
+/* If the 32-bit library is run in non-32-bit mode, character values greater
+than 0x10ffff may be encountered. For these we set up a special record. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+  ucp_Unknown,    /* script */
+  ucp_Cn,         /* type unassigned */
+  ucp_gbOther,    /* grapheme break property */
+  0,              /* case set */
+  0,              /* other case */
+  0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
+  0,              /* bool properties offset */
+  }};
+#endif
+\n""")
+
+# --- Output the table of caseless character sets ---
+
+f.write("""\
+/* This table contains lists of characters that are caseless sets of
+more than one character. Each list is terminated by NOTACHAR. */
+
+const uint32_t PRIV(ucd_caseless_sets)[] = {
+  NOTACHAR,
+""")
+
+# One output line per set: its members in ascending order, then NOTACHAR.
+for members in caseless_sets:
+  entries = ''.join('  0x%04x,' % x for x in sorted(members))
+  f.write(entries + '  NOTACHAR,\n')
+f.write('};\n\n')
+
+# --- Other tables are not needed by pcre2test ---
+
+f.write("""\
+/* When #included in pcre2test, we don't need the table of digit sets, nor the
+the large main UCD tables. */
+
+#ifndef PCRE2_PCRE2TEST
+\n""")
+
+# --- Read Scripts.txt again for the sets of 10 digits. ---
+
+digitsets = []
+
+# The with statement closes the file even if a line fails to parse.
+with open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8') as file:
+  for line in file:
+    # Only ranges whose trailing comment gives the category Nd are wanted.
+    m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line)
+    if m is None:
+      continue
+    first = int(m.group(1),16)
+    last  = int(m.group(2),16)
+    if ((last - first + 1) % 10) != 0:
+      # Report with print(); the write() method of the output file has no
+      # "file" argument, so the previous call raised TypeError instead.
+      print("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last),
+        file=sys.stderr)
+    # Record the last code point of each run of ten.
+    while first < last:
+      digitsets.append(first + 9)
+      first += 10
+digitsets.sort()
+
+f.write("""\
+/* This table lists the code points for the '9' characters in each set of
+decimal digits. It is used to ensure that all the digits in a script run come
+from the same set. */
+
+const uint32_t PRIV(ucd_digit_sets)[] = {
+""")
+
+f.write("  %d,  /* Number of subsequent values */" % len(digitsets))
+
+# Eight values per output line; each line break is written before the first
+# value of a line.
+for position, d in enumerate(digitsets):
+  if position % 8 == 0:
+    f.write("\n ")
+  f.write(" 0x%05x," % d)
+f.write("\n};\n\n")
+
+f.write("""\
+/* This vector is a list of script bitsets for the Script Extension property.
+The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as
+ucd_script_sets_item_size. */
+
+const uint32_t PRIV(ucd_script_sets)[] = {
+""")
+write_bitsets(script_lists, script_list_item_size)
+
+f.write("""\
+/* This vector is a list of bitsets for Boolean properties. The number of
+32_bit words in each bitset is #defined as ucd_boolprop_sets_item_size in
+pcre2_ucp.h. */
+
+const uint32_t PRIV(ucd_boolprop_sets)[] = {
+""")
+write_bitsets(bool_props_lists, bool_props_list_item_size)
+
+
+# Output the main UCD tables.
+
+f.write("""\
+/* These are the main two-stage UCD tables. The fields in each record are:
+script (8 bits), character type (8 bits), grapheme break property (8 bits),
+offset to multichar other cases or zero (8 bits), offset to other case or zero
+(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed
+into a 16-bit field, and offset in binary properties table (16 bits). */
+\n""")
+
+write_records(records, record_size)
+write_table(min_stage1, 'PRIV(ucd_stage1)')
+write_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
+
+f.write("#if UCD_BLOCK_SIZE != %d\n" % min_block_size)
+f.write("""\
+#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
+#endif
+#endif  /* SUPPORT_UNICODE */
+
+#endif  /* PCRE2_PCRE2TEST */
+
+/* End of pcre2_ucd.c */
+""")
+
+# Call close(); a bare "f.close" only names the method and leaves the output
+# file to be flushed whenever the interpreter exits.
+f.close()
+
+# End
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcpHeader.py
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcpHeader.py
@ -0,0 +1,98 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This script generates the pcre2_ucp.h file from Unicode data files. This
+# header uses enumerations to give names to Unicode property types and script
+# names.
+
+# This script was created in December 2021 as part of the Unicode data
+# generation refactoring.
+
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  bidi_classes, \
+  bool_properties, \
+  bool_props_list_item_size, \
+  break_properties, \
+  category_names, \
+  general_category_names, \
+  script_list_item_size, \
+  script_names, \
+  open_output
+
+# Open the output file (no return on failure). This call also writes standard
+# header boilerplate.
+
+f = open_output("pcre2_ucp.h")
+
+# Output this file's heading text
+
+f.write("""\
+#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
+#define PCRE2_UCP_H_IDEMPOTENT_GUARD
+
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
+
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
+\n""")
+
+# General categories: one enumerator per name.
+
+f.write("/* These are the general character categories. */\n\nenum {\n")
+f.write("".join("  ucp_%s,\n" % name for name in general_category_names))
+f.write("};\n\n")
+
+# Particular categories. The list alternates names and comments.
+
+f.write("/* These are the particular character categories. */\n\nenum {\n")
+for pos in range(0, len(category_names), 2):
+  name, comment = category_names[pos], category_names[pos+1]
+  f.write("  ucp_%s,    /* %s */\n" % (name, comment))
+f.write("};\n\n")
+
+# Boolean properties, followed by a count and the bitset item size.
+
+f.write("/* These are Boolean properties. */\n\nenum {\n")
+f.write("".join("  ucp_%s,\n" % name for name in bool_properties))
+
+f.write("  /* This must be last */\n")
+f.write("  ucp_Bprop_Count\n};\n\n")
+
+f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
+f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)
+
+# Bidi classes. The name and its comma are padded so the comments line up.
+
+f.write("/* These are the bidi class values. */\n\nenum {\n")
+for pos in range(0, len(bidi_classes), 2):
+  name, comment = bidi_classes[pos], bidi_classes[pos+1]
+  f.write("  ucp_bidi%s /* %s */\n" % ((name + ',').ljust(5), comment))
+f.write("};\n\n")
+
+# Grapheme break properties, padded in the same way.
+
+f.write("/* These are grapheme break properties. The Extended Pictographic "
+  "property\ncomes from the emoji-data.txt file. */\n\nenum {\n")
+for pos in range(0, len(break_properties), 2):
+  name, comment = break_properties[pos], break_properties[pos+1]
+  f.write("  ucp_gb%s /* %s */\n" % ((name + ',').ljust(22), comment))
+f.write("};\n\n")
+
+# Scripts. A second heading comment is written just before "Unknown".
+
+f.write("/* These are the script identifications. */\n\nenum {\n  /* Scripts which has characters in other scripts. */\n")
+for name in script_names:
+  if name == "Unknown":
+    f.write("\n  /* Scripts which has no characters in other scripts. */\n")
+  f.write("  ucp_%s,\n" % name)
+f.write("\n")
+
+f.write("  /* This must be last */\n")
+f.write("  ucp_Script_Count\n};\n\n")
+
+f.write("/* Size of entries in ucd_script_sets[] */\n\n")
+f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)
+
+f.write("#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
+f.write("/* End of pcre2_ucp.h */\n")
+
+f.close()
+
+# End
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcpTables.py
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/GenerateUcpTables.py
@ -0,0 +1,203 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This script generates the pcre2_ucptables.c file, which contains tables for
+# recognizing Unicode property names. It is #included by pcre2_tables.c. In
+# order to reduce the number of relocations when loading the PCRE2 library, the
+# names are held as a single large string, with offsets in the table. This is
+# tedious to maintain by hand. Therefore, a script is used to generate the
+# table.
+
+# This script was created in December 2021 based on the previous GenerateUtt
+# script, whose output had to be manually edited into pcre2_tables.c. Here is
+# the history of the original script:
+
+# -----------------------------------------------------------------------------
+# Modified by PH 17-March-2009 to generate the more verbose form that works
+# for UTF-support in EBCDIC as well as ASCII environments.
+# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.
+# Modified by PH 04-May-2010 to add new "X.." special categories.
+# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
+# Modified by ChPe 30-September-2012 to add this note; no other changes were
+# necessary for Unicode 6.2.0 support.
+# Modified by PH 26-February-2013 to add the Xuc special category.
+# Comment modified by PH 13-May-2014 to update to PCRE2 file names.
+# Script updated to Python 3 by running it through the 2to3 converter.
+# Added script names for Unicode 7.0.0, 20-June-2014.
+# Added script names for Unicode 8.0.0, 19-June-2015.
+# Added script names for Unicode 10.0.0, 02-July-2017.
+# Added script names for Unicode 11.0.0, 03-July-2018.
+# Added 'Unknown' script, 01-October-2018.
+# Added script names for Unicode 12.1.0, 27-July-2019.
+# Added script names for Unicode 13.0.0, 10-March-2020.
+# Added Script names for Unicode 14.0.0, PCRE2-10.39
+# Added support for bidi class and bidi control, 06-December-2021
+#   This also involved lower casing strings and removing underscores, in
+#   accordance with Unicode's "loose matching" rules, which Perl observes.
+# Changed default script type from PT_SC to PT_SCX, 18-December-2021
+# -----------------------------------------------------------------------------
+#
+# Note subsequent changes here:
+#
+# 27-December-2021: Added support for 4-letter script abbreviations.
+# 10-January-2022:  Further updates for Boolean property support
+# -----------------------------------------------------------------------------
+
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  abbreviations, \
+  bool_properties, \
+  bidi_classes, \
+  category_names, \
+  general_category_names, \
+  script_names, \
+  open_output
+
+# Open the output file (no return on failure). This call also writes standard
+# header boilerplate.
+
+f = open_output("pcre2_ucptables.c")
+
+# The list in bidi_classes contains just the Unicode classes such as AN, LRE,
+# etc., along with comments. We need to add "bidi" in front of each value, in
+# order to create names that don't clash with other types of property.
+
+# Every second entry of bidi_classes is a class name (the others are
+# comments); put "bidi" in front of each name.
+
+bidi_class_names = ["bidi" + name for name in bidi_classes[::2]]
+
+# Remove the comments from other lists that contain them.
+
+category_names = category_names[::2]
+
+# Create standardized versions of the names by lowercasing and removing
+# underscores.
+
+def stdname(x):
+  # Lower-case the name, then drop every underscore.
+  lowered = x.lower()
+  return lowered.replace('_', '')
+
+def stdnames(x):
+  # Standardized form of each name in x, in the same order, as a new list.
+  return [stdname(name) for name in x]
+
+std_category_names = stdnames(category_names)
+std_general_category_names = stdnames(general_category_names)
+std_bidi_class_names = stdnames(bidi_class_names)
+std_bool_properties = stdnames(bool_properties)
+
+# Each table entry is (standardized name, original name, property type). The
+# original name is kept because it is used for the ucp_xx names. Script
+# abbreviations are entered under the full original script name.
+
+utt_table = []
+
+# Scripts listed before 'Unknown' get type PT_SCX, the rest PT_SC.
+
+scx_end = script_names.index('Unknown')
+
+for idx, name in enumerate(script_names):
+  pt_type = 'PT_SC' if idx >= scx_end else 'PT_SCX'
+  utt_table.append((stdname(name), name, pt_type))
+  for abbrev in abbreviations[name]:
+    utt_table.append((stdname(abbrev), name, pt_type))
+
+# Add the remaining property lists
+
+for std, name in zip(std_category_names, category_names):
+  utt_table.append((std, name, 'PT_PC'))
+for std, name in zip(std_general_category_names, general_category_names):
+  utt_table.append((std, name, 'PT_GC'))
+for std, name in zip(std_bidi_class_names, bidi_class_names):
+  utt_table.append((std, name, 'PT_BIDICL'))
+
+# Boolean properties; only some of them have abbreviations.
+
+for name in bool_properties:
+  utt_table.append((stdname(name), name, 'PT_BOOL'))
+  if name not in abbreviations:
+    continue
+  for abbrev in abbreviations[name]:
+    utt_table.append((stdname(abbrev), name, 'PT_BOOL'))
+
+# Now add specials and synonyms. Note both the standardized and capitalized
+# forms are needed.
+
+utt_table += [
+  ('any', 'Any', 'PT_ANY'),
+  ('l&',  'L&',  'PT_LAMP'),
+  ('lc',  'LC',  'PT_LAMP'),
+  ('xan', 'Xan', 'PT_ALNUM'),
+  ('xps', 'Xps', 'PT_PXSPACE'),
+  ('xsp', 'Xsp', 'PT_SPACE'),
+  ('xuc', 'Xuc', 'PT_UCNC'),
+  ('xwd', 'Xwd', 'PT_WORD'),
+]
+
+# Remove duplicates from the table and then sort it.
+
+utt_table = sorted(set(utt_table))
+
+# Output file-specific heading
+
+f.write("""\
+#ifdef SUPPORT_UNICODE
+
+/* The PRIV(utt)[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and using offsets instead.
+All letters are lower cased, and underscores are removed, in accordance with
+the "loose matching" rules that Unicode advises and Perl uses. */
+\n""")
+
+# We have to use STR_ macros to define the strings so that it all works in
+# UTF-8 mode on EBCDIC platforms.
+
+# One #define per entry: the macro name is built from the standardized name
+# and the value is a sequence of STR_ macros, one per character.
+
+for std, _, _ in utt_table:
+  chars = ''.join(' STR_AMPERSAND' if c == '&' else ' STR_%s' % c for c in std)
+  f.write('#define STRING_%s0' % std.replace('&', '_AMPERSAND') + chars + ' "\\0"\n')
+
+# Output the long string of concatenated names. The entries of utt_table are
+# unique (the table was built from a set), so only the entry at the last
+# position gets the terminating semicolon.
+
+f.write('\nconst char PRIV(utt_names)[] =\n')
+final = len(utt_table) - 1
+for position, utt in enumerate(utt_table):
+  terminator = ';' if position == final else ''
+  f.write('  STRING_%s0%s\n' % (utt[0].replace('&', '_AMPERSAND'), terminator))
+
+# Output the property type table. Each entry holds the offset of the name in
+# the long string, the property type, and the ucp_ value (0 for the special
+# types). Every entry except the last is followed by a comma.
+
+special_types = ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
+  'PT_SPACE', 'PT_UCNC', 'PT_WORD')
+
+f.write('\nconst ucp_type_table PRIV(utt)[] = {\n')
+offset = 0
+for position, (std, name, pt_type) in enumerate(utt_table):
+  value = '0' if pt_type in special_types else 'ucp_' + name
+  separator = '' if position == final else ','
+  f.write('  { %3d, %s, %s }%s\n' % (offset, pt_type, value, separator))
+  # Each name occupies its own length plus the terminating zero.
+  offset += len(std) + 1
+f.write('};\n\n')
+
+# Ending text
+
+f.write("""\
+const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+
+#endif /* SUPPORT_UNICODE */
+
+/* End of pcre2_ucptables.c */
+""")
+
+# Call close(); a bare "f.close" only names the method and leaves the output
+# file to be flushed whenever the interpreter exits.
+f.close()
+
+# End
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ManyConfigTests
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ManyConfigTests
@ -0,0 +1,453 @@
+#! /bin/sh
+
+# This is a script for the use of PCRE2 maintainers. It configures and rebuilds
+# PCRE2 with a variety of configuration options, and in each case runs the
+# tests to ensure that all goes well. Every possible combination would take far
+# too long, so we use a representative sample. This script should be run in the
+# PCRE2 source directory.
+
+# While debugging, it is sometimes useful to be able to cut out some of the
+# tests, in order to run those that are giving errors. The following options
+# do this:
+#
+# -noasan         skip the test that uses -fsanitize=address
+# -nousan         skip the test that uses -fsanitize=undefined
+# -nodebug        skip the test that uses --enable-debug
+# -nojit          skip all JIT tests
+# -nojitmain      skip non-valgrind JIT tests
+# -nojitvalgrind  skip JIT tests with valgrind
+# -nomain         skip all the main (non-JIT) set of tests
+# -nomainvalgrind skip the main (non-JIT) valgrind tests
+# -notmp          skip the test in a temporary directory
+# -novalgrind     skip all the valgrind tests
+
+# Alternatively, if any of those names are given with '+' instead of '-no',
+# only those groups named with '+' are run (e.g. +jit). If -dummy is given,
+# no tests are actually run - this provides a means of testing the selectors.
+
+# The -v option causes a call to 'pcre2test -C' to happen for each
+# configuration.
+
+# Selector flags: a value of 1 means the corresponding group of tests is run.
+# By default every group is enabled.
+useasan=1
+useusan=1
+usedebug=1
+usejit=1
+usejitvalgrind=1
+usemain=1
+usemainvalgrind=1
+usetmp=1
+usevalgrind=1
+
+# dummy is set by -dummy, verbose by -v; seenplus records that a '+' option
+# has already been seen.
+dummy=0
+seenplus=0
+verbose=0
+
+# Process the command-line arguments one at a time.
+while [ $# -gt 0 ] ; do
+  # The first '+' option switches off the selectors listed here, so that only
+  # groups named with '+' remain enabled. usevalgrind is not cleared at this
+  # point; it is adjusted after the loop.
+  case $1 in
+    +*) if [ $seenplus -eq 0 ]; then
+          useasan=0
+          useusan=0
+          usedebug=0
+          usejit=0
+          usejitvalgrind=0
+          usemain=0
+          usemainvalgrind=0
+          usetmp=0
+          seenplus=1
+        fi;;
+  esac
+
+  # Apply the individual option. Anything unrecognized is a fatal error.
+  case $1 in
+    -dummy)          dummy=1;;
+    -v)              verbose=1;;
+    -noasan)         useasan=0;;
+    -nousan)         useusan=0;;
+    -nodebug)        usedebug=0;;
+    -nojit)          usejit=0; usejitvalgrind=0;;
+    -nojitmain)      usejit=0;;
+    -nojitvalgrind)  usejitvalgrind=0;;
+    -nomain)         usemain=0; usemainvalgrind=0;;
+    -nomainvalgrind) usemainvalgrind=0;;
+    -notmp)          usetmp=0;;
+    -novalgrind)     usevalgrind=0;;
+    +asan)           useasan=1;;
+    +usan)           useusan=1;;
+    +debug)          usedebug=1;;
+    +jit)            usejit=1; usejitvalgrind=1;;
+    +jitmain)        usejit=1;;
+    +jitvalgrind)    usejitvalgrind=1;;
+    +main)           usemain=1; usemainvalgrind=1;;
+    +mainvalgrind)   usemainvalgrind=1;;
+    +tmp)            usetmp=1;;
+    +valgrind)       usevalgrind=1; usejitvalgrind=1; usemainvalgrind=1;;
+    *)               echo "Unknown option '$1'"; exit 1;;
+  esac
+  shift
+done
+
+# If both valgrind sub-groups are off, there is nothing for valgrind to run.
+if [ $usejitvalgrind -eq 0 -a $usemainvalgrind -eq 0 ] ; then
+  usevalgrind=0
+fi
+
+# This is in case the caller has set aliases (as I do - PH)
+
+unset cp ls mv rm
+
+# This is a temporary directory for testing out-of-line builds
+
+tmp=/tmp/pcre2testing
+
+# Don't bother with compiler optimization for most tests; it just slows down
+# compilation a lot (and running the tests themselves is quick). However, one
+# special test turns optimization on, because it can provoke some compiler
+# warnings.
+
+CFLAGS="-g"
+OFLAGS="-O0"
+ISGCC=0
+
+# If the compiler is gcc, add a lot of warning switches. The version output is
+# piped straight into grep instead of being staged in a fixed-name file in
+# /tmp, which another user could pre-create or replace with a symbolic link.
+# If cc is missing, nothing reaches grep and the test fails, as before.
+
+if cc --version 2>/dev/null | grep GCC >/dev/null; then
+  ISGCC=1
+  CFLAGS="$CFLAGS -Wall"
+  CFLAGS="$CFLAGS -Wno-overlength-strings"
+  CFLAGS="$CFLAGS -Wpointer-arith"
+  CFLAGS="$CFLAGS -Wwrite-strings"
+  CFLAGS="$CFLAGS -Wundef -Wshadow"
+  CFLAGS="$CFLAGS -Wmissing-field-initializers"
+  CFLAGS="$CFLAGS -Wunused-parameter"
+  CFLAGS="$CFLAGS -Wextra -Wformat"
+  CFLAGS="$CFLAGS -Wbad-function-cast"
+  CFLAGS="$CFLAGS -Wmissing-declarations"
+  CFLAGS="$CFLAGS -Wnested-externs"
+  CFLAGS="$CFLAGS -pedantic"
+  CFLAGS="$CFLAGS -Wuninitialized"
+  CFLAGS="$CFLAGS -Wmaybe-uninitialized"
+  CFLAGS="$CFLAGS -Wmissing-prototypes"
+  CFLAGS="$CFLAGS -Wstrict-prototypes"
+fi
+
+# This function runs a single test with the set of configuration options that
+# are in $opts. The source directory must be set in srcdir. The function must
+# be defined as "runtest()" not "function runtest()" in order to run on
+# Solaris.
+
+runtest()
+  {
+  # Configure, build and test one configuration. Reads the variables opts,
+  # srcdir, CFLAGS, dummy, verbose, valgrind, jrvalgrind and withvalgrind, and
+  # increments testcount. Any failure terminates the whole script with exit
+  # status 1 after showing the captured output.
+
+  # Remove programs left over from a previous build.
+  rm -f $srcdir/pcre2test $srcdir/pcre2grep $srcdir/pcre2_jit_test
+  testcount=`expr $testcount + 1`
+
+  if [ "$opts" = "" ] ; then
+    echo "[$testcount/$testtotal] Configuring with: default settings"
+  else
+    echo "[$testcount/$testtotal] Configuring with:"
+    echo "  $opts"
+  fi
+
+  # With -dummy, only the progress lines above are produced.
+  if [ $dummy -eq 1 ]; then return; fi
+
+  # $opts is deliberately unquoted so that it splits into separate arguments.
+  CFLAGS="$CFLAGS" \
+    $srcdir/configure $opts >/dev/null 2>teststderrM
+  if [ $? -ne 0 ]; then
+    echo " "
+    echo "******** Error while configuring ********"
+    cat teststderrM
+    exit 1
+  fi
+
+# There is an infelicity in the Autotools world (as of October 2015) which
+# causes the message
+#
+# ar: `u' modifier ignored since `D' is the default (see `U')
+#
+# to be output while linking. This triggers an unwanted error report from this
+# script, because it expects no stderr output while making. To get round this
+# we filter the stderr output through sed, removing all occurrences of the
+# above lines. Just for paranoia, check that sed is available before doing
+# this.
+
+  echo "Making"
+  make -j >/dev/null 2>teststderrM
+  # Save the status of make now, before the sed filtering overwrites $?.
+  makeRC=$?
+  if command -v sed >/dev/null 2>&1 ; then
+    sed "/\`u' modifier ignored since \`D' is the default/ d" \
+      teststderrM > teststderrMM
+    mv -f teststderrMM teststderrM
+  fi
+  # Either a non-zero status or any remaining stderr output counts as failure.
+  if [ $makeRC -ne 0 -o -s teststderrM ]; then
+    echo " "
+    echo "******** Errors or warnings while making ********"
+    echo " "
+    cat teststderrM
+    exit 1
+  fi
+
+  if [ $verbose -eq 1 ]; then
+    ./pcre2test -C
+  fi
+
+  # The exit status of each 'pcre2test -C <option>' call is saved; a value
+  # greater than zero is taken below to mean that the feature is available.
+  ./pcre2test -C jit >/dev/null
+  jit=$?
+  ./pcre2test -C pcre2-8 >/dev/null
+  pcre2_8=$?
+
+  echo "Running PCRE2 library tests $withvalgrind"
+  $srcdir/RunTest $valgrind >teststdoutM 2>teststderrM
+
+  # On failure show stderr if there is any, otherwise the captured stdout.
+  if [ $? -ne 0 -o -s teststderrM ]; then
+    echo " "
+    echo "**** Test failed ****"
+    if [ -s teststderrM ] ; then
+      cat teststderrM
+    else
+      cat teststdoutM
+    fi
+    exit 1
+  fi
+
+  if [ $pcre2_8 -gt 0 ]; then
+    echo "Running pcre2grep tests $withvalgrind"
+    $srcdir/RunGrepTest $valgrind >teststdoutM 2>teststderrM
+    if [ $? -ne 0 -o -s teststderrM ]; then
+      echo " "
+      echo "**** Test failed ****"
+      cat teststderrM
+      cat teststdoutM
+      exit 1
+    fi
+  else
+    echo "Skipping pcre2grep tests: 8-bit library not compiled"
+  fi
+
+  if [ "$jit" -gt 0 ]; then
+    echo "Running JIT regression tests $withvalgrind"
+    # $jrvalgrind is empty unless the JIT valgrind tests are being run.
+    $jrvalgrind $srcdir/pcre2_jit_test >teststdoutM 2>teststderrM
+    if [ $? -ne 0 -o -s teststderrM ]; then
+      echo " "
+      echo "**** Test failed ****"
+      cat teststderrM
+      cat teststdoutM
+      exit 1
+    fi
+  else
+    echo "Skipping JIT regression tests: JIT is not enabled"
+  fi
+  }
+
+# Update the total count whenever a new test is added; it is used to show
+# progress as each test is run. The two valgrind terms are also multiplied by
+# $usevalgrind, because the valgrind tests are run only when that is set, and
+# -novalgrind clears it without touching the two sub-group selectors.
+
+testtotal=`expr 17 \* $usemain + \
+  1 \* $usemain \* $usedebug + \
+  1 \* $usetmp + \
+  1 \* $ISGCC \* $usemain + \
+  1 \* $ISGCC \* $usemain \* $useasan + \
+  1 \* $ISGCC \* $usemain \* $useusan + \
+  13 \* $usejit + \
+  2 \* $usevalgrind \* $usemainvalgrind + \
+  2 \* $usevalgrind \* $usejitvalgrind`
+
+testcount=0
+
+if [ $testtotal -eq 0 ] ; then
+  echo "** No tests selected"
+  exit 1
+fi
+
+# The first tests are built in the current directory, without valgrind.
+srcdir=.
+export srcdir
+valgrind=
+jrvalgrind=
+withvalgrind=
+
+# enable_jit stays empty when the JIT tests are disabled.
+enable_jit=
+if [ $usejit -ne 0 ]; then
+  enable_jit=--enable-jit
+fi
+
+# When gcc is the compiler, a maximally configured build is tested with -O2,
+# which can produce warnings that -O0 does not. The same configuration is then
+# optionally rebuilt with -fsanitize=address, and again with
+# -fsanitize=undefined -std=gnu99 to catch runtime actions that are not well
+# defined. For the latter, -fno-sanitize=shift is added to avoid warnings for
+# shifts of negative numbers, which occur in src/pcre2_jit_compile.c, and
+# -fno-sanitize=alignment is added as well.
+
+if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
+  SAVECFLAGS="$CFLAGS"
+
+  # Every build in this group uses the same configure options; only CFLAGS
+  # differs between them.
+  opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32"
+
+  echo "---------- Maximally configured test with -O2 ----------"
+  CFLAGS="-O2 $SAVECFLAGS"
+  echo "CFLAGS=$CFLAGS"
+  runtest
+
+  # Following a kernel change, the sanitizers do not work unless the extra
+  # PIE options are also set.
+
+  if [ $useasan -ne 0 ]; then
+    echo "---------- Maximally configured test with -fsanitize=address ----------"
+    CFLAGS="$OFLAGS $SAVECFLAGS -no-pie -fno-PIE -fsanitize=address"
+    echo "CFLAGS=$CFLAGS"
+    runtest
+  fi
+
+  if [ $useusan -ne 0 ]; then
+    echo "------- Maximally configured test with -fsanitize=undefined -fno-sanitize=shift -fno-sanitize=alignment -std=gnu99 -------"
+    CFLAGS="$OFLAGS $SAVECFLAGS -no-pie -fno-PIE -fsanitize=undefined -fno-sanitize=shift -fno-sanitize=alignment -std=gnu99"
+    echo "CFLAGS=$CFLAGS"
+    runtest
+  fi
+
+  # Go back to the unoptimized flags for everything that follows.
+  CFLAGS="$OFLAGS $SAVECFLAGS"
+fi
+
+# Build PCRE2 in the current (source) directory with a variety of configure
+# options and run the tests for each one. The empty option string builds with
+# all the default settings. Apart from that default case (which builds both
+# kinds of library), --disable-shared or --disable-static is given so that
+# only one version of the library is built, which saves a bit of time.
+
+echo "---------- CFLAGS for the remaining tests ----------"
+echo "CFLAGS=$CFLAGS"
+
+if [ $usemain -ne 0 -a $usedebug -ne 0 ]; then
+  echo "---------- Maximally configured test with --enable-debug ----------"
+  opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug"
+  runtest
+fi
+
+if [ $usemain -ne 0 ]; then
+  echo "---------- Non-JIT tests in the current directory ----------"
+  for opts in \
+    "" \
+    "--disable-static" \
+    "--disable-shared" \
+    "--disable-unicode --disable-shared --enable-never-backslash-C" \
+    "--with-link-size=3 --disable-shared --disable-pcre2grep-callout" \
+    "--disable-unicode --enable-rebuild-chartables --disable-shared" \
+    "--disable-unicode --enable-newline-is-any --disable-shared" \
+    "--disable-unicode --enable-newline-is-cr --disable-shared" \
+    "--disable-unicode --enable-newline-is-crlf --disable-shared" \
+    "--disable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
+    "--enable-newline-is-any --disable-static" \
+    "--disable-unicode --enable-pcre2-16" \
+    "--enable-pcre2-16 --disable-shared" \
+    "--disable-unicode --enable-pcre2-32" \
+    "--enable-pcre2-32 --disable-shared" \
+    "--disable-unicode --enable-pcre2-32 --enable-pcre2-16 --disable-shared" \
+    "--disable-unicode --enable-pcre2-32 --enable-pcre2-16 --disable-pcre2-8 --disable-shared"
+  do
+    runtest
+  done
+fi
+
+# The JIT configurations are tested next, unless they have been disabled.
+
+if [ $usejit -ne 0 ]; then
+  echo "---------- JIT tests in the current directory ----------"
+  for jitopts in \
+    "--disable-unicode --enable-jit --disable-shared" \
+    "--enable-jit --disable-shared" \
+    "--enable-jit --with-link-size=3 --disable-shared" \
+    "--enable-jit --enable-pcre2-16 --disable-shared" \
+    "--disable-unicode --enable-jit --enable-pcre2-16 --disable-pcre2-8 --disable-shared" \
+    "--enable-jit --enable-pcre2-16 --disable-pcre2-8 --disable-shared" \
+    "--enable-jit --enable-pcre2-16 --with-link-size=3 --disable-shared" \
+    "--enable-jit --enable-pcre2-16 --with-link-size=4 --disable-shared" \
+    "--enable-jit --enable-pcre2-32 --disable-shared" \
+    "--disable-unicode --enable-jit --enable-pcre2-32 --disable-pcre2-8 --disable-shared" \
+    "--enable-jit --enable-pcre2-32 --disable-pcre2-8 --disable-shared" \
+    "--enable-jit --enable-pcre2-32 --with-link-size=4 --disable-shared" \
+    "--enable-jit --enable-pcre2-32 --enable-pcre2-16 --disable-pcre2-8 --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
+  do
+    # runtest takes its configure options from $opts.
+    opts="$jitopts"
+    runtest
+  done
+fi
+
+# A few configurations are built again with --enable-valgrind and tested
+# under valgrind.
+
+if [ $usevalgrind -ne 0 ]; then
+  echo "---------- Tests in the current directory using valgrind ----------"
+  valgrind=valgrind
+  withvalgrind="with valgrind"
+
+  if [ $usemainvalgrind -ne 0 ]; then
+    for vgopts in \
+      "--disable-shared" \
+      "--with-link-size=3 --enable-pcre2-16 --enable-pcre2-32 --disable-shared"
+    do
+      opts="--enable-valgrind $vgopts"
+      runtest
+    done
+  fi
+
+  if [ $usejitvalgrind -ne 0 ]; then
+    # Command prefix that runtest puts in front of pcre2_jit_test.
+    jrvalgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --suppressions=$srcdir/testdata/valgrind-jit.supp"
+    for vgopts in \
+      "--enable-jit --disable-shared" \
+      "--enable-jit --enable-pcre2-16 --enable-pcre2-32"
+    do
+      opts="--enable-valgrind $vgopts"
+      runtest
+    done
+  fi
+fi
+
+# Switch valgrind off again for any tests that follow.
+
+valgrind=
+jrvalgrind=
+withvalgrind=
+
+# Clean up the distribution and then do at least one build and test in a
+# directory other than the source directory. It doesn't work unless the
+# source directory is cleaned up first.
+
+if [ -f Makefile ]; then
+  echo "Running 'make distclean'"
+  make distclean >/dev/null 2>&1
+  if [ $? -ne 0 ]; then
+    echo "** 'make distclean' failed"
+    exit 1
+  fi
+fi
+
+echo "---------- End of tests in the source directory ----------"
+echo "Removing teststdoutM and teststderrM"
+rm -rf teststdoutM teststderrM
+
+if [ $usetmp -ne 0 ]; then
+  echo "---------- Tests in the $tmp directory ----------"
+
+  # From here on the sources are referred to by absolute path.
+  srcdir=`pwd`
+  export srcdir
+
+  [ -e $tmp ] || mkdir $tmp
+
+  if [ ! -d $tmp ]; then
+    echo "** Failed to create $tmp or it is not a directory"
+    exit 1
+  fi
+
+  cd $tmp || {
+    echo "** Failed to cd to $tmp"
+    exit 1
+  }
+
+  # Only one configuration is built outside the source directory.
+  opts="--disable-shared"
+  runtest
+
+  echo "Removing $tmp"
+  rm -rf $tmp
+fi
+
+echo "---------- All done ----------"
+
+# End
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/README
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/README
@ -0,0 +1,460 @@
+MAINTENANCE README FOR PCRE2
+============================
+
+The files in the "maint" directory of the PCRE2 source contain data, scripts,
+and programs that are used for the maintenance of PCRE2, but which do not form
+part of the PCRE2 distribution tarballs. This document describes these files
+and also contains some notes for maintainers. Its contents are:
+
+  Files in the maint directory
+  Updating to a new Unicode release
+  Preparing for a PCRE2 release
+  Updating version info for libtool
+  Making a PCRE2 release
+  Future ideas (wish list)
+
+
+Files in the maint directory
+============================
+
+GenerateCommon.py
+  A Python module containing data and functions that are used by the other
+  Generate scripts.
+  
+GenerateTest26.py
+  A Python script that generates input and expected output test data for test
+  26, which tests certain aspects of Unicode property support.  
+
+GenerateUcd.py
+  A Python script that generates the file pcre2_ucd.c from GenerateCommon.py
+  and Unicode data files, which are themselves downloaded from the Unicode web
+  site. The generated file contains the tables for a 2-stage lookup of Unicode
+  properties, along with some auxiliary tables. The script starts with a long
+  comment that gives details of the tables it constructs. 
+
+GenerateUcpHeader.py
+  A Python script that generates the file pcre2_ucp.h from GenerateCommon.py
+  and Unicode data files. The generated file defines constants for various
+  Unicode property values.
+
+GenerateUcpTables.py
+  A Python script that generates the file pcre2_ucptables.c from
+  GenerateCommon.py and Unicode data files. The generated file contains tables
+  for looking up Unicode property names.
+
+ManyConfigTests
+  A shell script that runs "configure, make, test" a number of times with
+  different configuration settings.
+
+pcre2_chartables.c.non-standard
+  This is a set of character tables that came from a Windows system. It has
+  characters greater than 128 that are set as spaces, amongst other things. I
+  kept it so that it can be used for testing from time to time.
+
+README
+  This file.
+
+Unicode.tables
+  The files in this directory were downloaded from the Unicode web site. They
+  contain information about Unicode characters and scripts, and are used by the
+  Generate scripts. There is also UnicodeData.txt, which is no longer used by
+  any script, but is kept because it is occasionally useful for manually
+  looking up the details of certain characters. However, note that character
+  names in this file such as "Arabic sign sanah" do NOT mean that the character
+  is in a particular script (in this case, Arabic). Scripts.txt and
+  ScriptExtensions.txt are where to look for script information.
+
+ucptest.c
+  A program for testing the Unicode property macros that do lookups in the
+  pcre2_ucd.c data, mainly useful after rebuilding the Unicode property tables.
+  Compile and run this in the "maint" directory (see comments at its head).
+  This program can also be used to find characters with specific properties and 
+  to list which properties are supported. 
+
+ucptestdata
+  A directory containing four files, testinput{1,2} and testoutput{1,2}, for
+  use in conjunction with the ucptest program.
+
+utf8.c
+  A short, freestanding C program for converting a Unicode code point into a
+  sequence of bytes in the UTF-8 encoding, and vice versa. If its argument is a
+  hex number such as 0x1234, it outputs a list of the equivalent UTF-8 bytes.
+  If its argument is a sequence of concatenated UTF-8 bytes (e.g. 12e188b4) it
+  treats them as a UTF-8 string and outputs the equivalent code points in hex.
+  See comments at its head for details.
+
+
+Updating to a new Unicode release
+=================================
+
+When there is a new release of Unicode, the files in Unicode.tables must be
+refreshed from the web site. Once that is done, the four Python scripts that 
+generate files from the Unicode data can be run from within the "maint" 
+directory.
+
+Note: Previously, it was necessary to update lists of scripts and their 
+abbreviations by hand before running the Python scripts. This is no longer
+necessary because the scripts have been upgraded to extract this information
+themselves. Also, there used to be explicit lists of scripts in two of the man
+pages. This is no longer the case; the pcre2test program can now output a list 
+of supported scripts.
+
+You can give an output file name as an argument to the following scripts, but
+by default:
+
+GenerateUcd.py        creates pcre2_ucd.c        )
+GenerateUcpHeader.py  creates pcre2_ucp.h        ) in the current directory
+GenerateUcpTables.py  creates pcre2_ucptables.c  )
+
+These files can be compared against the existing versions in the src directory
+to check on any changes before replacing the old files, but you can also
+generate directly into the final location by running:
+
+./GenerateUcd.py       ../src/pcre2_ucd.c
+./GenerateUcpHeader.py ../src/pcre2_ucp.h
+./GenerateUcpTables.py ../src/pcre2_ucptables.c
+
+Once the .c and .h files are in the ../src directory, the ucptest program can
+be compiled and used to check that the new tables work properly. The data files
+in ucptestdata are set up to check a number of test characters. See the
+comments at the start of ucptest.c. If there are new scripts, adding a few
+tests to the files in ucptestdata is a good idea.
+
+Finally, you should run the GenerateTest26.py script to regenerate new versions 
+of the input and expected output from a series of Unicode property tests that 
+are automatically generated from the Unicode data files. By default, the files
+are written to testinput26 and testoutput26 in the current directory, but you
+can give an alternative directory name as an argument to the script. These
+files should eventually be installed in the main testdata directory.
+
+
+Preparing for a PCRE2 release
+=============================
+
+This section contains a checklist of things that I do before building a new
+release.
+
+. Ensure that the version number and version date are correct in configure.ac.
+
+. Update the library version numbers in configure.ac according to the rules
+  given below.
+
+. If new build options or new source files have been added, ensure that they
+  are added to the CMake files as well as to the autoconf files. The relevant
+  files are CMakeLists.txt and config-cmake.h.in. After making a release, test
+  it out with CMake if there have been changes here.
+
+. Run ./autogen.sh to ensure everything is up-to-date.
+
+. Compile and test with many different config options, and combinations of
+  options. Also, test with valgrind by running "RunTest valgrind" and
+  "RunGrepTest valgrind". The script maint/ManyConfigTests now encapsulates
+  this testing. It runs tests with different configurations, and it also runs
+  some of them with valgrind, all of which can take quite some time.
+
+. Run tests in both 32-bit and 64-bit environments if possible. I can no longer
+  run 32-bit tests.
+
+. Run tests with two or more different compilers (e.g. clang and gcc), and
+  make use of -fsanitize=address and friends where possible. For gcc,
+  -fsanitize=undefined -std=gnu99 picks up undefined behaviour at runtime, but
+  needs -fno-sanitize=shift to get rid of warnings for shifts of negative
+  numbers in the JIT compiler. For clang, -fsanitize=address,undefined,integer
+  can be used but -fno-sanitize=alignment,shift,unsigned-integer-overflow must
+  be added when compiling with JIT. Another useful clang option is
+  -fsanitize=signed-integer-overflow
+
+. Do a test build using CMake. Remove src/config.h first, lest it override the
+  version that CMake creates. Also do a CMake unity build to check that it 
+  still works: [c]cmake -DCMAKE_UNITY_BUILD=ON sets up a unity build.
+
+. Run perltest.sh on the test data for tests 1 and 4. The output should match
+  the PCRE2 test output, apart from the version identification at the start of
+  each test. Sometimes there are other differences in test 4 if PCRE2 and Perl
+  are using different Unicode releases. The other tests are not Perl-compatible
+  (they use various PCRE2-specific features or options).
+
+. It is possible to test with the emulated memmove() function by undefining
+  HAVE_MEMMOVE and HAVE_BCOPY in config.h, though I do not do this often.
+
+. Documentation: check AUTHORS, ChangeLog (check version and date), LICENCE,
+  NEWS (check version and date), NON-AUTOTOOLS-BUILD, and README. Many of these
+  won't need changing, but over the long term things do change.
+
+. I used to test new releases myself on a number of different operating
+  systems. For example, on Solaris it is helpful to test using Sun's cc
+  compiler as a change from gcc. Adding -xarch=v9 to the cc options does a
+  64-bit test, but it also needs -S 64 for pcre2test to increase the stack size
+  for test 2. Since I retired I can no longer do much of this. There are 
+  automated tests under Ubuntu, Alpine, and Windows that are now set up as 
+  GitHub actions. Check that they are running clean.
+
+. The buildbots at http://buildfarm.opencsw.org/ do some automated testing
+  of PCRE2 and should also be checked before putting out a release.
+
+
+Updating version info for libtool
+=================================
+
+This set of rules for updating library version information came from a web page
+whose URL I have forgotten. The version information consists of three parts:
+(current, revision, age).
+
+1. Start with version information of 0:0:0 for each libtool library.
+
+2. Update the version information only immediately before a public release of
+   your software. More frequent updates are unnecessary, and only guarantee
+   that the current interface number gets larger faster.
+
+3. If the library source code has changed at all since the last update, then
+   increment revision; c:r:a becomes c:r+1:a.
+
+4. If any interfaces have been added, removed, or changed since the last
+   update, increment current, and set revision to 0.
+
+5. If any interfaces have been added since the last public release, then
+   increment age.
+
+6. If any interfaces have been removed or changed since the last public
+   release, then set age to 0.
+
+The following explanation may help in understanding the above rules a bit
+better. Consider that there are three possible kinds of reaction from users to
+changes in a shared library:
+
+1. Programs using the previous version may use the new version as a drop-in
+   replacement, and programs using the new version can also work with the
+   previous one. In other words, no recompiling nor relinking is needed. In
+   this case, increment revision only, don't touch current or age.
+
+2. Programs using the previous version may use the new version as a drop-in
+   replacement, but programs using the new version may use APIs not present in
+   the previous one. In other words, a program linking against the new version
+   may fail if linked against the old version at run time. In this case, set
+   revision to 0, increment current and age.
+
+3. Programs may need to be changed, recompiled, relinked in order to use the
+   new version. Increment current, set revision and age to 0.
+
+
+Making a PCRE2 release
+======================
+
+Run PrepareRelease and commit the files that it changes. The first thing this
+script does is to run CheckMan on the man pages; if it finds any markup errors,
+it reports them and then aborts. Otherwise it removes trailing spaces from
+sources and refreshes the HTML documentation. Update the GitHub repository with
+"git push".
+
+Once PrepareRelease has run clean, run "make distcheck" to create the tarballs
+and the zipball. I then sign these files. Double-check with "git status" that
+the repository is fully up-to-date, then create a new tag and a release on
+GitHub. Upload the tarballs, zipball, and the signatures as "assets" of the
+GitHub release.
+
+When the new release is out, don't forget to tell webmaster@pcre.org and the
+mailing list.
+
+
+Future ideas (wish list)
+========================
+
+This section records a list of ideas so that they do not get forgotten. They
+vary enormously in their usefulness and potential for implementation. Some are
+very sensible; some are rather wacky. Some have been on this list for many
+years.
+
+. Optimization
+
+  There are always ideas for new optimizations so as to speed up pattern
+  matching. Most of them try to save work by recognizing a non-match without
+  having to scan all the possibilities. These are some that I've recorded:
+
+  * /((A{0,5}){0,5}){0,5}(something complex)/ on a non-matching string is very
+    slow, though Perl is fast. Can we speed up somehow? Convert to {0,125}?
+    OTOH, this is pathological - the user could easily fix it.
+
+  * Turn ={4} into ==== ? (for speed). I once did an experiment, and it seems
+    to have little effect, and maybe makes things worse.
+
+  * "Ends with literal string" - note that a single character doesn't gain much
+    over the existing "required code unit" feature that just remembers one code
+    unit.
+
+  * Remember an initial string rather than just 1 code unit.
+
+  * A required code unit from alternatives - not just the last unit, but an
+    earlier one if common to all alternatives.
+
+  * Friedl contains other ideas.
+
+  * The code does not set initial code unit flags for Unicode property types
+    such as \p; I don't know how much benefit there would be for, for example,
+    setting the bits for 0-9 and all values >= xC0 (in 8-bit mode) when a
+    pattern starts with \p{N}.
+
+. If Perl gets to a consistent state over the settings of capturing sub-
+  patterns inside repeats, see if we can match it. One example of the
+  difference is the matching of /(main(O)?)+/ against mainOmain, where PCRE2
+  leaves $2 set. In Perl, it's unset. Changing this in PCRE2 will be very hard
+  because I think it needs much more state to be remembered.
+
+. A feature to suspend a match via a callout was once requested.
+
+. An option to convert results into character offsets and character lengths.
+
+. A (non-Unix) user wanted pcregrep options to (a) list a file name just once,
+  preceded by a blank line, instead of adding it to every matched line, and (b)
+  support --outputfile=name.
+
+. Define a union for the results from pcre2_pattern_info().
+
+. Provide a "random access to the subject" facility so that the way in which it
+  is stored is independent of PCRE2. For efficiency, it probably isn't possible
+  to switch this dynamically. It would have to be specified when PCRE2 was
+  compiled. PCRE2 would then call a function every time it wanted a character.
+
+. pcre2grep: add -rs for a sorted recurse. Having to store file names and sort
+  them will of course slow it down.
+
+. Someone suggested --disable-callout to save code space when callouts are
+  never wanted. This seems rather marginal.
+
+. A user suggested a parameter to limit the length of string matched, for
+  example if the parameter is N, the current match should fail if the matched
+  substring exceeds N. This could apply to both match functions. The value
+  could be a new field in the match context. Compare the offset_limit feature,
+  which limits where a match must start.
+
+. Write a function that generates random matching strings for a compiled
+  pattern.
+
+. Pcre2grep: an option to specify the output line separator, either as a string
+  or select from a fixed list. This is not straightforward, because at the
+  moment it outputs whatever is in the input file.
+
+. Improve the code for duplicate checking in pcre2_dfa_match(). An incomplete,
+  non-thread-safe patch showed that this can help performance for patterns
+  where there are many alternatives. However, a simple thread-safe
+  implementation that I tried made things worse in many simple cases, so this
+  is not an obviously good thing.
+
+. PCRE2 cannot at present distinguish between subpatterns with different names,
+  but the same number (created by the use of ?|). In order to do so, a way of
+  remembering *which* subpattern numbered n matched is needed. (*MARK) can
+  perhaps be used as a way round this problem. However, note that Perl does not
+  distinguish: like PCRE2, a name is just an alias for a number in Perl.
+
+. Instead of having #ifdef HAVE_CONFIG_H in each module, put #include
+  "something" and then the #ifdef appears only in one place, in "something".
+
+. Implement something like (?(R2+)... to check outer recursions.
+
+. If Perl ever supports the POSIX notation [[.something.]] PCRE2 should try
+  to follow.
+
+. A user wanted a way of ignoring all Unicode "mark" characters so that, for
+  example "a" followed by an accent would, together, match "a". This can only
+  be done clumsily at present by using a lookahead such as /(?=a)\X/, which
+  works for "combining" characters.
+
+. Perl supports [\N{x}-\N{y}] as a Unicode range, even in EBCDIC. PCRE2
+  supports \N{U+dd..} everywhere, but not in EBCDIC.
+
+. Unicode stuff from Perl:
+
+    \b{gcb} or \b{g}    grapheme cluster boundary
+    \b{sb}              sentence boundary
+    \b{wb}              word boundary
+
+  See Unicode TR 29. The last two are very much aimed at natural language.
+
+. Allow a callout to specify a number of characters to skip. This can be done
+  compatibly via an extra callout field.
+
+. Allow callouts to return *PRUNE, *COMMIT, *THEN, *SKIP, with and without
+  continuing (that is, with and without an implied *FAIL). A new option,
+  PCRE2_CALLOUT_EXTENDED say, would be needed. This is unlikely ever to be
+  implemented by JIT, so this could be an option for pcre2_match().
+
+. A limit on substitutions: a user suggested somehow finding a way of making
+  match_limit apply to the whole operation instead of each match separately.
+
+. Some #defines could be replaced with enums to improve robustness.
+
+. There was a request for an option for pcre2_match() to return the longest
+  match. This would mean searching for all possible matches, of course.
+
+. Perl's /a modifier sets Unicode, but restricts \d etc to ASCII characters,
+  which is the PCRE2 default for PCRE2_UTF (use PCRE2_UCP to change). However,
+  Perl also has /aa, which in addition, disables ASCII/non-ASCII caseless
+  matching. Perhaps we need a new option PCRE2_CASELESS_RESTRICT_ASCII. In
+  practice, this just means not using the ucd_caseless_sets[] table.
+
+. There is more that could be done to the oss-fuzz setup (needs some research).
+  A seed corpus could be built. I noted something about $LIB_FUZZING_ENGINE.
+  The test function could make use of get_substrings() to cover more code.
+
+. A neater way of handling recursion file names in pcre2grep, e.g. a single
+  buffer that can grow. See also GitHub issue #2 (recursion looping via
+  symlinks).
+
+. A user suggested that before/after parameters in pcre2grep could have
+  negative values, to list lines near to the matched line, but not necessarily
+  the line itself. For example, --before-context=-1 would list the line *after*
+  each matched line, without showing the matched line. The problem here is what
+  to do with matches that are close together. Maybe a simpler way would be a
+  flag to disable showing matched lines, only valid with either -A or -B?
+
+. There was a suggestion for a pcre2grep colour default, or possibly a more
+  general PCRE2GREP_OPT, but only for some options - not file names or patterns.
+
+. Breaking loops that match an empty string: perhaps find a way of continuing
+  if *something* has changed, but this might mean remembering additional data.
+  "Something" could be a capture value, but then a list of previous values
+  would be needed to avoid a cycle of changes.
+
+. If a function could be written to find 3-character (or other length) fixed
+  strings, at least one of which must be present for a match, efficient
+  pre-searching of large datasets could be implemented.
+
+. If pcre2grep had --first-line (match only in the first line) it could be
+  efficiently used to find files "starting with xxx". What about --last-line?
+  There was also the suggestion of an option for pcre2grep to scan only the
+  start of a file. I am not keen - this is the job of "head".
+
+. A user requested a means of determining whether a failed match was failed by
+  the start-of-match optimizations, or by running the match engine. Easy enough
+  to define a bit in the match data, but all three matchers would need work.
+
+. Would inlining "simple" recursions provide a useful performance boost for the
+  interpreters? JIT already does some of this, but it may not be worth it for
+  the interpreters.
+
+. Redesign handling of class/nclass/xclass because the compile code logic is
+  currently very contorted and obscure. Also there was a request for a way of
+  re-defining \w (and therefore \W, \b, and \B). An in-pattern sequence such as
+  (?w=[...]) was suggested. Easiest way would be simply to inline the class,
+  with lookarounds for \b and \B. Ideally the setting should last till the end
+  of the group, which means remembering all previous settings; maybe a fixed
+  amount of stack would do - how deep would anyone want to nest these things?
+  See GitHub issue #13 for a compendium of character class issues, including
+  (?[...]) extended classes.
+
+. A user suggested something like --with-build-info to set a build information
+  string that could be retrieved by pcre2_config(). However, there's no
+  facility for a length limit in pcre2_config(), and what would be the
+  encoding?
+
+. Quantified groups with a fixed count currently operate by replicating the
+  group in the compiled bytecode. This may not really matter in these days of
+  gigabyte memory, but perhaps another implementation might be considered.
+  Needs coordination between the interpreters and JIT.
+
+. There are regular requests for variable-length lookbehinds.
+
+. See also any suggestions in the GitHub issues.
+
+Philip Hazel
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 25 April 2022
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/BidiMirroring.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/BidiMirroring.txt
@ -0,0 +1,633 @@
+# BidiMirroring-14.0.0.txt
+# Date: 2021-08-08, 22:55:00 GMT [KW, RP]
+# © 2021 Unicode®, Inc.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Bidi_Mirroring_Glyph Property
+#
+# This file is an informative contributory data file in the
+# Unicode Character Database.
+#
+# This data file lists characters that have the Bidi_Mirrored=Yes property
+# value, for which there is another Unicode character that typically has a glyph
+# that is the mirror image of the original character's glyph.
+#
+# The repertoire covered by the file is Unicode 14.0.0.
+#
+# The file contains a list of lines with mappings from one code point
+# to another one for character-based mirroring.
+# Note that for "real" mirroring, a rendering engine needs to select
+# appropriate alternative glyphs, and that many Unicode characters do not
+# have a mirror-image Unicode character.
+#
+# Each mapping line contains two fields, separated by a semicolon (';').
+# Each of the two fields contains a code point represented as a
+# variable-length hexadecimal value with 4 to 6 digits.
+# A comment indicates where the characters are "BEST FIT" mirroring.
+#
+# Code points for which Bidi_Mirrored=Yes, but for which no appropriate
+# characters exist with mirrored glyphs, are
+# listed as comments at the end of the file.
+#
+# Formally, the default value of the Bidi_Mirroring_Glyph property
+# for each code point is <none>, unless a mapping to
+# some other character is specified in this data file. When a code
+# point has the default value for the Bidi_Mirroring_Glyph property,
+# that means that no other character exists whose glyph is suitable
+# for character-based mirroring.
+#
+# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm,
+# at https://www.unicode.org/reports/tr9/
+#
+# This file was originally created by Markus Scherer.
+# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
+# and for subsequent versions by Ken Whistler, Laurentiu Iancu, and Roozbeh Pournader.
+#
+# Historical and Compatibility Information:
+#
+# The OpenType Mirroring Pairs List (OMPL) is frozen to match the
+# Unicode 5.1 version of the Bidi_Mirroring_Glyph property (2008).
+# See https://www.microsoft.com/typography/otspec/ompl.txt
+#
+# The Unicode 6.1 version of the Bidi_Mirroring_Glyph property (2011)
+# added one mirroring pair: 27CB <--> 27CD.
+#
+# The Unicode 11.0 version of the Bidi_Mirroring_Glyph property (2018)
+# underwent a substantial revision, to formally recognize all of the
+# exact mirroring pairs and "BEST FIT" mirroring pairs that had been
+# added after the freezing of the OMPL list. As a result, starting
+# with Unicode 11.0, the bmg mapping values more accurately reflect
+# the current status of glyphs for Bidi_Mirrored characters in
+# the Unicode Standard, but this listing now extends significantly
+# beyond the frozen OMPL list. Implementers should be aware of this
+# intentional distinction.
+#
+# ############################################################
+#
+# Property:	Bidi_Mirroring_Glyph
+#
+# @missing: 0000..10FFFF; <none>
+
+0028; 0029 # LEFT PARENTHESIS
+0029; 0028 # RIGHT PARENTHESIS
+003C; 003E # LESS-THAN SIGN
+003E; 003C # GREATER-THAN SIGN
+005B; 005D # LEFT SQUARE BRACKET
+005D; 005B # RIGHT SQUARE BRACKET
+007B; 007D # LEFT CURLY BRACKET
+007D; 007B # RIGHT CURLY BRACKET
+00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON
+0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS
+0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON
+0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS
+169B; 169C # OGHAM FEATHER MARK
+169C; 169B # OGHAM REVERSED FEATHER MARK
+2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
+2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
+207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
+207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
+208D; 208E # SUBSCRIPT LEFT PARENTHESIS
+208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
+2208; 220B # ELEMENT OF
+2209; 220C # [BEST FIT] NOT AN ELEMENT OF
+220A; 220D # SMALL ELEMENT OF
+220B; 2208 # CONTAINS AS MEMBER
+220C; 2209 # [BEST FIT] DOES NOT CONTAIN AS MEMBER
+220D; 220A # SMALL CONTAINS AS MEMBER
+2215; 29F5 # DIVISION SLASH
+221F; 2BFE # RIGHT ANGLE
+2220; 29A3 # ANGLE
+2221; 299B # MEASURED ANGLE
+2222; 29A0 # SPHERICAL ANGLE
+2224; 2AEE # DOES NOT DIVIDE
+223C; 223D # TILDE OPERATOR
+223D; 223C # REVERSED TILDE
+2243; 22CD # ASYMPTOTICALLY EQUAL TO
+2245; 224C # APPROXIMATELY EQUAL TO
+224C; 2245 # ALL EQUAL TO
+2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
+2254; 2255 # COLON EQUALS
+2255; 2254 # EQUALS COLON
+2264; 2265 # LESS-THAN OR EQUAL TO
+2265; 2264 # GREATER-THAN OR EQUAL TO
+2266; 2267 # LESS-THAN OVER EQUAL TO
+2267; 2266 # GREATER-THAN OVER EQUAL TO
+2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
+2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
+226A; 226B # MUCH LESS-THAN
+226B; 226A # MUCH GREATER-THAN
+226E; 226F # [BEST FIT] NOT LESS-THAN
+226F; 226E # [BEST FIT] NOT GREATER-THAN
+2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
+2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
+2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
+2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
+2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
+2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
+2276; 2277 # LESS-THAN OR GREATER-THAN
+2277; 2276 # GREATER-THAN OR LESS-THAN
+2278; 2279 # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN
+2279; 2278 # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN
+227A; 227B # PRECEDES
+227B; 227A # SUCCEEDS
+227C; 227D # PRECEDES OR EQUAL TO
+227D; 227C # SUCCEEDS OR EQUAL TO
+227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
+227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
+2280; 2281 # [BEST FIT] DOES NOT PRECEDE
+2281; 2280 # [BEST FIT] DOES NOT SUCCEED
+2282; 2283 # SUBSET OF
+2283; 2282 # SUPERSET OF
+2284; 2285 # [BEST FIT] NOT A SUBSET OF
+2285; 2284 # [BEST FIT] NOT A SUPERSET OF
+2286; 2287 # SUBSET OF OR EQUAL TO
+2287; 2286 # SUPERSET OF OR EQUAL TO
+2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
+2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
+228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
+228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
+228F; 2290 # SQUARE IMAGE OF
+2290; 228F # SQUARE ORIGINAL OF
+2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
+2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
+2298; 29B8 # CIRCLED DIVISION SLASH
+22A2; 22A3 # RIGHT TACK
+22A3; 22A2 # LEFT TACK
+22A6; 2ADE # ASSERTION
+22A8; 2AE4 # TRUE
+22A9; 2AE3 # FORCES
+22AB; 2AE5 # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+22B0; 22B1 # PRECEDES UNDER RELATION
+22B1; 22B0 # SUCCEEDS UNDER RELATION
+22B2; 22B3 # NORMAL SUBGROUP OF
+22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
+22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
+22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+22B6; 22B7 # ORIGINAL OF
+22B7; 22B6 # IMAGE OF
+22B8; 27DC # MULTIMAP
+22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CB; 22CC # LEFT SEMIDIRECT PRODUCT
+22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
+22CD; 2243 # REVERSED TILDE EQUALS
+22D0; 22D1 # DOUBLE SUBSET
+22D1; 22D0 # DOUBLE SUPERSET
+22D6; 22D7 # LESS-THAN WITH DOT
+22D7; 22D6 # GREATER-THAN WITH DOT
+22D8; 22D9 # VERY MUCH LESS-THAN
+22D9; 22D8 # VERY MUCH GREATER-THAN
+22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
+22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
+22DC; 22DD # EQUAL TO OR LESS-THAN
+22DD; 22DC # EQUAL TO OR GREATER-THAN
+22DE; 22DF # EQUAL TO OR PRECEDES
+22DF; 22DE # EQUAL TO OR SUCCEEDS
+22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
+22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
+22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
+22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
+22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
+22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
+22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
+22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
+22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
+22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
+22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
+22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
+22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
+22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
+22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
+22F2; 22FA # ELEMENT OF WITH LONG HORIZONTAL STROKE
+22F3; 22FB # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22F4; 22FC # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22F6; 22FD # ELEMENT OF WITH OVERBAR
+22F7; 22FE # SMALL ELEMENT OF WITH OVERBAR
+22FA; 22F2 # CONTAINS WITH LONG HORIZONTAL STROKE
+22FB; 22F3 # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22FC; 22F4 # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22FD; 22F6 # CONTAINS WITH OVERBAR
+22FE; 22F7 # SMALL CONTAINS WITH OVERBAR
+2308; 2309 # LEFT CEILING
+2309; 2308 # RIGHT CEILING
+230A; 230B # LEFT FLOOR
+230B; 230A # RIGHT FLOOR
+2329; 232A # LEFT-POINTING ANGLE BRACKET
+232A; 2329 # RIGHT-POINTING ANGLE BRACKET
+2768; 2769 # MEDIUM LEFT PARENTHESIS ORNAMENT
+2769; 2768 # MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A; 276B # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B; 276A # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C; 276D # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D; 276C # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E; 276F # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT
+27C3; 27C4 # OPEN SUBSET
+27C4; 27C3 # OPEN SUPERSET
+27C5; 27C6 # LEFT S-SHAPED BAG DELIMITER
+27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER
+27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET
+27C9; 27C8 # SUPERSET PRECEDING SOLIDUS
+27CB; 27CD # MATHEMATICAL RISING DIAGONAL
+27CD; 27CB # MATHEMATICAL FALLING DIAGONAL
+27D5; 27D6 # LEFT OUTER JOIN
+27D6; 27D5 # RIGHT OUTER JOIN
+27DC; 22B8 # LEFT MULTIMAP
+27DD; 27DE # LONG RIGHT TACK
+27DE; 27DD # LONG LEFT TACK
+27E2; 27E3 # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK
+27E3; 27E2 # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK
+27E4; 27E5 # WHITE SQUARE WITH LEFTWARDS TICK
+27E5; 27E4 # WHITE SQUARE WITH RIGHTWARDS TICK
+27E6; 27E7 # MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7; 27E6 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8; 27E9 # MATHEMATICAL LEFT ANGLE BRACKET
+27E9; 27E8 # MATHEMATICAL RIGHT ANGLE BRACKET
+27EA; 27EB # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB; 27EA # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC; 27ED # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED; 27EC # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE; 27EF # MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF; 27EE # MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+2983; 2984 # LEFT WHITE CURLY BRACKET
+2984; 2983 # RIGHT WHITE CURLY BRACKET
+2985; 2986 # LEFT WHITE PARENTHESIS
+2986; 2985 # RIGHT WHITE PARENTHESIS
+2987; 2988 # Z NOTATION LEFT IMAGE BRACKET
+2988; 2987 # Z NOTATION RIGHT IMAGE BRACKET
+2989; 298A # Z NOTATION LEFT BINDING BRACKET
+298A; 2989 # Z NOTATION RIGHT BINDING BRACKET
+298B; 298C # LEFT SQUARE BRACKET WITH UNDERBAR
+298C; 298B # RIGHT SQUARE BRACKET WITH UNDERBAR
+298D; 2990 # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E; 298F # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F; 298E # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990; 298D # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991; 2992 # LEFT ANGLE BRACKET WITH DOT
+2992; 2991 # RIGHT ANGLE BRACKET WITH DOT
+2993; 2994 # LEFT ARC LESS-THAN BRACKET
+2994; 2993 # RIGHT ARC GREATER-THAN BRACKET
+2995; 2996 # DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996; 2995 # DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997; 2998 # LEFT BLACK TORTOISE SHELL BRACKET
+2998; 2997 # RIGHT BLACK TORTOISE SHELL BRACKET
+299B; 2221 # MEASURED ANGLE OPENING LEFT
+29A0; 2222 # SPHERICAL ANGLE OPENING LEFT
+29A3; 2220 # REVERSED ANGLE
+29A4; 29A5 # ANGLE WITH UNDERBAR
+29A5; 29A4 # REVERSED ANGLE WITH UNDERBAR
+29A8; 29A9 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT
+29A9; 29A8 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT
+29AA; 29AB # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT
+29AB; 29AA # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT
+29AC; 29AD # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP
+29AD; 29AC # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP
+29AE; 29AF # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN
+29AF; 29AE # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN
+29B8; 2298 # CIRCLED REVERSE SOLIDUS
+29C0; 29C1 # CIRCLED LESS-THAN
+29C1; 29C0 # CIRCLED GREATER-THAN
+29C4; 29C5 # SQUARED RISING DIAGONAL SLASH
+29C5; 29C4 # SQUARED FALLING DIAGONAL SLASH
+29CF; 29D0 # LEFT TRIANGLE BESIDE VERTICAL BAR
+29D0; 29CF # VERTICAL BAR BESIDE RIGHT TRIANGLE
+29D1; 29D2 # BOWTIE WITH LEFT HALF BLACK
+29D2; 29D1 # BOWTIE WITH RIGHT HALF BLACK
+29D4; 29D5 # TIMES WITH LEFT HALF BLACK
+29D5; 29D4 # TIMES WITH RIGHT HALF BLACK
+29D8; 29D9 # LEFT WIGGLY FENCE
+29D9; 29D8 # RIGHT WIGGLY FENCE
+29DA; 29DB # LEFT DOUBLE WIGGLY FENCE
+29DB; 29DA # RIGHT DOUBLE WIGGLY FENCE
+29E8; 29E9 # DOWN-POINTING TRIANGLE WITH LEFT HALF BLACK
+29E9; 29E8 # DOWN-POINTING TRIANGLE WITH RIGHT HALF BLACK
+29F5; 2215 # REVERSE SOLIDUS OPERATOR
+29F8; 29F9 # BIG SOLIDUS
+29F9; 29F8 # BIG REVERSE SOLIDUS
+29FC; 29FD # LEFT-POINTING CURVED ANGLE BRACKET
+29FD; 29FC # RIGHT-POINTING CURVED ANGLE BRACKET
+2A2B; 2A2C # MINUS SIGN WITH FALLING DOTS
+2A2C; 2A2B # MINUS SIGN WITH RISING DOTS
+2A2D; 2A2E # PLUS SIGN IN LEFT HALF CIRCLE
+2A2E; 2A2D # PLUS SIGN IN RIGHT HALF CIRCLE
+2A34; 2A35 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE
+2A35; 2A34 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
+2A3C; 2A3D # INTERIOR PRODUCT
+2A3D; 2A3C # RIGHTHAND INTERIOR PRODUCT
+2A64; 2A65 # Z NOTATION DOMAIN ANTIRESTRICTION
+2A65; 2A64 # Z NOTATION RANGE ANTIRESTRICTION
+2A79; 2A7A # LESS-THAN WITH CIRCLE INSIDE
+2A7A; 2A79 # GREATER-THAN WITH CIRCLE INSIDE
+2A7B; 2A7C # [BEST FIT] LESS-THAN WITH QUESTION MARK ABOVE
+2A7C; 2A7B # [BEST FIT] GREATER-THAN WITH QUESTION MARK ABOVE
+2A7D; 2A7E # LESS-THAN OR SLANTED EQUAL TO
+2A7E; 2A7D # GREATER-THAN OR SLANTED EQUAL TO
+2A7F; 2A80 # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+2A80; 2A7F # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+2A81; 2A82 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+2A82; 2A81 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+2A83; 2A84 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT
+2A84; 2A83 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT
+2A85; 2A86 # [BEST FIT] LESS-THAN OR APPROXIMATE
+2A86; 2A85 # [BEST FIT] GREATER-THAN OR APPROXIMATE
+2A87; 2A88 # [BEST FIT] LESS-THAN AND SINGLE-LINE NOT EQUAL TO
+2A88; 2A87 # [BEST FIT] GREATER-THAN AND SINGLE-LINE NOT EQUAL TO
+2A89; 2A8A # [BEST FIT] LESS-THAN AND NOT APPROXIMATE
+2A8A; 2A89 # [BEST FIT] GREATER-THAN AND NOT APPROXIMATE
+2A8B; 2A8C # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
+2A8C; 2A8B # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
+2A8D; 2A8E # [BEST FIT] LESS-THAN ABOVE SIMILAR OR EQUAL
+2A8E; 2A8D # [BEST FIT] GREATER-THAN ABOVE SIMILAR OR EQUAL
+2A8F; 2A90 # [BEST FIT] LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN
+2A90; 2A8F # [BEST FIT] GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN
+2A91; 2A92 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL
+2A92; 2A91 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL
+2A93; 2A94 # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL
+2A94; 2A93 # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL
+2A95; 2A96 # SLANTED EQUAL TO OR LESS-THAN
+2A96; 2A95 # SLANTED EQUAL TO OR GREATER-THAN
+2A97; 2A98 # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE
+2A98; 2A97 # SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE
+2A99; 2A9A # DOUBLE-LINE EQUAL TO OR LESS-THAN
+2A9A; 2A99 # DOUBLE-LINE EQUAL TO OR GREATER-THAN
+2A9B; 2A9C # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN
+2A9C; 2A9B # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN
+2A9D; 2A9E # [BEST FIT] SIMILAR OR LESS-THAN
+2A9E; 2A9D # [BEST FIT] SIMILAR OR GREATER-THAN
+2A9F; 2AA0 # [BEST FIT] SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN
+2AA0; 2A9F # [BEST FIT] SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN
+2AA1; 2AA2 # DOUBLE NESTED LESS-THAN
+2AA2; 2AA1 # DOUBLE NESTED GREATER-THAN
+2AA6; 2AA7 # LESS-THAN CLOSED BY CURVE
+2AA7; 2AA6 # GREATER-THAN CLOSED BY CURVE
+2AA8; 2AA9 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+2AA9; 2AA8 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+2AAA; 2AAB # SMALLER THAN
+2AAB; 2AAA # LARGER THAN
+2AAC; 2AAD # SMALLER THAN OR EQUAL TO
+2AAD; 2AAC # LARGER THAN OR EQUAL TO
+2AAF; 2AB0 # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
+2AB0; 2AAF # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
+2AB1; 2AB2 # [BEST FIT] PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO
+2AB2; 2AB1 # [BEST FIT] SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO
+2AB3; 2AB4 # PRECEDES ABOVE EQUALS SIGN
+2AB4; 2AB3 # SUCCEEDS ABOVE EQUALS SIGN
+2AB5; 2AB6 # [BEST FIT] PRECEDES ABOVE NOT EQUAL TO
+2AB6; 2AB5 # [BEST FIT] SUCCEEDS ABOVE NOT EQUAL TO
+2AB7; 2AB8 # [BEST FIT] PRECEDES ABOVE ALMOST EQUAL TO
+2AB8; 2AB7 # [BEST FIT] SUCCEEDS ABOVE ALMOST EQUAL TO
+2AB9; 2ABA # [BEST FIT] PRECEDES ABOVE NOT ALMOST EQUAL TO
+2ABA; 2AB9 # [BEST FIT] SUCCEEDS ABOVE NOT ALMOST EQUAL TO
+2ABB; 2ABC # DOUBLE PRECEDES
+2ABC; 2ABB # DOUBLE SUCCEEDS
+2ABD; 2ABE # SUBSET WITH DOT
+2ABE; 2ABD # SUPERSET WITH DOT
+2ABF; 2AC0 # SUBSET WITH PLUS SIGN BELOW
+2AC0; 2ABF # SUPERSET WITH PLUS SIGN BELOW
+2AC1; 2AC2 # SUBSET WITH MULTIPLICATION SIGN BELOW
+2AC2; 2AC1 # SUPERSET WITH MULTIPLICATION SIGN BELOW
+2AC3; 2AC4 # SUBSET OF OR EQUAL TO WITH DOT ABOVE
+2AC4; 2AC3 # SUPERSET OF OR EQUAL TO WITH DOT ABOVE
+2AC5; 2AC6 # SUBSET OF ABOVE EQUALS SIGN
+2AC6; 2AC5 # SUPERSET OF ABOVE EQUALS SIGN
+2AC7; 2AC8 # [BEST FIT] SUBSET OF ABOVE TILDE OPERATOR
+2AC8; 2AC7 # [BEST FIT] SUPERSET OF ABOVE TILDE OPERATOR
+2AC9; 2ACA # [BEST FIT] SUBSET OF ABOVE ALMOST EQUAL TO
+2ACA; 2AC9 # [BEST FIT] SUPERSET OF ABOVE ALMOST EQUAL TO
+2ACB; 2ACC # [BEST FIT] SUBSET OF ABOVE NOT EQUAL TO
+2ACC; 2ACB # [BEST FIT] SUPERSET OF ABOVE NOT EQUAL TO
+2ACD; 2ACE # SQUARE LEFT OPEN BOX OPERATOR
+2ACE; 2ACD # SQUARE RIGHT OPEN BOX OPERATOR
+2ACF; 2AD0 # CLOSED SUBSET
+2AD0; 2ACF # CLOSED SUPERSET
+2AD1; 2AD2 # CLOSED SUBSET OR EQUAL TO
+2AD2; 2AD1 # CLOSED SUPERSET OR EQUAL TO
+2AD3; 2AD4 # SUBSET ABOVE SUPERSET
+2AD4; 2AD3 # SUPERSET ABOVE SUBSET
+2AD5; 2AD6 # SUBSET ABOVE SUBSET
+2AD6; 2AD5 # SUPERSET ABOVE SUPERSET
+2ADE; 22A6 # SHORT LEFT TACK
+2AE3; 22A9 # DOUBLE VERTICAL BAR LEFT TURNSTILE
+2AE4; 22A8 # VERTICAL BAR DOUBLE LEFT TURNSTILE
+2AE5; 22AB # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE
+2AEC; 2AED # DOUBLE STROKE NOT SIGN
+2AED; 2AEC # REVERSED DOUBLE STROKE NOT SIGN
+2AEE; 2224 # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH
+2AF7; 2AF8 # TRIPLE NESTED LESS-THAN
+2AF8; 2AF7 # TRIPLE NESTED GREATER-THAN
+2AF9; 2AFA # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO
+2AFA; 2AF9 # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO
+2BFE; 221F # REVERSED RIGHT ANGLE
+2E02; 2E03 # LEFT SUBSTITUTION BRACKET
+2E03; 2E02 # RIGHT SUBSTITUTION BRACKET
+2E04; 2E05 # LEFT DOTTED SUBSTITUTION BRACKET
+2E05; 2E04 # RIGHT DOTTED SUBSTITUTION BRACKET
+2E09; 2E0A # LEFT TRANSPOSITION BRACKET
+2E0A; 2E09 # RIGHT TRANSPOSITION BRACKET
+2E0C; 2E0D # LEFT RAISED OMISSION BRACKET
+2E0D; 2E0C # RIGHT RAISED OMISSION BRACKET
+2E1C; 2E1D # LEFT LOW PARAPHRASE BRACKET
+2E1D; 2E1C # RIGHT LOW PARAPHRASE BRACKET
+2E20; 2E21 # LEFT VERTICAL BAR WITH QUILL
+2E21; 2E20 # RIGHT VERTICAL BAR WITH QUILL
+2E22; 2E23 # TOP LEFT HALF BRACKET
+2E23; 2E22 # TOP RIGHT HALF BRACKET
+2E24; 2E25 # BOTTOM LEFT HALF BRACKET
+2E25; 2E24 # BOTTOM RIGHT HALF BRACKET
+2E26; 2E27 # LEFT SIDEWAYS U BRACKET
+2E27; 2E26 # RIGHT SIDEWAYS U BRACKET
+2E28; 2E29 # LEFT DOUBLE PARENTHESIS
+2E29; 2E28 # RIGHT DOUBLE PARENTHESIS
+2E55; 2E56 # LEFT SQUARE BRACKET WITH STROKE
+2E56; 2E55 # RIGHT SQUARE BRACKET WITH STROKE
+2E57; 2E58 # LEFT SQUARE BRACKET WITH DOUBLE STROKE
+2E58; 2E57 # RIGHT SQUARE BRACKET WITH DOUBLE STROKE
+2E59; 2E5A # TOP HALF LEFT PARENTHESIS
+2E5A; 2E59 # TOP HALF RIGHT PARENTHESIS
+2E5B; 2E5C # BOTTOM HALF LEFT PARENTHESIS
+2E5C; 2E5B # BOTTOM HALF RIGHT PARENTHESIS
+3008; 3009 # LEFT ANGLE BRACKET
+3009; 3008 # RIGHT ANGLE BRACKET
+300A; 300B # LEFT DOUBLE ANGLE BRACKET
+300B; 300A # RIGHT DOUBLE ANGLE BRACKET
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET
+300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
+300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
+300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
+3010; 3011 # LEFT BLACK LENTICULAR BRACKET
+3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
+3014; 3015 # LEFT TORTOISE SHELL BRACKET
+3015; 3014 # RIGHT TORTOISE SHELL BRACKET
+3016; 3017 # LEFT WHITE LENTICULAR BRACKET
+3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
+3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
+3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
+301A; 301B # LEFT WHITE SQUARE BRACKET
+301B; 301A # RIGHT WHITE SQUARE BRACKET
+FE59; FE5A # SMALL LEFT PARENTHESIS
+FE5A; FE59 # SMALL RIGHT PARENTHESIS
+FE5B; FE5C # SMALL LEFT CURLY BRACKET
+FE5C; FE5B # SMALL RIGHT CURLY BRACKET
+FE5D; FE5E # SMALL LEFT TORTOISE SHELL BRACKET
+FE5E; FE5D # SMALL RIGHT TORTOISE SHELL BRACKET
+FE64; FE65 # SMALL LESS-THAN SIGN
+FE65; FE64 # SMALL GREATER-THAN SIGN
+FF08; FF09 # FULLWIDTH LEFT PARENTHESIS
+FF09; FF08 # FULLWIDTH RIGHT PARENTHESIS
+FF1C; FF1E # FULLWIDTH LESS-THAN SIGN
+FF1E; FF1C # FULLWIDTH GREATER-THAN SIGN
+FF3B; FF3D # FULLWIDTH LEFT SQUARE BRACKET
+FF3D; FF3B # FULLWIDTH RIGHT SQUARE BRACKET
+FF5B; FF5D # FULLWIDTH LEFT CURLY BRACKET
+FF5D; FF5B # FULLWIDTH RIGHT CURLY BRACKET
+FF5F; FF60 # FULLWIDTH LEFT WHITE PARENTHESIS
+FF60; FF5F # FULLWIDTH RIGHT WHITE PARENTHESIS
+FF62; FF63 # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET
+FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
+
+# The following characters have no appropriate mirroring character.
+# For these characters it is up to the rendering system
+#   to provide mirrored glyphs.
+
+# 2140; DOUBLE-STRUCK N-ARY SUMMATION
+# 2201; COMPLEMENT
+# 2202; PARTIAL DIFFERENTIAL
+# 2203; THERE EXISTS
+# 2204; THERE DOES NOT EXIST
+# 2211; N-ARY SUMMATION
+# 2216; SET MINUS
+# 221A; SQUARE ROOT
+# 221B; CUBE ROOT
+# 221C; FOURTH ROOT
+# 221D; PROPORTIONAL TO
+# 2226; NOT PARALLEL TO
+# 222B; INTEGRAL
+# 222C; DOUBLE INTEGRAL
+# 222D; TRIPLE INTEGRAL
+# 222E; CONTOUR INTEGRAL
+# 222F; SURFACE INTEGRAL
+# 2230; VOLUME INTEGRAL
+# 2231; CLOCKWISE INTEGRAL
+# 2232; CLOCKWISE CONTOUR INTEGRAL
+# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
+# 2239; EXCESS
+# 223B; HOMOTHETIC
+# 223E; INVERTED LAZY S
+# 223F; SINE WAVE
+# 2240; WREATH PRODUCT
+# 2241; NOT TILDE
+# 2242; MINUS TILDE
+# 2244; NOT ASYMPTOTICALLY EQUAL TO
+# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
+# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+# 2248; ALMOST EQUAL TO
+# 2249; NOT ALMOST EQUAL TO
+# 224A; ALMOST EQUAL OR EQUAL TO
+# 224B; TRIPLE TILDE
+# 225F; QUESTIONED EQUAL TO
+# 2260; NOT EQUAL TO
+# 2262; NOT IDENTICAL TO
+# 228C; MULTISET
+# 22A7; MODELS
+# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
+# 22AC; DOES NOT PROVE
+# 22AD; NOT TRUE
+# 22AE; DOES NOT FORCE
+# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22BE; RIGHT ANGLE WITH ARC
+# 22BF; RIGHT TRIANGLE
+# 22F5; ELEMENT OF WITH DOT ABOVE
+# 22F8; ELEMENT OF WITH UNDERBAR
+# 22F9; ELEMENT OF WITH TWO HORIZONTAL STROKES
+# 22FF; Z NOTATION BAG MEMBERSHIP
+# 2320; TOP HALF INTEGRAL
+# 2321; BOTTOM HALF INTEGRAL
+# 27C0; THREE DIMENSIONAL ANGLE
+# 27CC; LONG DIVISION
+# 27D3; LOWER RIGHT CORNER WITH DOT
+# 27D4; UPPER LEFT CORNER WITH DOT
+# 299C; RIGHT ANGLE VARIANT WITH SQUARE
+# 299D; MEASURED RIGHT ANGLE WITH DOT
+# 299E; ANGLE WITH S INSIDE
+# 299F; ACUTE ANGLE
+# 29A2; TURNED ANGLE
+# 29A6; OBLIQUE ANGLE OPENING UP
+# 29A7; OBLIQUE ANGLE OPENING DOWN
+# 29C2; CIRCLE WITH SMALL CIRCLE TO THE RIGHT
+# 29C3; CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT
+# 29C9; TWO JOINED SQUARES
+# 29CE; RIGHT TRIANGLE ABOVE LEFT TRIANGLE
+# 29DC; INCOMPLETE INFINITY
+# 29E1; INCREASES AS
+# 29E3; EQUALS SIGN AND SLANTED PARALLEL
+# 29E4; EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE
+# 29E5; IDENTICAL TO AND SLANTED PARALLEL
+# 29F4; RULE-DELAYED
+# 29F6; SOLIDUS WITH OVERBAR
+# 29F7; REVERSE SOLIDUS WITH HORIZONTAL STROKE
+# 2A0A; MODULO TWO SUM
+# 2A0B; SUMMATION WITH INTEGRAL
+# 2A0C; QUADRUPLE INTEGRAL OPERATOR
+# 2A0D; FINITE PART INTEGRAL
+# 2A0E; INTEGRAL WITH DOUBLE STROKE
+# 2A0F; INTEGRAL AVERAGE WITH SLASH
+# 2A10; CIRCULATION FUNCTION
+# 2A11; ANTICLOCKWISE INTEGRATION
+# 2A12; LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE
+# 2A13; LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE
+# 2A14; LINE INTEGRATION NOT INCLUDING THE POLE
+# 2A15; INTEGRAL AROUND A POINT OPERATOR
+# 2A16; QUATERNION INTEGRAL OPERATOR
+# 2A17; INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
+# 2A18; INTEGRAL WITH TIMES SIGN
+# 2A19; INTEGRAL WITH INTERSECTION
+# 2A1A; INTEGRAL WITH UNION
+# 2A1B; INTEGRAL WITH OVERBAR
+# 2A1C; INTEGRAL WITH UNDERBAR
+# 2A1E; LARGE LEFT TRIANGLE OPERATOR
+# 2A1F; Z NOTATION SCHEMA COMPOSITION
+# 2A20; Z NOTATION SCHEMA PIPING
+# 2A21; Z NOTATION SCHEMA PROJECTION
+# 2A24; PLUS SIGN WITH TILDE ABOVE
+# 2A26; PLUS SIGN WITH TILDE BELOW
+# 2A29; MINUS SIGN WITH COMMA ABOVE
+# 2A3E; Z NOTATION RELATIONAL COMPOSITION
+# 2A57; SLOPING LARGE OR
+# 2A58; SLOPING LARGE AND
+# 2A6A; TILDE OPERATOR WITH DOT ABOVE
+# 2A6B; TILDE OPERATOR WITH RISING DOTS
+# 2A6C; SIMILAR MINUS SIMILAR
+# 2A6D; CONGRUENT WITH DOT ABOVE
+# 2A6F; ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT
+# 2A70; APPROXIMATELY EQUAL OR EQUAL TO
+# 2A73; EQUALS SIGN ABOVE TILDE OPERATOR
+# 2A74; DOUBLE COLON EQUAL
+# 2AA3; DOUBLE NESTED LESS-THAN WITH UNDERBAR
+# 2ADC; FORKING
+# 2AE2; VERTICAL BAR TRIPLE RIGHT TURNSTILE
+# 2AE6; LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL
+# 2AF3; PARALLEL WITH TILDE OPERATOR
+# 2AFB; TRIPLE SOLIDUS BINARY RELATION
+# 2AFD; DOUBLE SOLIDUS OPERATOR
+# 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+# 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+# 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+# 1D789; MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+# 1D7C3; MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+
+# EOF
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/CaseFolding.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/CaseFolding.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedBidiClass.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedBidiClass.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedCoreProperties.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedCoreProperties.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedGeneralCategory.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/DerivedGeneralCategory.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/GraphemeBreakProperty.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/GraphemeBreakProperty.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropList.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropList.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropertyAliases.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropertyAliases.txt
@ -0,0 +1,212 @@
+# PropertyAliases-14.0.0.txt
+# Date: 2021-03-08, 19:35:48 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see http://www.unicode.org/reports/tr44/
+#
+# This file contains aliases for properties used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line has two or more fields, separated by semicolons.
+#
+# First Field: The first field is the short name for the property.
+# It is typically an abbreviation, but in a number of cases it is simply
+# a duplicate of the "long name" in the second field.
+# For Unihan database tags, the short name is actually a longer string than
+# the tag specified in the second field.
+#
+# Second Field: The second field is the long name for the property,
+# typically the formal name used in documentation about the property.
+#
+# The above are the preferred aliases. Other aliases may be listed in additional fields.
+#
+# Loose matching should be applied to all property names and property values, with
+# the exception of String Property values. With loose matching of property names and
+# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
+# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
+#
+# NOTE: Property value names are NOT unique across properties. For example:
+#
+#   AL means Arabic Letter for the Bidi_Class property, and
+#   AL means Above_Left for the Combining_Class property, and
+#   AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+# For example:
+#
+#   sc means the Script property, and
+#   Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+#
+# For more information, see UAX #44, Unicode Character Database, and
+# UTS #18, Unicode Regular Expressions.
+# ================================================
+
+
+# ================================================
+# Numeric Properties
+# ================================================
+cjkAccountingNumeric     ; kAccountingNumeric
+cjkOtherNumeric          ; kOtherNumeric
+cjkPrimaryNumeric        ; kPrimaryNumeric
+nv                       ; Numeric_Value
+
+# ================================================
+# String Properties
+# ================================================
+cf                       ; Case_Folding
+cjkCompatibilityVariant  ; kCompatibilityVariant
+dm                       ; Decomposition_Mapping
+FC_NFKC                  ; FC_NFKC_Closure
+lc                       ; Lowercase_Mapping
+NFKC_CF                  ; NFKC_Casefold
+scf                      ; Simple_Case_Folding         ; sfc
+slc                      ; Simple_Lowercase_Mapping
+stc                      ; Simple_Titlecase_Mapping
+suc                      ; Simple_Uppercase_Mapping
+tc                       ; Titlecase_Mapping
+uc                       ; Uppercase_Mapping
+
+# ================================================
+# Miscellaneous Properties
+# ================================================
+bmg                      ; Bidi_Mirroring_Glyph
+bpb                      ; Bidi_Paired_Bracket
+cjkIICore                ; kIICore
+cjkIRG_GSource           ; kIRG_GSource
+cjkIRG_HSource           ; kIRG_HSource
+cjkIRG_JSource           ; kIRG_JSource
+cjkIRG_KPSource          ; kIRG_KPSource
+cjkIRG_KSource           ; kIRG_KSource
+cjkIRG_MSource           ; kIRG_MSource
+cjkIRG_SSource           ; kIRG_SSource
+cjkIRG_TSource           ; kIRG_TSource
+cjkIRG_UKSource          ; kIRG_UKSource
+cjkIRG_USource           ; kIRG_USource
+cjkIRG_VSource           ; kIRG_VSource
+cjkRSUnicode             ; kRSUnicode                  ; Unicode_Radical_Stroke; URS
+EqUIdeo                  ; Equivalent_Unified_Ideograph
+isc                      ; ISO_Comment
+JSN                      ; Jamo_Short_Name
+na                       ; Name
+na1                      ; Unicode_1_Name
+Name_Alias               ; Name_Alias
+scx                      ; Script_Extensions
+
+# ================================================
+# Catalog Properties
+# ================================================
+age                      ; Age
+blk                      ; Block
+sc                       ; Script
+
+# ================================================
+# Enumerated Properties
+# ================================================
+bc                       ; Bidi_Class
+bpt                      ; Bidi_Paired_Bracket_Type
+ccc                      ; Canonical_Combining_Class
+dt                       ; Decomposition_Type
+ea                       ; East_Asian_Width
+gc                       ; General_Category
+GCB                      ; Grapheme_Cluster_Break
+hst                      ; Hangul_Syllable_Type
+InPC                     ; Indic_Positional_Category
+InSC                     ; Indic_Syllabic_Category
+jg                       ; Joining_Group
+jt                       ; Joining_Type
+lb                       ; Line_Break
+NFC_QC                   ; NFC_Quick_Check
+NFD_QC                   ; NFD_Quick_Check
+NFKC_QC                  ; NFKC_Quick_Check
+NFKD_QC                  ; NFKD_Quick_Check
+nt                       ; Numeric_Type
+SB                       ; Sentence_Break
+vo                       ; Vertical_Orientation
+WB                       ; Word_Break
+
+# ================================================
+# Binary Properties
+# ================================================
+AHex                     ; ASCII_Hex_Digit
+Alpha                    ; Alphabetic
+Bidi_C                   ; Bidi_Control
+Bidi_M                   ; Bidi_Mirrored
+Cased                    ; Cased
+CE                       ; Composition_Exclusion
+CI                       ; Case_Ignorable
+Comp_Ex                  ; Full_Composition_Exclusion
+CWCF                     ; Changes_When_Casefolded
+CWCM                     ; Changes_When_Casemapped
+CWKCF                    ; Changes_When_NFKC_Casefolded
+CWL                      ; Changes_When_Lowercased
+CWT                      ; Changes_When_Titlecased
+CWU                      ; Changes_When_Uppercased
+Dash                     ; Dash
+Dep                      ; Deprecated
+DI                       ; Default_Ignorable_Code_Point
+Dia                      ; Diacritic
+EBase                    ; Emoji_Modifier_Base
+EComp                    ; Emoji_Component
+EMod                     ; Emoji_Modifier
+Emoji                    ; Emoji
+EPres                    ; Emoji_Presentation
+Ext                      ; Extender
+ExtPict                  ; Extended_Pictographic
+Gr_Base                  ; Grapheme_Base
+Gr_Ext                   ; Grapheme_Extend
+Gr_Link                  ; Grapheme_Link
+Hex                      ; Hex_Digit
+Hyphen                   ; Hyphen
+IDC                      ; ID_Continue
+Ideo                     ; Ideographic
+IDS                      ; ID_Start
+IDSB                     ; IDS_Binary_Operator
+IDST                     ; IDS_Trinary_Operator
+Join_C                   ; Join_Control
+LOE                      ; Logical_Order_Exception
+Lower                    ; Lowercase
+Math                     ; Math
+NChar                    ; Noncharacter_Code_Point
+OAlpha                   ; Other_Alphabetic
+ODI                      ; Other_Default_Ignorable_Code_Point
+OGr_Ext                  ; Other_Grapheme_Extend
+OIDC                     ; Other_ID_Continue
+OIDS                     ; Other_ID_Start
+OLower                   ; Other_Lowercase
+OMath                    ; Other_Math
+OUpper                   ; Other_Uppercase
+Pat_Syn                  ; Pattern_Syntax
+Pat_WS                   ; Pattern_White_Space
+PCM                      ; Prepended_Concatenation_Mark
+QMark                    ; Quotation_Mark
+Radical                  ; Radical
+RI                       ; Regional_Indicator
+SD                       ; Soft_Dotted
+STerm                    ; Sentence_Terminal
+Term                     ; Terminal_Punctuation
+UIdeo                    ; Unified_Ideograph
+Upper                    ; Uppercase
+VS                       ; Variation_Selector
+WSpace                   ; White_Space                 ; space
+XIDC                     ; XID_Continue
+XIDS                     ; XID_Start
+XO_NFC                   ; Expands_On_NFC
+XO_NFD                   ; Expands_On_NFD
+XO_NFKC                  ; Expands_On_NFKC
+XO_NFKD                  ; Expands_On_NFKD
+
+# ================================================
+# Total:    129
+
+# EOF
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropertyValueAliases.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/PropertyValueAliases.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/ScriptExtensions.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/ScriptExtensions.txt
@ -0,0 +1,628 @@
+# ScriptExtensions-14.0.0.txt
+# Date: 2021-06-04, 02:19:38 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see http://www.unicode.org/reports/tr44/
+#
+# The Script_Extensions property indicates which characters are commonly used
+# with more than one script, but with a limited number of scripts.
+# For each code point, there is one or more property values.  Each such value is a Script property value.
+# For more information, see:
+#   UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
+#     Especially the sections:
+#       https://www.unicode.org/reports/tr24/#Assignment_Script_Values
+#       https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
+#
+# Each Script_Extensions value in this file consists of a set
+# of one or more abbreviated Script property values. The ordering of the
+# values in that set is not material, but for stability in presentation
+# it is given here as alphabetical.
+#
+# The Script_Extensions values are presented in sorted order in the file.
+# They are sorted first by the number of Script property values in their sets,
+# and then alphabetically by first differing Script property value.
+#
+# Following each distinct Script_Extensions value is the list of code
+# points associated with that value, listed in code point order.
+#
+# All code points not explicitly listed for Script_Extensions
+# have as their value the corresponding Script property value
+#
+# @missing: 0000..10FFFF; <script>
+
+# ================================================
+
+# Property:	Script_Extensions
+
+# ================================================
+
+# Script_Extensions=Beng
+
+1CF7          ; Beng # Mc       VEDIC SIGN ATIKRAMA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva
+
+1CD1          ; Deva # Mn       VEDIC TONE SHARA
+1CD4          ; Deva # Mn       VEDIC SIGN YAJURVEDIC MIDLINE SVARITA
+1CDB          ; Deva # Mn       VEDIC TONE TRIPLE SVARITA
+1CDE..1CDF    ; Deva # Mn   [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW
+1CE2..1CE8    ; Deva # Mn   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CEB..1CEC    ; Deva # Lo   [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CEE..1CF1    ; Deva # Lo   [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
+
+# Total code points: 18
+
+# ================================================
+
+# Script_Extensions=Dupl
+
+1BCA0..1BCA3  ; Dupl # Cf   [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Grek
+
+0342          ; Grek # Mn       COMBINING GREEK PERISPOMENI
+0345          ; Grek # Mn       COMBINING GREEK YPOGEGRAMMENI
+1DC0..1DC1    ; Grek # Mn   [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Hani
+
+3006          ; Hani # Lo       IDEOGRAPHIC CLOSING MARK
+303E..303F    ; Hani # So   [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
+3190..3191    ; Hani # So   [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+3192..3195    ; Hani # No   [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+3196..319F    ; Hani # So  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31C0..31E3    ; Hani # So  [36] CJK STROKE T..CJK STROKE Q
+3220..3229    ; Hani # No  [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+322A..3247    ; Hani # So  [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
+3280..3289    ; Hani # No  [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+328A..32B0    ; Hani # So  [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+32C0..32CB    ; Hani # So  [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
+32FF          ; Hani # So       SQUARE ERA NAME REIWA
+3358..3370    ; Hani # So  [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
+337B..337F    ; Hani # So   [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION
+33E0..33FE    ; Hani # So  [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
+1D360..1D371  ; Hani # No  [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
+1F250..1F251  ; Hani # So   [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
+
+# Total code points: 238
+
+# ================================================
+
+# Script_Extensions=Latn
+
+0363..036F    ; Latn # Mn  [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X
+
+# Total code points: 13
+
+# ================================================
+
+# Script_Extensions=Nand
+
+1CFA          ; Nand # Lo       VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Syrc
+
+1DFA          ; Syrc # Mn       COMBINING DOT BELOW LEFT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Copt
+
+102E0         ; Arab Copt # Mn       COPTIC EPACT THOUSANDS MARK
+102E1..102FB  ; Arab Copt # No  [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
+
+# Total code points: 28
+
+# ================================================
+
+# Script_Extensions=Arab Rohg
+
+06D4          ; Arab Rohg # Po       ARABIC FULL STOP
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Nkoo
+
+FD3E          ; Arab Nkoo # Pe       ORNATE LEFT PARENTHESIS
+FD3F          ; Arab Nkoo # Ps       ORNATE RIGHT PARENTHESIS
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Arab Syrc
+
+064B..0655    ; Arab Syrc # Mn  [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
+0670          ; Arab Syrc # Mn       ARABIC LETTER SUPERSCRIPT ALEF
+
+# Total code points: 12
+
+# ================================================
+
+# Script_Extensions=Arab Thaa
+
+FDF2          ; Arab Thaa # Lo       ARABIC LIGATURE ALLAH ISOLATED FORM
+FDFD          ; Arab Thaa # So       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Beng Deva
+
+1CD5..1CD6    ; Beng Deva # Mn   [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA
+1CD8          ; Beng Deva # Mn       VEDIC TONE CANDRA BELOW
+1CE1          ; Beng Deva # Mc       VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CEA          ; Beng Deva # Lo       VEDIC SIGN ANUSVARA BAHIRGOMUKHA
+1CED          ; Beng Deva # Mn       VEDIC SIGN TIRYAK
+1CF5..1CF6    ; Beng Deva # Lo   [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+A8F1          ; Beng Deva # Mn       COMBINING DEVANAGARI SIGN AVAGRAHA
+
+# Total code points: 9
+
+# ================================================
+
+# Script_Extensions=Bopo Hani
+
+302A..302D    ; Bopo Hani # Mn   [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Bugi Java
+
+A9CF          ; Bugi Java # Lm       JAVANESE PANGRANGKEP
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Cprt Linb
+
+10102         ; Cprt Linb # Po       AEGEAN CHECK MARK
+10137..1013F  ; Cprt Linb # So   [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cyrl Glag
+
+0484          ; Cyrl Glag # Mn       COMBINING CYRILLIC PALATALIZATION
+0487          ; Cyrl Glag # Mn       COMBINING CYRILLIC POKRYTIE
+2E43          ; Cyrl Glag # Po       DASH WITH LEFT UPTURN
+A66F          ; Cyrl Glag # Mn       COMBINING CYRILLIC VZMET
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Cyrl Latn
+
+0485..0486    ; Cyrl Latn # Mn   [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Cyrl Perm
+
+0483          ; Cyrl Perm # Mn       COMBINING CYRILLIC TITLO
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Cyrl Syrc
+
+1DF8          ; Cyrl Syrc # Mn       COMBINING DOT ABOVE LEFT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Gran
+
+1CD3          ; Deva Gran # Po       VEDIC SIGN NIHSHVASA
+1CF3          ; Deva Gran # Lo       VEDIC SIGN ROTATED ARDHAVISARGA
+1CF8..1CF9    ; Deva Gran # Mn   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Deva Nand
+
+1CE9          ; Deva Nand # Lo       VEDIC SIGN ANUSVARA ANTARGOMUKHA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Shrd
+
+1CD7          ; Deva Shrd # Mn       VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA
+1CD9          ; Deva Shrd # Mn       VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
+1CDC..1CDD    ; Deva Shrd # Mn   [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW
+1CE0          ; Deva Shrd # Mn       VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+
+# Total code points: 5
+
+# ================================================
+
+# Script_Extensions=Deva Taml
+
+A8F3          ; Deva Taml # Lo       DEVANAGARI SIGN CANDRABINDU VIRAMA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Geor Latn
+
+10FB          ; Geor Latn # Po       GEORGIAN PARAGRAPH SEPARATOR
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Gran Taml
+
+0BE6..0BEF    ; Gran Taml # Nd  [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0..0BF2    ; Gran Taml # No   [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0BF3          ; Gran Taml # So       TAMIL DAY SIGN
+11301         ; Gran Taml # Mn       GRANTHA SIGN CANDRABINDU
+11303         ; Gran Taml # Mc       GRANTHA SIGN VISARGA
+1133B..1133C  ; Gran Taml # Mn   [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+11FD0..11FD1  ; Gran Taml # No   [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1
+11FD3         ; Gran Taml # No       TAMIL FRACTION THREE QUARTERS
+
+# Total code points: 21
+
+# ================================================
+
+# Script_Extensions=Gujr Khoj
+
+0AE6..0AEF    ; Gujr Khoj # Nd  [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Guru Mult
+
+0A66..0A6F    ; Guru Mult # Nd  [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Hani Latn
+
+A700..A707    ; Hani Latn # Sk   [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
+
+# Total code points: 8
+
+# ================================================
+
+# Script_Extensions=Hira Kana
+
+3031..3035    ; Hira Kana # Lm   [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+3099..309A    ; Hira Kana # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+309B..309C    ; Hira Kana # Sk   [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+30A0          ; Hira Kana # Pd       KATAKANA-HIRAGANA DOUBLE HYPHEN
+30FC          ; Hira Kana # Lm       KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF70          ; Hira Kana # Lm       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F    ; Hira Kana # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+
+# Total code points: 14
+
+# ================================================
+
+# Script_Extensions=Knda Nand
+
+0CE6..0CEF    ; Knda Nand # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Latn Mong
+
+202F          ; Latn Mong # Zs       NARROW NO-BREAK SPACE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Mani Ougr
+
+10AF2         ; Mani Ougr # Po       MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Mong Phag
+
+1802..1803    ; Mong Phag # Po   [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
+1805          ; Mong Phag # Po       MONGOLIAN FOUR DOTS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Arab Syrc Thaa
+
+061C          ; Arab Syrc Thaa # Cf       ARABIC LETTER MARK
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Thaa Yezi
+
+0660..0669    ; Arab Thaa Yezi # Nd  [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Beng Cakm Sylo
+
+09E6..09EF    ; Beng Cakm Sylo # Nd  [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cakm Mymr Tale
+
+1040..1049    ; Cakm Mymr Tale # Nd  [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cpmn Cprt Linb
+
+10100..10101  ; Cpmn Cprt Linb # Po   [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Cprt Lina Linb
+
+10107..10133  ; Cprt Lina Linb # No  [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+
+# Total code points: 45
+
+# ================================================
+
+# Script_Extensions=Deva Gran Knda
+
+1CF4          ; Deva Gran Knda # Mn       VEDIC TONE CANDRA ABOVE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Gran Latn
+
+20F0          ; Deva Gran Latn # Mn       COMBINING ASTERISK ABOVE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Hani Hira Kana
+
+303C          ; Hani Hira Kana # Lo       MASU MARK
+303D          ; Hani Hira Kana # Po       PART ALTERNATION MARK
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Kali Latn Mymr
+
+A92E          ; Kali Latn Mymr # Po       KAYAH LI SIGN CWI
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Knda
+
+1CD0          ; Beng Deva Gran Knda # Mn       VEDIC TONE KARSHANA
+1CD2          ; Beng Deva Gran Knda # Mn       VEDIC TONE PRENKHA
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Buhd Hano Tagb Tglg
+
+1735..1736    ; Buhd Hano Tagb Tglg # Po   [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Kthi Mahj
+
+0966..096F    ; Deva Dogr Kthi Mahj # Nd  [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Bopo Hang Hani Hira Kana
+
+3003          ; Bopo Hang Hani Hira Kana # Po       DITTO MARK
+3013          ; Bopo Hang Hani Hira Kana # So       GETA MARK
+301C          ; Bopo Hang Hani Hira Kana # Pd       WAVE DASH
+301D          ; Bopo Hang Hani Hira Kana # Ps       REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F    ; Bopo Hang Hani Hira Kana # Pe   [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+3030          ; Bopo Hang Hani Hira Kana # Pd       WAVY DASH
+3037          ; Bopo Hang Hani Hira Kana # So       IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+FE45..FE46    ; Bopo Hang Hani Hira Kana # Po   [2] SESAME DOT..WHITE SESAME DOT
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi
+
+060C          ; Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC COMMA
+061B          ; Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC SEMICOLON
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Bopo Hang Hani Hira Kana Yiii
+
+3001..3002    ; Bopo Hang Hani Hira Kana Yiii # Po   [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
+3008          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT ANGLE BRACKET
+3009          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT ANGLE BRACKET
+300A          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT DOUBLE ANGLE BRACKET
+300B          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT DOUBLE ANGLE BRACKET
+300C          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT CORNER BRACKET
+300D          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT CORNER BRACKET
+300E          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT WHITE CORNER BRACKET
+300F          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT WHITE CORNER BRACKET
+3010          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT BLACK LENTICULAR BRACKET
+3011          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT BLACK LENTICULAR BRACKET
+3014          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT TORTOISE SHELL BRACKET
+3015          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT TORTOISE SHELL BRACKET
+3016          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT WHITE LENTICULAR BRACKET
+3017          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT WHITE LENTICULAR BRACKET
+3018          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT WHITE TORTOISE SHELL BRACKET
+3019          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT WHITE TORTOISE SHELL BRACKET
+301A          ; Bopo Hang Hani Hira Kana Yiii # Ps       LEFT WHITE SQUARE BRACKET
+301B          ; Bopo Hang Hani Hira Kana Yiii # Pe       RIGHT WHITE SQUARE BRACKET
+30FB          ; Bopo Hang Hani Hira Kana Yiii # Po       KATAKANA MIDDLE DOT
+FF61          ; Bopo Hang Hani Hira Kana Yiii # Po       HALFWIDTH IDEOGRAPHIC FULL STOP
+FF62          ; Bopo Hang Hani Hira Kana Yiii # Ps       HALFWIDTH LEFT CORNER BRACKET
+FF63          ; Bopo Hang Hani Hira Kana Yiii # Pe       HALFWIDTH RIGHT CORNER BRACKET
+FF64..FF65    ; Bopo Hang Hani Hira Kana Yiii # Po   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+
+# Total code points: 26
+
+# ================================================
+
+# Script_Extensions=Deva Knda Mlym Orya Taml Telu
+
+1CDA          ; Deva Knda Mlym Orya Taml Telu # Mn       VEDIC TONE DOUBLE SVARITA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi
+
+061F          ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC QUESTION MARK
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh
+
+1CF2          ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo       VEDIC SIGN ARDHAVISARGA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc
+
+0640          ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm       ARABIC TATWEEL
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh
+
+A836..A837    ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+A838          ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc       NORTH INDIC RUPEE MARK
+A839          ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So       NORTH INDIC QUANTITY MARK
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh
+
+0952          ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn       DEVANAGARI STRESS SIGN ANUDATTA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh
+
+0951          ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn       DEVANAGARI STRESS SIGN UDATTA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh
+
+A833..A835    ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No   [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh
+
+A830..A832    ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No   [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
+
+0964          ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po       DEVANAGARI DANDA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
+
+0965          ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po       DEVANAGARI DOUBLE DANDA
+
+# Total code points: 1
+
+# EOF
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/Scripts.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/Scripts.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/UnicodeData.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/UnicodeData.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/emoji-data.txt
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/Unicode.tables/emoji-data.txt
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/pcre2_chartables.c.non-standard
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/pcre2_chartables.c.non-standard
@ -0,0 +1,141 @@
+const unsigned char _pcre_default_tables[] = {
+0,1,2,3,4,5,6,7,
+8,9,10,11,12,13,14,15,
+16,17,18,19,20,21,22,23,
+24,25,26,27,28,29,30,31,
+32,33,34,35,36,37,38,39,
+40,41,42,43,44,45,46,47,
+48,49,50,51,52,53,54,55,
+56,57,58,59,60,61,62,63,
+64,97,98,99,100,101,102,103,
+104,105,106,107,108,109,110,111,
+112,113,114,115,116,117,118,119,
+120,121,122,91,92,93,94,95,
+96,97,98,99,100,101,102,103,
+104,105,106,107,108,109,110,111,
+112,113,114,115,116,117,118,119,
+120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,
+136,137,138,139,140,141,142,143,
+144,145,146,147,148,149,150,151,
+152,153,154,155,156,157,158,159,
+160,161,162,163,164,165,166,167,
+168,169,170,171,172,173,174,175,
+176,177,178,179,180,181,182,183,
+184,185,186,187,188,189,190,191,
+224,225,226,227,228,229,230,231,
+232,233,234,235,236,237,238,239,
+240,241,242,243,244,245,246,215,
+248,249,250,251,252,253,254,223,
+224,225,226,227,228,229,230,231,
+232,233,234,235,236,237,238,239,
+240,241,242,243,244,245,246,247,
+248,249,250,251,252,253,254,255,
+0,1,2,3,4,5,6,7,
+8,9,10,11,12,13,14,15,
+16,17,18,19,20,21,22,23,
+24,25,26,27,28,29,30,31,
+32,33,34,35,36,37,38,39,
+40,41,42,43,44,45,46,47,
+48,49,50,51,52,53,54,55,
+56,57,58,59,60,61,62,63,
+64,97,98,99,100,101,102,103,
+104,105,106,107,108,109,110,111,
+112,113,114,115,116,117,118,119,
+120,121,122,91,92,93,94,95,
+96,65,66,67,68,69,70,71,
+72,73,74,75,76,77,78,79,
+80,81,82,83,84,85,86,87,
+88,89,90,123,124,125,126,127,
+128,129,130,131,132,133,134,135,
+136,137,138,139,140,141,142,143,
+144,145,146,147,148,149,150,151,
+152,153,154,155,156,157,158,159,
+160,161,162,163,164,165,166,167,
+168,169,170,171,172,173,174,175,
+176,177,178,179,180,181,182,183,
+184,185,186,187,188,189,190,191,
+224,225,226,227,228,229,230,231,
+232,233,234,235,236,237,238,239,
+240,241,242,243,244,245,246,215,
+248,249,250,251,252,253,254,223,
+192,193,194,195,196,197,198,199,
+200,201,202,203,204,205,206,207,
+208,209,210,211,212,213,214,247,
+216,217,218,219,220,221,222,255,
+0,62,0,0,1,0,0,0,
+0,0,0,0,0,0,0,0,
+32,0,0,0,1,0,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,255,3,
+126,0,0,0,126,0,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,255,3,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12,2,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,
+254,255,255,7,0,0,0,0,
+0,0,0,0,0,0,0,0,
+255,255,127,127,0,0,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,254,255,255,7,
+0,0,0,0,0,4,32,4,
+0,0,0,128,255,255,127,255,
+0,0,0,0,0,0,255,3,
+254,255,255,135,254,255,255,7,
+0,0,0,0,0,4,44,6,
+255,255,127,255,255,255,127,255,
+0,0,0,0,254,255,255,255,
+255,255,255,255,255,255,255,127,
+0,0,0,0,254,255,255,255,
+255,255,255,255,255,255,255,255,
+0,2,0,0,255,255,255,255,
+255,255,255,255,255,255,255,127,
+0,0,0,0,255,255,255,255,
+255,255,255,255,255,255,255,255,
+0,0,0,0,254,255,0,252,
+1,0,0,248,1,0,0,120,
+0,0,0,0,254,255,255,255,
+0,0,128,0,0,0,128,0,
+255,255,255,255,0,0,0,0,
+0,0,0,0,0,0,0,128,
+255,255,255,255,0,0,0,0,
+0,0,0,0,0,0,0,0,
+
+/* Fiddled by hand when the table bits changed. May be broken! */
+
+128,0,0,0,0,0,0,0,
+0,1,1,1,1,1,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,
+1,0,0,0,128,0,0,0,
+128,128,128,128,0,0,128,0,
+24,24,24,24,24,24,24,24,
+24,24,0,0,0,0,0,128,
+0,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,128,128,0,128,16,
+0,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,128,128,0,0,0,
+0,0,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,
+0,0,18,0,0,0,0,0,
+0,0,24,24,0,18,0,0,
+0,24,18,0,0,0,0,0,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,0,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,18,
+18,18,18,18,18,18,18,0,
+18,18,18,18,18,18,18,18
+};
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptest.c
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptest.c
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testinput1
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testinput1
@ -0,0 +1,50 @@
+findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 
+findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 
+findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 
+findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 
+findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 
+findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f 
+findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 
+findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f 
+
+findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 
+findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f 
+findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af 
+findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf 
+findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 
+findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df 
+findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef 
+findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff 
+
+findprop 0100 0101 0102 0103 0104 0105 0106
+
+findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7 
+findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
+findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
+findprop 10000 10001 e01ef f0000 100000
+
+findprop 1b00 12000 7c0 a840 10900
+findprop 1d79 a77d
+
+findprop  0800  083e  a4d0  a4f7  aa80  aadf
+findprop 10b00 10b35 13000 1342e 10840 10855
+
+findprop 11100 1113c 11680 116c0
+
+findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
+
+findprop 118a0 11ac7 16ad0
+
+findprop 11700 14400 108e0 11280 1d800
+
+findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
+
+findprop  a836  a833  1cf4  20f0  1cd0
+
+findprop 32ff
+
+findprop 1f16d
+
+findprop U+10e93 U+10eaa
+
+findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testinput2
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testinput2
@ -0,0 +1,19 @@
+find script Han
+find type Pe script Common scriptx Hangul
+find type Sk
+find type Pd
+find gbreak LVT
+find script Old_Uyghur
+find bidi PDF
+find bidi CS
+find bidi CS type Sm
+find bidi B
+find bidi FSI
+find bidi PDI
+find bidi RLI
+find bidi RLO
+find bidi S
+find bidi WS
+find script bopo
+find bool prependedconcatenationmark
+find bool pcm
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testoutput1
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testoutput1
@ -0,0 +1,409 @@
+findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 
+U+0000 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0001 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0002 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0003 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0004 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0005 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0006 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0007 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0008 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0009 S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000B S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000C WS  Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000E BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+000F BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 
+U+0010 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0011 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0012 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0013 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0014 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0015 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0016 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0017 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0018 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0019 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001A BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001B BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001C B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001D B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001E B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001F S   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 
+U+0020 WS  Separator: Space separator, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
+U+0021 ON  Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, quotationmark]
+U+0022 ON  Punctuation: Other punctuation, common, Other, [ascii, graphemebase, math, patternsyntax]
+U+0023 ET  Punctuation: Other punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
+U+0024 ET  Symbol: Currency symbol, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0025 ET  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0026 ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0027 ON  Punctuation: Other punctuation, common, Other, [ascii, bidimirrored, graphemebase, math, patternsyntax]
+U+0028 ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+0029 ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+002A ON  Punctuation: Other punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
+U+002B ES  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+002C CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+002D ES  Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+002E CS  Punctuation: Other punctuation, common, Other, [graphemebase, whitespace]
+U+002F CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 
+U+0030 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0031 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0032 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0033 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0034 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0035 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0036 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0037 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0038 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0039 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+003A CS  Punctuation: Other punctuation, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+003B ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+003C ON  Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
+U+003D ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+003E ON  Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
+U+003F ON  Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, quotationmark]
+findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 
+U+0040 ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0041 L   Letter: Upper case letter, latin, Other, U+0061, [graphemebase]
+U+0042 L   Letter: Upper case letter, latin, Other, U+0062, [graphemebase]
+U+0043 L   Letter: Upper case letter, latin, Other, U+0063, [graphemebase]
+U+0044 L   Letter: Upper case letter, latin, Other, U+0064, [graphemebase]
+U+0045 L   Letter: Upper case letter, latin, Other, U+0065, [graphemebase]
+U+0046 L   Letter: Upper case letter, latin, Other, U+0066, [graphemebase]
+U+0047 L   Letter: Upper case letter, latin, Other, U+0067, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0048 L   Letter: Upper case letter, latin, Other, U+0068, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0049 L   Letter: Upper case letter, latin, Other, U+0069, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004A L   Letter: Upper case letter, latin, Other, U+006A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004B L   Letter: Upper case letter, latin, Other, U+006B, U+212A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004C L   Letter: Upper case letter, latin, Other, U+006C, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004D L   Letter: Upper case letter, latin, Other, U+006D, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004E L   Letter: Upper case letter, latin, Other, U+006E, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004F L   Letter: Upper case letter, latin, Other, U+006F, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f 
+U+0050 L   Letter: Upper case letter, latin, Other, U+0070, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0051 L   Letter: Upper case letter, latin, Other, U+0071, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0052 L   Letter: Upper case letter, latin, Other, U+0072, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0053 L   Letter: Upper case letter, latin, Other, U+0073, U+017F, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0054 L   Letter: Upper case letter, latin, Other, U+0074, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0055 L   Letter: Upper case letter, latin, Other, U+0075, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0056 L   Letter: Upper case letter, latin, Other, U+0076, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0057 L   Letter: Upper case letter, latin, Other, U+0077, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0058 L   Letter: Upper case letter, latin, Other, U+0078, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0059 L   Letter: Upper case letter, latin, Other, U+0079, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+005A L   Letter: Upper case letter, latin, Other, U+007A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+005B ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+005C ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+005D ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+005E ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+005F ON  Punctuation: Connector punctuation, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, deprecated, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 
+U+0060 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+0061 L   Letter: Lower case letter, latin, Other, U+0041, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0062 L   Letter: Lower case letter, latin, Other, U+0042, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0063 L   Letter: Lower case letter, latin, Other, U+0043, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0064 L   Letter: Lower case letter, latin, Other, U+0044, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0065 L   Letter: Lower case letter, latin, Other, U+0045, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0066 L   Letter: Lower case letter, latin, Other, U+0046, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0067 L   Letter: Lower case letter, latin, Other, U+0047, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0068 L   Letter: Lower case letter, latin, Other, U+0048, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0069 L   Letter: Lower case letter, latin, Other, U+0049, [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+006A L   Letter: Lower case letter, latin, Other, U+004A, [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+006B L   Letter: Lower case letter, latin, Other, U+004B, U+212A, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006C L   Letter: Lower case letter, latin, Other, U+004C, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006D L   Letter: Lower case letter, latin, Other, U+004D, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006E L   Letter: Lower case letter, latin, Other, U+004E, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006F L   Letter: Lower case letter, latin, Other, U+004F, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f 
+U+0070 L   Letter: Lower case letter, latin, Other, U+0050, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0071 L   Letter: Lower case letter, latin, Other, U+0051, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0072 L   Letter: Lower case letter, latin, Other, U+0052, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0073 L   Letter: Lower case letter, latin, Other, U+0053, U+017F, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0074 L   Letter: Lower case letter, latin, Other, U+0054, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0075 L   Letter: Lower case letter, latin, Other, U+0055, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0076 L   Letter: Lower case letter, latin, Other, U+0056, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0077 L   Letter: Lower case letter, latin, Other, U+0057, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0078 L   Letter: Lower case letter, latin, Other, U+0058, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0079 L   Letter: Lower case letter, latin, Other, U+0059, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+007A L   Letter: Lower case letter, latin, Other, U+005A, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+007B ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+007C ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+007D ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+007E ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+007F BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+
+findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 
+U+0080 BN  Control: Control, common, Control
+U+0081 BN  Control: Control, common, Control
+U+0082 BN  Control: Control, common, Control
+U+0083 BN  Control: Control, common, Control
+U+0084 BN  Control: Control, common, Control
+U+0085 B   Control: Control, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+U+0086 BN  Control: Control, common, Control
+U+0087 BN  Control: Control, common, Control
+U+0088 BN  Control: Control, common, Control
+U+0089 BN  Control: Control, common, Control
+U+008A BN  Control: Control, common, Control
+U+008B BN  Control: Control, common, Control
+U+008C BN  Control: Control, common, Control
+U+008D BN  Control: Control, common, Control
+U+008E BN  Control: Control, common, Control
+U+008F BN  Control: Control, common, Control
+findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f 
+U+0090 BN  Control: Control, common, Control
+U+0091 BN  Control: Control, common, Control
+U+0092 BN  Control: Control, common, Control
+U+0093 BN  Control: Control, common, Control
+U+0094 BN  Control: Control, common, Control
+U+0095 BN  Control: Control, common, Control
+U+0096 BN  Control: Control, common, Control
+U+0097 BN  Control: Control, common, Control
+U+0098 BN  Control: Control, common, Control
+U+0099 BN  Control: Control, common, Control
+U+009A BN  Control: Control, common, Control
+U+009B BN  Control: Control, common, Control
+U+009C BN  Control: Control, common, Control
+U+009D BN  Control: Control, common, Control
+U+009E BN  Control: Control, common, Control
+U+009F BN  Control: Control, common, Control
+findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af 
+U+00A0 CS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+00A1 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A2 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A3 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A4 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A5 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A6 ON  Symbol: Other symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A7 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00A9 ON  Symbol: Other symbol, common, Extended Pictographic, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AA L   Letter: Other letter, latin, Other, [caseignorable, graphemeextend]
+U+00AB ON  Punctuation: Initial punctuation, common, Other, [graphemebase, sentenceterminal, terminalpunctuation]
+U+00AC ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+00AE ON  Symbol: Other symbol, common, Extended Pictographic, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf 
+U+00B0 ET  Symbol: Other symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00B1 ET  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00B2 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B3 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B4 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B5 L   Letter: Lower case letter, common, Other, U+03BC, U+039C, [alphabetic, deprecated, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B6 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00B7 ON  Punctuation: Other punctuation, common, Other, [alphabetic, graphemebase, idcontinue, xidcontinue]
+U+00B8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B9 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BA L   Letter: Other letter, latin, Other, [caseignorable, graphemeextend]
+U+00BB ON  Punctuation: Final punctuation, common, Other, [graphemebase, sentenceterminal, terminalpunctuation]
+U+00BC ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BD ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BE ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BF ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 
+U+00C0 L   Letter: Upper case letter, latin, Other, U+00E0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C1 L   Letter: Upper case letter, latin, Other, U+00E1, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C2 L   Letter: Upper case letter, latin, Other, U+00E2, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C3 L   Letter: Upper case letter, latin, Other, U+00E3, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C4 L   Letter: Upper case letter, latin, Other, U+00E4, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C5 L   Letter: Upper case letter, latin, Other, U+00E5, U+212B, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C6 L   Letter: Upper case letter, latin, Other, U+00E6, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C7 L   Letter: Upper case letter, latin, Other, U+00E7, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C8 L   Letter: Upper case letter, latin, Other, U+00E8, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C9 L   Letter: Upper case letter, latin, Other, U+00E9, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CA L   Letter: Upper case letter, latin, Other, U+00EA, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CB L   Letter: Upper case letter, latin, Other, U+00EB, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CC L   Letter: Upper case letter, latin, Other, U+00EC, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CD L   Letter: Upper case letter, latin, Other, U+00ED, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CE L   Letter: Upper case letter, latin, Other, U+00EE, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CF L   Letter: Upper case letter, latin, Other, U+00EF, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df 
+U+00D0 L   Letter: Upper case letter, latin, Other, U+00F0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D1 L   Letter: Upper case letter, latin, Other, U+00F1, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D2 L   Letter: Upper case letter, latin, Other, U+00F2, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D3 L   Letter: Upper case letter, latin, Other, U+00F3, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D4 L   Letter: Upper case letter, latin, Other, U+00F4, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D5 L   Letter: Upper case letter, latin, Other, U+00F5, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D6 L   Letter: Upper case letter, latin, Other, U+00F6, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D7 ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00D8 L   Letter: Upper case letter, latin, Other, U+00F8, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D9 L   Letter: Upper case letter, latin, Other, U+00F9, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DA L   Letter: Upper case letter, latin, Other, U+00FA, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DB L   Letter: Upper case letter, latin, Other, U+00FB, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DC L   Letter: Upper case letter, latin, Other, U+00FC, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DD L   Letter: Upper case letter, latin, Other, U+00FD, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DE L   Letter: Upper case letter, latin, Other, U+00FE, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DF L   Letter: Lower case letter, latin, Other, U+1E9E, [alphabetic, deprecated, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef 
+U+00E0 L   Letter: Lower case letter, latin, Other, U+00C0, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E1 L   Letter: Lower case letter, latin, Other, U+00C1, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E2 L   Letter: Lower case letter, latin, Other, U+00C2, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E3 L   Letter: Lower case letter, latin, Other, U+00C3, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E4 L   Letter: Lower case letter, latin, Other, U+00C4, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E5 L   Letter: Lower case letter, latin, Other, U+00C5, U+212B, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E6 L   Letter: Lower case letter, latin, Other, U+00C6, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E7 L   Letter: Lower case letter, latin, Other, U+00C7, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E8 L   Letter: Lower case letter, latin, Other, U+00C8, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E9 L   Letter: Lower case letter, latin, Other, U+00C9, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EA L   Letter: Lower case letter, latin, Other, U+00CA, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EB L   Letter: Lower case letter, latin, Other, U+00CB, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EC L   Letter: Lower case letter, latin, Other, U+00CC, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00ED L   Letter: Lower case letter, latin, Other, U+00CD, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EE L   Letter: Lower case letter, latin, Other, U+00CE, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EF L   Letter: Lower case letter, latin, Other, U+00CF, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff 
+U+00F0 L   Letter: Lower case letter, latin, Other, U+00D0, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F1 L   Letter: Lower case letter, latin, Other, U+00D1, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F2 L   Letter: Lower case letter, latin, Other, U+00D2, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F3 L   Letter: Lower case letter, latin, Other, U+00D3, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F4 L   Letter: Lower case letter, latin, Other, U+00D4, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F5 L   Letter: Lower case letter, latin, Other, U+00D5, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F6 L   Letter: Lower case letter, latin, Other, U+00D6, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F7 ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00F8 L   Letter: Lower case letter, latin, Other, U+00D8, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F9 L   Letter: Lower case letter, latin, Other, U+00D9, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FA L   Letter: Lower case letter, latin, Other, U+00DA, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FB L   Letter: Lower case letter, latin, Other, U+00DB, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FC L   Letter: Lower case letter, latin, Other, U+00DC, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FD L   Letter: Lower case letter, latin, Other, U+00DD, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FE L   Letter: Lower case letter, latin, Other, U+00DE, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FF L   Letter: Lower case letter, latin, Other, U+0178, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+
+findprop 0100 0101 0102 0103 0104 0105 0106
+U+0100 L   Letter: Upper case letter, latin, Other, U+0101, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0101 L   Letter: Lower case letter, latin, Other, U+0100, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0102 L   Letter: Upper case letter, latin, Other, U+0103, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0103 L   Letter: Lower case letter, latin, Other, U+0102, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0104 L   Letter: Upper case letter, latin, Other, U+0105, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0105 L   Letter: Lower case letter, latin, Other, U+0104, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0106 L   Letter: Upper case letter, latin, Other, U+0107, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+
+findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7 
+U+FFE0 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE1 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE2 ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFE3 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+FFE4 ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE5 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE6 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE7 L   Control: Unassigned, unknown, Other
+findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
+U+FFE8 ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE9 ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEA ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEB ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEC ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFED ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFEE ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFEF L   Control: Unassigned, unknown, Other
+findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
+U+FFF8 BN  Control: Unassigned, unknown, Control, [dash, defaultignorablecodepoint, deprecated, extendedpictographic, joincontrol, lowercase, patternwhitespace, quotationmark, sentenceterminal, softdotted, xidcontinue, xidstart]
+U+FFF9 ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFA ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFB ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFC ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFFD ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFFE BN  Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FFFF BN  Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+findprop 10000 10001 e01ef f0000 100000
+U+10000 L   Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10001 L   Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+E01EF NSM Mark: Non-spacing mark, inherited, Extend, []
+U+F0000 L   Control: Private use, unknown, Other
+U+100000 L   Control: Private use, unknown, Other
+
+findprop 1b00 12000 7c0 a840 10900
+U+1B00 NSM Mark: Non-spacing mark, balinese, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+12000 L   Letter: Other letter, cuneiform, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+07C0 R   Number: Decimal number, nko, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+A840 L   Letter: Other letter, phagspa, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10900 R   Letter: Other letter, phoenician, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+findprop 1d79 a77d
+U+1D79 L   Letter: Lower case letter, latin, Other, U+A77D, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+A77D L   Letter: Upper case letter, latin, Other, U+1D79, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+
+findprop  0800  083e  a4d0  a4f7  aa80  aadf
+U+0800 R   Letter: Other letter, samaritan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+083E R   Punctuation: Other punctuation, samaritan, Other, [bidimirrored, graphemebase, math, patternsyntax]
+U+A4D0 L   Letter: Other letter, lisu, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+A4F7 L   Letter: Other letter, lisu, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AA80 L   Letter: Other letter, taiviet, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AADF L   Punctuation: Other punctuation, taiviet, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+findprop 10b00 10b35 13000 1342e 10840 10855
+U+10B00 R   Letter: Other letter, avestan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10B35 R   Letter: Other letter, avestan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+13000 L   Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1342E L   Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10840 R   Letter: Other letter, imperialaramaic, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10855 R   Letter: Other letter, imperialaramaic, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+
+findprop 11100 1113c 11680 116c0
+U+11100 NSM Mark: Non-spacing mark, chakma, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+1113C L   Number: Decimal number, chakma, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+11680 L   Letter: Other letter, takri, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+116C0 L   Number: Decimal number, takri, Other, [graphemebase, patternsyntax, terminalpunctuation]
+
+findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000E BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0711 NSM Mark: Non-spacing mark, syriac, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+1B04 L   Mark: Spacing mark, balinese, SpacingMark, [dash, emoji, extendedpictographic, graphemebase, patternsyntax]
+U+1111 L   Letter: Other letter, hangul, Hangul syllable type L, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1169 L   Letter: Other letter, hangul, Hangul syllable type V, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+11FE L   Letter: Other letter, hangul, Hangul syllable type T, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE4C L   Letter: Other letter, hangul, Hangul syllable type LV, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD89 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+
+findprop 118a0 11ac7 16ad0
+U+118A0 L   Letter: Upper case letter, warangciti, Other, U+118C0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+11AC7 L   Letter: Other letter, paucinhau, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+16AD0 L   Letter: Other letter, bassavah, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+
+findprop 11700 14400 108e0 11280 1d800
+U+11700 L   Letter: Other letter, ahom, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+14400 L   Letter: Other letter, anatolianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+108E0 R   Letter: Other letter, hatran, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+11280 L   Letter: Other letter, multani, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1D800 L   Symbol: Other symbol, signwriting, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+
+findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
+U+11800 L   Letter: Other letter, dogra, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1E903 R   Letter: Upper case letter, adlam, Other, U+1E925, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+11DA9 L   Number: Decimal number, gunjalagondi, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend, [extendedpictographic, graphemebase, patternsyntax]
+U+11EE0 L   Letter: Other letter, makasar, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+16E48 L   Letter: Upper case letter, medefaidrin, Other, U+16E68, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+10F27 R   Letter: Other letter, oldsogdian, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10F30 AL  Letter: Other letter, sogdian, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+
+findprop  a836  a833  1cf4  20f0  1cd0
+U+A836 L   Symbol: Other symbol, common, Other, [devanagari, gurmukhi, gujarati, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+A833 L   Number: Other number, common, Other, [devanagari, gurmukhi, gujarati, kannada, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra, nandinagari], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, kannada, grantha], [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [latin, devanagari, grantha], [caseignorable, graphemebase, patternsyntax, quotationmark]
+U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, bengali, kannada, grantha], [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+
+findprop 32ff
+U+32FF L   Symbol: Other symbol, common, Other, [han], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+
+findprop 1f16d
+U+1F16D ON  Symbol: Other symbol, common, Extended Pictographic, [ascii, sentenceterminal, unifiedideograph, whitespace, xidcontinue]
+
+findprop U+10e93 U+10eaa
+U+10E93 R   Letter: Other letter, yezidi, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10EAA R   Control: Unassigned, unknown, Other
+
+findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
+U+0602 AN  Control: Format, arabic, Prepend, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, lowercase]
+U+202A LRE Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202B RLE Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202C PDF Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2068 FSI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2069 PDI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202D LRO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202E RLO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2067 RLI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testoutput2
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/ucptestdata/testoutput2
@ -0,0 +1,298 @@
+find script Han
+U+2E80..U+2E99 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+U+2E9B..U+2EF3 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+U+2F00..U+2FD5 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+        U+3005 L   Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+3007 L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+3021..U+3029 L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+3038..U+303A L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+303B L   Letter: Modifier letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
+U+3400..U+4DBF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+4E00..U+9FFF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+F900..U+FA0D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA0E..U+FA0F L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA10 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA11 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA12 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA13..U+FA14 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA15..U+FA1E L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA1F L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA20 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA21 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA22 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA23..U+FA24 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA25..U+FA26 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA27..U+FA29 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA2A..U+FA6D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA70..U+FAD9 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+16FE2 ON  Punctuation: Other punctuation, han, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+       U+16FE3 L   Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+16FF0..U+16FF1 L   Mark: Spacing mark, han, SpacingMark, [caseignorable, graphemeextend, idcontinue, ideographic, xidcontinue]
+U+20000..U+2A6DF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2A700..U+2B738 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2B740..U+2B81D L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2B820..U+2CEA1 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2CEB0..U+2EBE0 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2F800..U+2FA1D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+30000..U+3134A L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+find type Pe script Common scriptx Hangul
+U+3009 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+300B ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+300D ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation]
+U+300F ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation]
+U+3011 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3015 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3017 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3019 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+301B ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+301E..U+301F ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [softdotted, terminalpunctuation, unifiedideograph, xidcontinue, xidstart]
+        U+FF63 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, emojimodifier, emojimodifierbase]
+find type Sk
+U+005E ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0060 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+00A8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B4 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02C2..U+02C5 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02D2..U+02DF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02E5..U+02E9 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02EA..U+02EB ON  Symbol: Modifier symbol, bopomofo, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+02ED ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02EF..U+02FF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0375 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0384 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0385 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0888 AL  Symbol: Modifier symbol, arabic, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, math, softdotted, xidcontinue, xidstart]
+        U+1FBD ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FBF..U+1FC1 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FCD..U+1FCF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FDD..U+1FDF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FED..U+1FEF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FFD..U+1FFE ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+309B..U+309C ON  Symbol: Modifier symbol, common, Other, [hiragana, katakana], [alphabetic, bidimirrored, caseignorable, cased, changeswhencasefolded, changeswhenlowercased, changeswhentitlecased, changeswhenuppercased, dash, defaultignorablecodepoint, deprecated, diacritic, emoji, emojicomponent, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, extender, graphemebase, graphemeextend, graphemelink, hexdigit, idsbinaryoperator, idstrinaryoperator, idcontinue, idstart, ideographic, sentenceterminal, unifiedideograph, whitespace, xidcontinue]
+U+A700..U+A707 ON  Symbol: Modifier symbol, common, Other, [latin, han], [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A708..U+A716 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A720..U+A721 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A789..U+A78A L   Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+AB5B L   Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+AB6A..U+AB6B ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+FBB2..U+FBC2 AL  Symbol: Modifier symbol, arabic, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, math, softdotted, xidcontinue, xidstart]
+        U+FF3E ON  Symbol: Modifier symbol, common, Other, [asciihexdigit, bidicontrol, bidimirrored, cased, changeswhencasefolded, sentenceterminal, unifiedideograph, whitespace, xidstart]
+        U+FF40 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+FFE3 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1F3FB..U+1F3FF ON  Symbol: Modifier symbol, common, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternsyntax, radical, sentenceterminal, terminalpunctuation]
+find type Pd
+U+002D ES  Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+058A ON  Punctuation: Dash punctuation, armenian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+05BE R   Punctuation: Dash punctuation, hebrew, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+1400 ON  Punctuation: Dash punctuation, canadianaboriginal, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+1806 ON  Punctuation: Dash punctuation, mongolian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+2010..U+2015 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E17 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E1A ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+U+2E3A..U+2E3B ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E40 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E5D ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+301C ON  Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+        U+30A0 ON  Punctuation: Dash punctuation, common, Other, [hiragana, katakana], [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+FE31..U+FE32 ON  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+FE58 ON  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+FE63 ES  Punctuation: Dash punctuation, common, Other, [caseignorable, sentenceterminal, unifiedideograph, xidcontinue]
+        U+FF0D ES  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+10EAD R   Punctuation: Dash punctuation, yezidi, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+find gbreak LVT
+U+AC01..U+AC1B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC1D..U+AC37 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC39..U+AC53 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC55..U+AC6F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC71..U+AC8B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC8D..U+ACA7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACA9..U+ACC3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACC5..U+ACDF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACE1..U+ACFB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACFD..U+AD17 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD19..U+AD33 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD35..U+AD4F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD51..U+AD6B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD6D..U+AD87 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD89..U+ADA3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADA5..U+ADBF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADC1..U+ADDB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADDD..U+ADF7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADF9..U+AE13 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE15..U+AE2F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE31..U+AE4B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE4D..U+AE67 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE69..U+AE83 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE85..U+AE9F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEA1..U+AEBB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEBD..U+AED7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AED9..U+AEF3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEF5..U+AF0F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF11..U+AF2B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF2D..U+AF47 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF49..U+AF63 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF65..U+AF7F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF81..U+AF9B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF9D..U+AFB7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFB9..U+AFD3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFD5..U+AFEF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFF1..U+B00B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B00D..U+B027 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B029..U+B043 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B045..U+B05F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B061..U+B07B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B07D..U+B097 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B099..U+B0B3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0B5..U+B0CF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0D1..U+B0EB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0ED..U+B107 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B109..U+B123 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B125..U+B13F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B141..U+B15B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B15D..U+B177 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B179..U+B193 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B195..U+B1AF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1B1..U+B1CB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1CD..U+B1E7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1E9..U+B203 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B205..U+B21F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B221..U+B23B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B23D..U+B257 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B259..U+B273 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B275..U+B28F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B291..U+B2AB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2AD..U+B2C7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2C9..U+B2E3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2E5..U+B2FF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B301..U+B31B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B31D..U+B337 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B339..U+B353 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B355..U+B36F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B371..U+B38B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B38D..U+B3A7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3A9..U+B3C3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3C5..U+B3DF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3E1..U+B3FB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3FD..U+B417 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B419..U+B433 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B435..U+B44F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B451..U+B46B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B46D..U+B487 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B489..U+B4A3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4A5..U+B4BF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4C1..U+B4DB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4DD..U+B4F7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4F9..U+B513 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B515..U+B52F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B531..U+B54B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B54D..U+B567 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B569..U+B583 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B585..U+B59F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5A1..U+B5BB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5BD..U+B5D7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5D9..U+B5F3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5F5..U+B60F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B611..U+B62B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B62D..U+B647 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B649..U+B663 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B665..U+B67F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B681..U+B69B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B69D..U+B6B7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B6B9..U+B6D3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B6D5..U+B6EF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+...
+find script Old_Uyghur
+U+10F70..U+10F81 R   Letter: Other letter, olduyghur, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10F82..U+10F85 NSM Mark: Non-spacing mark, olduyghur, Extend, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+10F86..U+10F89 R   Punctuation: Other punctuation, olduyghur, Other, [bidimirrored, graphemebase, math, patternsyntax]
+find bidi PDF
+U+202C PDF Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi CS
+U+002C CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+002E CS  Punctuation: Other punctuation, common, Other, [graphemebase, whitespace]
+U+002F CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+003A CS  Punctuation: Other punctuation, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+00A0 CS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+060C CS  Punctuation: Other punctuation, common, Other, [arabic, syriac, thaana, nko, hanifirohingya, yezidi], [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+202F CS  Separator: Space separator, common, Other, [latin, mongolian], [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+2044 CS  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+FE50 CS  Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+FE52 CS  Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF0C CS  Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+FF0E CS  Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FF0F CS  Punctuation: Other punctuation, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+find bidi CS type Sm
+U+2044 CS  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+find bidi B
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+001C..U+001E B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+        U+0085 B   Control: Control, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+        U+2029 B   Separator: Paragraph separator, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+find bidi FSI
+U+2068 FSI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi PDI
+U+2069 PDI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi RLI
+U+2067 RLI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi RLO
+U+202E RLO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi S
+U+0009 S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000B S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+001F S   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+find bidi WS
+U+000C WS  Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+0020 WS  Separator: Space separator, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
+U+1680 WS  Separator: Space separator, ogham, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+2000..U+200A WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+        U+2028 WS  Separator: Line separator, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+        U+205F WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+        U+3000 WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+find script bopo
+U+02EA..U+02EB ON  Symbol: Modifier symbol, bopomofo, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+3105..U+312F L   Letter: Other letter, bopomofo, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+31A0..U+31BF L   Letter: Other letter, bopomofo, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+find bool prependedconcatenationmark
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+180E BN  Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark]
+U+200B BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2060 BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2118 ON  Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+AAC0 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+AAC2 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FEFF BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF21..U+FF26 L   Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+10D22..U+10D23 AL  Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+       U+1135D L   Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+1BCA0..U+1BCA3 BN  Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark]
+U+1D173..U+1D17A BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+1F1E6..U+1F1FF L   Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+find bool pcm
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+180E BN  Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark]
+U+200B BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2060 BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2118 ON  Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+AAC0 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+AAC2 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FEFF BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF21..U+FF26 L   Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+10D22..U+10D23 AL  Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+       U+1135D L   Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+1BCA0..U+1BCA3 BN  Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark]
+U+1D173..U+1D17A BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+1F1E6..U+1F1FF L   Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
--- a/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/utf8.c
+++ b/Kha/Backends/Kinc-hxcpp/khacpp/project/thirdparty/pcre2-10.42-8/maint/utf8.c
@ -0,0 +1,347 @@
+/****************************************************
+* PCRE maintainers' helper program: UTF-8 converter *
+****************************************************/
+
+/* This is a test program for converting character code points to UTF-8 and
+vice versa. Note that this program conforms to the original definition of
+UTF-8, which allows codepoints up to 7fffffff. The more recent definition
+limits the validity of Unicode UTF-8 codepoints to a maximum of 10ffff, and
+forbids the "surrogate" code points. This program now gives warnings for these
+invalid code points.
+
+The arguments are either single code point values written as U+hh.. or 0xhh..
+for conversion to UTF-8, or sequences of hex values, written without 0x and
+optionally including spaces (but such arguments must be quoted), for conversion
+from UTF-8 to codepoints. For example:
+
+./utf8 0x1234
+U+00001234 => e1 88 b4
+
+./utf8 "e1 88 b4"
+U+00001234 <= e1 88 b4
+
+In the second case, a number of UTF-8 characters can be present in one
+argument. In other words, each such argument is interpreted (after ignoring
+spaces) as a string of UTF-8 bytes representing a string of characters:
+
+./utf8 "65 e188b4 77"
+U+00000065 <= 65
+U+00001234 <= e1 88 b4
+U+00000077 <= 77
+
+If the option -s is given, the sequence of UTF-8 bytes is written out between
+angle brackets at the end of the line. On a UTF-8 terminal, this will show the
+appropriate graphic for the code point.
+
+Errors provoke error messages, but the program carries on with the next
+argument. The return code is always zero.
+
+Philip Hazel
+Original creation date: unknown
+Code extended and tidied to avoid compiler warnings: 26 March 2020
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* The valid ranges for UTF-8 characters are:
+
+0000 0000  to  0000 007f   1 byte (ascii)
+0000 0080  to  0000 07ff   2 bytes
+0000 0800  to  0000 ffff   3 bytes
+0001 0000  to  001f ffff   4 bytes
+0020 0000  to  03ff ffff   5 bytes
+0400 0000  to  7fff ffff   6 bytes
+*/
+
+/* utf8_table1[n] is the largest code point that fits in an encoding of n+1
+bytes. ord2utf8() scans it to choose the encoding length, and utf82ord() scans
+it to check that a decoded value used the shortest possible encoding. */
+static const unsigned int utf8_table1[] = {
+  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
+/* utf8_table2[n] holds the high-order marker bits that ord2utf8() ORs into
+the first byte of an encoding that has n additional bytes. */
+static const int utf8_table2[] = {
+  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+/* utf8_table3[n] is the mask that utf82ord() applies to the first byte of a
+character with n additional bytes in order to extract its data bits. */
+static const int utf8_table3[] = {
+  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+
+
+/*************************************************
+*       Convert character value to UTF-8         *
+*************************************************/
+
+/* Encode a code point in the range 0 - 0x7fffffff as a UTF-8 character, using
+the shortest encoding (1 to 6 bytes) that can hold the value.
+
+Arguments:
+  cvalue     the character value
+  buffer     pointer to buffer for result - at least 6 bytes long
+
+Returns:     number of bytes placed in the buffer
+             0 if input code point is too big
+*/
+
+static size_t
+ord2utf8(unsigned long int cvalue, unsigned char *buffer)
+{
+const size_t table_len = sizeof(utf8_table1)/sizeof(int);
+size_t extra = 0;      /* Number of bytes that follow the first byte */
+size_t k;
+unsigned char *p;
+
+/* Find the first table entry that can hold the value; its index is the number
+of additional bytes needed. Running off the end means the value is too big. */
+
+while (extra < table_len && cvalue > utf8_table1[extra]) extra++;
+if (extra == table_len) return 0;
+
+/* Write the additional bytes from last to first, six bits of the value at a
+time, each marked with the 0x80 bit. */
+
+p = buffer + extra;
+for (k = 0; k < extra; k++)
+  {
+  *p-- = 0x80 | (cvalue & 0x3f);
+  cvalue >>= 6;
+  }
+
+/* What is left of the value goes into the first byte, together with the
+marker bits for this encoding length. */
+
+*p = utf8_table2[extra] | cvalue;
+return extra + 1;
+}
+
+
+
+/*************************************************
+*            Convert UTF-8 string to value       *
+*************************************************/
+
+/* Decode the single UTF-8 character that starts at the beginning of a string
+of bytes. On success the character's value is stored via vptr. On failure the
+offset of the malformation is stored via lenptr. An overlong encoding (one
+that decodes but is not the shortest possible) still stores the value via
+vptr, and its error offset is just after the last byte.
+
+Arguments:
+  buffer   a pointer to the byte vector
+  buffend  a pointer to the end of the buffer
+  vptr     a pointer to a variable to receive the value
+  lenptr   a pointer to a variable to receive the offset when error detected
+
+Returns:   > 0 => the number of bytes consumed
+             0 => invalid UTF-8: first byte missing 0x40 bit
+            -1 => invalid UTF-8: first byte has too many high-order 1-bits
+            -2 => incomplete sequence at end of string
+            -3 => incomplete sequence within string
+            -4 => overlong code sequence
+*/
+
+static int
+utf82ord(unsigned char *buffer, unsigned char *buffend,
+  long unsigned int *vptr, int *lenptr)
+{
+unsigned int first = *buffer++;
+unsigned int probe = first;
+unsigned int value;
+int extra = -1;        /* Becomes the number of additional bytes */
+int k, shift, needed;
+
+/* Count the high-order 1-bits of the first byte. The count, less one, is the
+number of additional bytes; the scan gives up once it reaches 6. */
+
+while (extra < 6 && (probe & 0x80) != 0)
+  {
+  probe <<= 1;
+  extra++;
+  }
+
+/* ASCII character; first byte does not have 0x80 bit */
+
+if (extra < 0)
+  {
+  *vptr = first;
+  return 1;
+  }
+
+/* First byte has 0x80 but is missing 0x40 bit */
+
+if (extra == 0)
+  {
+  *lenptr = 0;
+  return 0;
+  }
+
+/* Too many high bits */
+
+if (extra == 6)
+  {
+  *lenptr = 0;
+  return -1;
+  }
+
+/* There are now between 1 and 5 additional bytes to read. Start with the data
+bits of the first byte, shifted up to their final position. */
+
+shift = 6*extra;
+value = (first & utf8_table3[extra]) << shift;
+
+for (k = 1; k <= extra; k++)
+  {
+  unsigned int cont;
+
+  if (buffer >= buffend)
+    {
+    *lenptr = k;
+    return -2;
+    }
+
+  cont = *buffer++;
+  if ((cont & 0xc0) != 0x80)
+    {
+    *lenptr = k;
+    return -3;
+    }
+
+  shift -= 6;
+  value |= (cont & 0x3f) << shift;
+  }
+
+/* Valid UTF-8 syntax */
+
+*vptr = value;
+
+/* The value must need exactly this many additional bytes; if a shorter
+encoding would have done, the sequence is overlong. */
+
+needed = 0;
+while (needed < (int)(sizeof(utf8_table1)/sizeof(int)) &&
+       value > utf8_table1[needed])
+  needed++;
+
+if (needed != extra)
+  {
+  *lenptr = extra + 1;
+  return -4;
+  }
+
+/* Valid value */
+
+return extra + 1;
+}
+
+
+
+/*************************************************
+*                 Main Program                   *
+*************************************************/
+
+/* Each command line argument is processed independently. An argument that
+starts with "0x" or "U+" is read as a single hexadecimal code point and encoded
+with ord2utf8(). Any other argument is read as a string of hex digit pairs
+(spaces are ignored), collected into a byte buffer, and decoded one character
+at a time with utf82ord(). A leading "-s" option additionally writes the raw
+bytes of each character between ">" and "<".
+
+Errors are reported on stdout and processing carries on with the next
+argument.
+
+Arguments:
+  argc       the argument count
+  argv       the argument vector
+
+Returns:     always 0
+*/
+
+int
+main(int argc, char **argv)
+{
+int i = 1;
+int show = 0;
+unsigned char buffer[64];
+
+if (argc > 1 && strcmp(argv[1], "-s") == 0)
+  {
+  show = 1;
+  i = 2;
+  }
+
+for (; i < argc; i++)
+  {
+  char *x = argv[i];
+  char *endptr;
+  if (strncmp(x, "0x", 2) == 0 || strncmp(x, "U+", 2) == 0)
+    {
+    size_t rc, j;
+    unsigned long int d = strtoul(x+2, &endptr, 16);
+
+    /* strtoul() leaves endptr at its starting point when no digits were
+    converted, so a bare "0x" or "U+" is rejected rather than read as zero. */
+
+    if (endptr == x+2 || *endptr != 0)
+      {
+      printf("** Invalid hex number %s\n", x);
+      continue;   /* With next argument */
+      }
+    rc = ord2utf8(d, buffer);
+    printf("U+%08lx => ", d);
+    if (rc == 0)
+      printf("** Code point greater than 0x7fffffff cannot be encoded");
+    else
+      {
+      for (j = 0; j < rc; j++) printf("%02x ", buffer[j]);
+      if (show)
+        {
+        printf(">");
+        for (j = 0; j < rc; j++) printf("%c", buffer[j]);
+        printf("< ");
+        }
+      if (d >= 0xd800 && d <= 0xdfff)
+        printf("** Invalid Unicode (surrogate)");
+      else if (d > 0x10ffff)
+        printf("** Invalid Unicode (greater than U+10ffff)");
+      }
+    printf("\n");
+    }
+  else
+    {
+    unsigned char *bptr;
+    unsigned char *buffend;
+    int len = 0;
+    int y = 0;      /* Value of the byte being assembled */
+    int z = 0;      /* Set when one digit of a pair has been read */
+
+    /* Convert pairs of hex digits into bytes in buffer. A negative len marks
+    an argument that has been rejected. */
+
+    for (;;)
+      {
+      unsigned char ch;
+
+      while (*x == ' ') x++;
+      if (*x == 0 && !z) break;
+
+      /* The <ctype.h> functions need a value that is representable as an
+      unsigned char; passing a negative plain char is undefined behaviour. */
+
+      ch = (unsigned char)*x;
+      if (!isxdigit(ch))
+        {
+        printf("** Malformed hex string: %s\n", argv[i]);
+        len = -1;
+        break;
+        }
+      y = y * 16 + tolower(ch) - (isdigit(ch)? '0' : 'a' - 10);
+      x++;
+      if (z)
+        {
+        /* Never write past the end of the fixed-size buffer */
+
+        if (len >= (int)sizeof(buffer))
+          {
+          printf("** Hex string too long (more than %d bytes): %s\n",
+            (int)sizeof(buffer), argv[i]);
+          len = -1;
+          break;
+          }
+        buffer[len++] = (unsigned char)y;
+        y = 0;
+        }
+      z ^= 1;
+      }
+
+    if (len < 0) continue;  /* With next argument after malformation */
+
+    bptr = buffer;
+    buffend = buffer + len;
+
+    /* Decode one character at a time until the bytes are used up or a
+    malformation other than an overlong sequence is found. */
+
+    while (bptr < buffend)
+      {
+      unsigned long int d;
+      int j;
+      int offset;
+      int rc = utf82ord(bptr, buffend, &d, &offset);
+
+      if (rc > 0)
+        {
+        printf("U+%08lx <= ", d);
+        for (j = 0; j < rc; j++) printf("%02x ", bptr[j]);
+        if (show)
+          {
+          printf(">");
+          for (j = 0; j < rc; j++) printf("%c", bptr[j]);
+          printf("<");
+          }
+        printf("\n");
+        bptr += rc;
+        }
+
+      /* An overlong sequence still yields a value and a length, so it is
+      reported and decoding carries on after it. */
+
+      else if (rc == -4)
+        {
+        printf("U+%08lx <= ", d);
+        for (j = 0; j < offset; j++) printf("%02x ", bptr[j]);
+        printf("** Overlong UTF-8 sequence\n");
+        bptr += offset;
+        }
+
+      /* Any other error abandons the argument after showing its remaining
+      bytes. */
+
+      else
+        {
+        switch (rc)
+          {
+          case 0:  printf("** First byte missing 0x40 bit");
+          break;
+
+          case -1: printf("** First byte has too many high-order bits");
+          break;
+
+          case -2: printf("** Incomplete UTF-8 sequence at end of string");
+          break;
+
+          case -3: printf("** Incomplete UTF-8 sequence");
+          break;
+
+          default: printf("** Unexpected return %d from utf82ord()", rc);
+          break;
+          }
+        printf(" at offset %d in string ", offset);
+        while (bptr < buffend) printf("%02x ", *bptr++);
+        printf("\n");
+        break;
+        }
+      }
+    }
+  }
+
+return 0;
+}
+
+/* End */