99 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			99 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								#! /usr/bin/python
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#                   PCRE2 UNICODE PROPERTY SUPPORT
							 | 
						||
| 
								 | 
							
								#                   ------------------------------
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# This script generates the pcre2_ucp.h file from Unicode data files. This
							 | 
						||
| 
								 | 
							
								# header uses enumerations to give names to Unicode property types and script
							 | 
						||
| 
								 | 
							
								# names.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# This script was created in December 2021 as part of the Unicode data
							 | 
						||
| 
								 | 
							
								# generation refactoring.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Import common data lists and functions
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from GenerateCommon import \
							 | 
						||
| 
								 | 
							
								  bidi_classes, \
							 | 
						||
| 
								 | 
							
								  bool_properties, \
							 | 
						||
| 
								 | 
							
								  bool_props_list_item_size, \
							 | 
						||
| 
								 | 
							
								  break_properties, \
							 | 
						||
| 
								 | 
							
								  category_names, \
							 | 
						||
| 
								 | 
							
								  general_category_names, \
							 | 
						||
| 
								 | 
							
								  script_list_item_size, \
							 | 
						||
| 
								 | 
							
								  script_names, \
							 | 
						||
| 
								 | 
							
								  open_output
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Create the output file. open_output() does not return on failure, and it
								# also writes the standard header boilerplate before handing the file back.

								f = open_output("pcre2_ucp.h")

								# Heading text specific to this file: the include guard followed by the
								# explanatory C comment. The lines are joined with newlines and the text
								# ends with one blank line.

								heading_lines = (
								  "#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD",
								  "#define PCRE2_UCP_H_IDEMPOTENT_GUARD",
								  "",
								  "/* This file contains definitions of the Unicode property values that are",
								  "returned by the UCD access macros and used throughout PCRE2.",
								  "",
								  "IMPORTANT: The specific values of the first two enums (general and particular",
								  "character categories) are assumed by the table called catposstab in the file",
								  "pcre2_auto_possess.c. They are unlikely to change, but should be checked after",
								  "an update. */",
								)
								f.write("\n".join(heading_lines) + "\n\n")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of general character categories: one ucp_<name> enumerator
								# per entry of general_category_names, in list order.

								f.write("/* These are the general character categories. */\n\nenum {\n")
								general_entries = ["  ucp_%s,\n" % name for name in general_category_names]
								f.write("".join(general_entries))
								f.write("};\n\n")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of particular character categories. category_names is read
								# as a flat list of pairs: the enumerator suffix at each even index and the
								# text of its trailing C comment at the following odd index.

								f.write("/* These are the particular character categories. */\n\nenum {\n")
								for pos in range(0, len(category_names), 2):
								  suffix = category_names[pos]
								  comment = category_names[pos + 1]
								  f.write("  ucp_%s,    /* %s */\n" % (suffix, comment))
								f.write("};\n\n")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of Boolean properties. ucp_Bprop_Count is written after all
								# of the property enumerators, so it is the final entry of the enum.

								f.write("/* These are Boolean properties. */\n\nenum {\n")
								f.write("".join("  ucp_%s,\n" % prop for prop in bool_properties))
								f.write("  /* This must be last */\n"
								  "  ucp_Bprop_Count\n};\n\n")

								# Emit the macro that records the size of each ucd_boolprop_sets[] entry.

								f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
								boolprop_size_define = "#define ucd_boolprop_sets_item_size %d\n\n"
								f.write(boolprop_size_define % bool_props_list_item_size)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of bidi class values. bidi_classes is read as a flat list of
								# pairs: the enumerator suffix at each even index and its comment text at
								# the following odd index. Suffixes shorter than bidi_width characters are
								# followed by spaces that make up the difference.

								bidi_width = 4
								f.write("/* These are the bidi class values. */\n\nenum {\n")
								for pos in range(0, len(bidi_classes), 2):
								  suffix = bidi_classes[pos]
								  padding = ' ' * (bidi_width - len(suffix))
								  f.write("  ucp_bidi%s,%s /* %s */\n" % (suffix, padding, bidi_classes[pos + 1]))
								f.write("};\n\n")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of grapheme break properties. break_properties is read as a
								# flat list of pairs: the enumerator suffix at each even index and its
								# comment text at the following odd index. Suffixes shorter than gb_width
								# characters are followed by spaces that make up the difference.

								gb_width = 21
								f.write(
								  "/* These are grapheme break properties. The Extended Pictographic property\n"
								  "comes from the emoji-data.txt file. */\n\n"
								  "enum {\n")
								for pos in range(0, len(break_properties), 2):
								  suffix = break_properties[pos]
								  padding = ' ' * (gb_width - len(suffix))
								  f.write("  ucp_gb%s,%s /* %s */\n" % (suffix, padding, break_properties[pos + 1]))
								f.write("};\n\n")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Emit the enum of script identifications, one ucp_<name> enumerator per
								# entry of script_names. A second section comment is written immediately
								# before the "Unknown" entry, and ucp_Script_Count is written after all of
								# the script enumerators, so it is the final entry of the enum.

								f.write(
								  "/* These are the script identifications. */\n\n"
								  "enum {\n"
								  "  /* Scripts which has characters in other scripts. */\n")
								for script in script_names:
								  entry = "  ucp_%s,\n" % script
								  if script == "Unknown":
								    entry = "\n  /* Scripts which has no characters in other scripts. */\n" + entry
								  f.write(entry)
								f.write("\n"
								  "  /* This must be last */\n"
								  "  ucp_Script_Count\n};\n\n")

								# Emit the macro that records the size of each ucd_script_sets[] entry.

								f.write("/* Size of entries in ucd_script_sets[] */\n\n")
								script_size_define = "#define ucd_script_sets_item_size %d\n\n"
								f.write(script_size_define % script_list_item_size)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Close the include guard, write the end-of-file marker, and close the
								# output file.

								f.write(
								  "#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n"
								  "/* End of pcre2_ucp.h */\n")
								f.close()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# End
							 |