149 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			149 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|  | /*************************************************
 | ||
|  | *      Perl-Compatible Regular Expressions       * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | /* PCRE is a library of functions to support regular expressions whose syntax
 | ||
|  | and semantics are as close as possible to those of the Perl 5 language. | ||
|  | 
 | ||
|  |                        Written by Philip Hazel | ||
|  |      Original API code Copyright (c) 1997-2012 University of Cambridge | ||
|  |           New API code Copyright (c) 2016-2021 University of Cambridge | ||
|  | 
 | ||
|  | ----------------------------------------------------------------------------- | ||
|  | Redistribution and use in source and binary forms, with or without | ||
|  | modification, are permitted provided that the following conditions are met: | ||
|  | 
 | ||
|  |     * Redistributions of source code must retain the above copyright notice, | ||
|  |       this list of conditions and the following disclaimer. | ||
|  | 
 | ||
|  |     * Redistributions in binary form must reproduce the above copyright | ||
|  |       notice, this list of conditions and the following disclaimer in the | ||
|  |       documentation and/or other materials provided with the distribution. | ||
|  | 
 | ||
|  |     * Neither the name of the University of Cambridge nor the names of its | ||
|  |       contributors may be used to endorse or promote products derived from | ||
|  |       this software without specific prior written permission. | ||
|  | 
 | ||
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
|  | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
|  | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
|  | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
|  | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
|  | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
|  | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
|  | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
|  | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
|  | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
|  | POSSIBILITY OF SUCH DAMAGE. | ||
|  | ----------------------------------------------------------------------------- | ||
|  | */ | ||
|  | 
 | ||
/* This module contains an internal function that is used to match a Unicode
extended grapheme sequence. It is used by both pcre2_match() and
pcre2_dfa_match(). However, it is called only when Unicode support is being
compiled. Nevertheless, we provide a dummy function when there is no Unicode
support, because some compilers do not like functionless source files. */
|  | 
 | ||
|  | 
 | ||
|  | #ifdef HAVE_CONFIG_H
 | ||
|  | #include "config.h"
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 
 | ||
|  | #include "pcre2_internal.h"
 | ||
|  | 
 | ||
|  | 
 | ||
|  | /* Dummy function */ | ||
|  | 
 | ||
|  | #ifndef SUPPORT_UNICODE
 | ||
|  | PCRE2_SPTR | ||
|  | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, | ||
|  |   PCRE2_SPTR end_subject, BOOL utf, int *xcount) | ||
|  | { | ||
|  | (void)c; | ||
|  | (void)eptr; | ||
|  | (void)start_subject; | ||
|  | (void)end_subject; | ||
|  | (void)utf; | ||
|  | (void)xcount; | ||
|  | return NULL; | ||
|  | } | ||
|  | #else
 | ||
|  | 
 | ||
|  | 
 | ||
|  | /*************************************************
 | ||
|  | *      Match an extended grapheme sequence       * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | /*
 | ||
|  | Arguments: | ||
|  |   c              the first character | ||
|  |   eptr           pointer to next character | ||
|  |   start_subject  pointer to start of subject | ||
|  |   end_subject    pointer to end of subject | ||
|  |   utf            TRUE if in UTF mode | ||
|  |   xcount         pointer to count of additional characters, | ||
|  |                    or NULL if count not needed | ||
|  | 
 | ||
|  | Returns:         pointer after the end of the sequence | ||
|  | */ | ||
|  | 
 | ||
|  | PCRE2_SPTR | ||
|  | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, | ||
|  |   PCRE2_SPTR end_subject, BOOL utf, int *xcount) | ||
|  | { | ||
|  | int lgb = UCD_GRAPHBREAK(c); | ||
|  | 
 | ||
|  | while (eptr < end_subject) | ||
|  |   { | ||
|  |   int rgb; | ||
|  |   int len = 1; | ||
|  |   if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } | ||
|  |   rgb = UCD_GRAPHBREAK(c); | ||
|  |   if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; | ||
|  | 
 | ||
|  |   /* Not breaking between Regional Indicators is allowed only if there
 | ||
|  |   are an even number of preceding RIs. */ | ||
|  | 
 | ||
|  |   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) | ||
|  |     { | ||
|  |     int ricount = 0; | ||
|  |     PCRE2_SPTR bptr = eptr - 1; | ||
|  |     if (utf) BACKCHAR(bptr); | ||
|  | 
 | ||
|  |     /* bptr is pointing to the left-hand character */ | ||
|  | 
 | ||
|  |     while (bptr > start_subject) | ||
|  |       { | ||
|  |       bptr--; | ||
|  |       if (utf) | ||
|  |         { | ||
|  |         BACKCHAR(bptr); | ||
|  |         GETCHAR(c, bptr); | ||
|  |         } | ||
|  |       else | ||
|  |       c = *bptr; | ||
|  |       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; | ||
|  |       ricount++; | ||
|  |       } | ||
|  |     if ((ricount & 1) != 0) break;  /* Grapheme break required */ | ||
|  |     } | ||
|  | 
 | ||
|  |   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
 | ||
|  |   allows any number of them before a following Extended_Pictographic. */ | ||
|  | 
 | ||
|  |   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || | ||
|  |        lgb != ucp_gbExtended_Pictographic) | ||
|  |     lgb = rgb; | ||
|  | 
 | ||
|  |   eptr += len; | ||
|  |   if (xcount != NULL) *xcount += 1; | ||
|  |   } | ||
|  | 
 | ||
|  | return eptr; | ||
|  | } | ||
|  | 
 | ||
|  | #endif  /* SUPPORT_UNICODE */
 | ||
|  | 
 | ||
|  | /* End of pcre2_extuni.c */ |