1366 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			1366 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*      Perl-Compatible Regular Expressions       *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* PCRE is a library of functions to support regular expressions whose syntax
							 | 
						||
| 
								 | 
							
								and semantics are as close as possible to those of the Perl 5 language.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                       Written by Philip Hazel
							 | 
						||
| 
								 | 
							
								     Original API code Copyright (c) 1997-2012 University of Cambridge
							 | 
						||
| 
								 | 
							
								          New API code Copyright (c) 2016-2022 University of Cambridge
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								-----------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								Redistribution and use in source and binary forms, with or without
							 | 
						||
| 
								 | 
							
								modification, are permitted provided that the following conditions are met:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Redistributions of source code must retain the above copyright notice,
							 | 
						||
| 
								 | 
							
								      this list of conditions and the following disclaimer.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Redistributions in binary form must reproduce the above copyright
							 | 
						||
| 
								 | 
							
								      notice, this list of conditions and the following disclaimer in the
							 | 
						||
| 
								 | 
							
								      documentation and/or other materials provided with the distribution.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Neither the name of the University of Cambridge nor the names of its
							 | 
						||
| 
								 | 
							
								      contributors may be used to endorse or promote products derived from
							 | 
						||
| 
								 | 
							
								      this software without specific prior written permission.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
							 | 
						||
| 
								 | 
							
								AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
							 | 
						||
| 
								 | 
							
								IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
							 | 
						||
| 
								 | 
							
								ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
							 | 
						||
| 
								 | 
							
								LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
							 | 
						||
| 
								 | 
							
								CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
							 | 
						||
| 
								 | 
							
								SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
							 | 
						||
| 
								 | 
							
								INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
							 | 
						||
| 
								 | 
							
								CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
							 | 
						||
| 
								 | 
							
								ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
							 | 
						||
| 
								 | 
							
								POSSIBILITY OF SUCH DAMAGE.
							 | 
						||
| 
								 | 
							
								-----------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This module contains functions that scan a compiled pattern and change
							 | 
						||
| 
								 | 
							
								repeats into possessive repeats where possible. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef HAVE_CONFIG_H
							 | 
						||
| 
								 | 
							
								#include "config.h"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "pcre2_internal.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*        Tables for auto-possessification        *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This table is used to check whether auto-possessification is possible
							 | 
						||
| 
								 | 
							
								between adjacent character-type opcodes. The left-hand (repeated) opcode is
							 | 
						||
| 
								 | 
							
								used to select the row, and the right-hand opcode is use to select the column.
							 | 
						||
| 
								 | 
							
								A value of 1 means that auto-possessification is OK. For example, the second
							 | 
						||
| 
								 | 
							
								value in the first row means that \D+\d can be turned into \D++\d.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The Unicode property types (\P and \p) have to be present to fill out the table
							 | 
						||
| 
								 | 
							
								because of what their opcode values are, but the table values should always be
							 | 
						||
| 
								 | 
							
								zero because property types are handled separately in the code. The last four
							 | 
						||
| 
								 | 
							
								columns apply to items that cannot be repeated, so there is no need to have
							 | 
						||
| 
								 | 
							
								rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
							 | 
						||
| 
								 | 
							
								*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
							 | 
						||
| 
								 | 
							
								#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static const uint8_t autoposstab[APTROWS][APTCOLS] = {
							 | 
						||
| 
								 | 
							
								/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
							 | 
						||
| 
								 | 
							
								  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
							 | 
						||
| 
								 | 
							
								  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
							 | 
						||
| 
								 | 
							
								  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								/* This table is used to check whether auto-possessification is possible
							 | 
						||
| 
								 | 
							
								between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
							 | 
						||
| 
								 | 
							
								left-hand (repeated) opcode is used to select the row, and the right-hand
							 | 
						||
| 
								 | 
							
								opcode is used to select the column. The values are as follows:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  0   Always return FALSE (never auto-possessify)
							 | 
						||
| 
								 | 
							
								  1   Character groups are distinct (possessify if both are OP_PROP)
							 | 
						||
| 
								 | 
							
								  2   Check character categories in the same group (general or particular)
							 | 
						||
| 
								 | 
							
								  3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  4   Check left general category vs right particular category
							 | 
						||
| 
								 | 
							
								  5   Check right general category vs left particular category
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  6   Left alphanum vs right general category
							 | 
						||
| 
								 | 
							
								  7   Left space vs right general category
							 | 
						||
| 
								 | 
							
								  8   Left word vs right general category
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  9   Right alphanum vs left general category
							 | 
						||
| 
								 | 
							
								 10   Right space vs left general category
							 | 
						||
| 
								 | 
							
								 11   Right word vs left general category
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								 12   Left alphanum vs right particular category
							 | 
						||
| 
								 | 
							
								 13   Left space vs right particular category
							 | 
						||
| 
								 | 
							
								 14   Left word vs right particular category
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								 15   Right alphanum vs left particular category
							 | 
						||
| 
								 | 
							
								 16   Right space vs left particular category
							 | 
						||
| 
								 | 
							
								 17   Right word vs left particular category
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
							 | 
						||
| 
								 | 
							
								/* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
							 | 
						||
| 
								 | 
							
								  { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
							 | 
						||
| 
								 | 
							
								  { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
							 | 
						||
| 
								 | 
							
								  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
							 | 
						||
| 
								 | 
							
								  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
							 | 
						||
| 
								 | 
							
								  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This table is used to check whether auto-possessification is possible
								between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
								specifies a general category and the other specifies a particular category. The
								row is selected by the general category and the column by the particular
								category. The value is 1 if the particular category is not part of the general
								category. There are 7 rows (C, L, M, N, P, S, Z) and 30 columns, in the order
								given by the heading line below. */

								static const uint8_t catposstab[7][30] = {
								/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
								  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
								  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
								  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
								  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
								  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
								  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
								  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
							 | 
						||
| 
								 | 
							
								a general or particular category. The properties in each row are those
							 | 
						||
| 
								 | 
							
								that apply to the character set in question. Duplication means that a little
							 | 
						||
| 
								 | 
							
								unnecessary work is done when checking, but this keeps things much simpler
							 | 
						||
| 
								 | 
							
								because they can all use the same code. For more details see the comment where
							 | 
						||
| 
								 | 
							
								this table is used.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Note: SPACE and PXSPACE used to be different because Perl excluded VT from
							 | 
						||
| 
								 | 
							
								"space", but from Perl 5.18 it's included, so both categories are treated the
							 | 
						||
| 
								 | 
							
								same here. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static const uint8_t posspropstab[3][4] = {
							 | 
						||
| 
								 | 
							
								  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
							 | 
						||
| 
								 | 
							
								  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
							 | 
						||
| 
								 | 
							
								  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								#endif  /* SUPPORT_UNICODE */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*        Check a character and a property        *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This function is called by compare_opcodes() when a property item is
							 | 
						||
| 
								 | 
							
								adjacent to a fixed character.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Arguments:
							 | 
						||
| 
								 | 
							
								  c            the character
							 | 
						||
| 
								 | 
							
								  ptype        the property type
							 | 
						||
| 
								 | 
							
								  pdata        the data for the type
							 | 
						||
| 
								 | 
							
								  negated      TRUE if it's a negated property (\P or \p{^)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Returns:       TRUE if auto-possessifying is OK
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static BOOL
							 | 
						||
| 
								 | 
							
								check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
							 | 
						||
| 
								 | 
							
								  BOOL negated)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								BOOL ok;
							 | 
						||
| 
								 | 
							
								const uint32_t *p;
							 | 
						||
| 
								 | 
							
								const ucd_record *prop = GET_UCD(c);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								switch(ptype)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  case PT_LAMP:
							 | 
						||
| 
								 | 
							
								  return (prop->chartype == ucp_Lu ||
							 | 
						||
| 
								 | 
							
								          prop->chartype == ucp_Ll ||
							 | 
						||
| 
								 | 
							
								          prop->chartype == ucp_Lt) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_GC:
							 | 
						||
| 
								 | 
							
								  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_PC:
							 | 
						||
| 
								 | 
							
								  return (pdata == prop->chartype) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_SC:
							 | 
						||
| 
								 | 
							
								  return (pdata == prop->script) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_SCX:
							 | 
						||
| 
								 | 
							
								  ok = (pdata == prop->script
							 | 
						||
| 
								 | 
							
								        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
							 | 
						||
| 
								 | 
							
								  return ok == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* These are specials */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_ALNUM:
							 | 
						||
| 
								 | 
							
								  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
							 | 
						||
| 
								 | 
							
								          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
							 | 
						||
| 
								 | 
							
								  means that Perl space and POSIX space are now identical. PCRE was changed
							 | 
						||
| 
								 | 
							
								  at release 8.34. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_SPACE:    /* Perl space */
							 | 
						||
| 
								 | 
							
								  case PT_PXSPACE:  /* POSIX space */
							 | 
						||
| 
								 | 
							
								  switch(c)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    HSPACE_CASES:
							 | 
						||
| 
								 | 
							
								    VSPACE_CASES:
							 | 
						||
| 
								 | 
							
								    return negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  break;  /* Control never reaches here */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_WORD:
							 | 
						||
| 
								 | 
							
								  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
							 | 
						||
| 
								 | 
							
								          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
							 | 
						||
| 
								 | 
							
								          c == CHAR_UNDERSCORE) == negated;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_CLIST:
							 | 
						||
| 
								 | 
							
								  p = PRIV(ucd_caseless_sets) + prop->caseset;
							 | 
						||
| 
								 | 
							
								  for (;;)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    if (c < *p) return !negated;
							 | 
						||
| 
								 | 
							
								    if (c == *p++) return negated;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  break;  /* Control never reaches here */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Haven't yet thought these through. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_BIDICL:
							 | 
						||
| 
								 | 
							
								  return FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case PT_BOOL:
							 | 
						||
| 
								 | 
							
								  return FALSE;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								return FALSE;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								#endif  /* SUPPORT_UNICODE */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*        Base opcode of repeated opcodes         *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Returns the base opcode for repeated single character type opcodes. If the
							 | 
						||
| 
								 | 
							
								opcode is not a repeated character type, it returns with the original value.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Arguments:  c opcode
							 | 
						||
| 
								 | 
							
								Returns:    base opcode for the type
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static PCRE2_UCHAR
							 | 
						||
| 
								 | 
							
								get_repeat_base(PCRE2_UCHAR c)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								return (c > OP_TYPEPOSUPTO)? c :
							 | 
						||
| 
								 | 
							
								       (c >= OP_TYPESTAR)?   OP_TYPESTAR :
							 | 
						||
| 
								 | 
							
								       (c >= OP_NOTSTARI)?   OP_NOTSTARI :
							 | 
						||
| 
								 | 
							
								       (c >= OP_NOTSTAR)?    OP_NOTSTAR :
							 | 
						||
| 
								 | 
							
								       (c >= OP_STARI)?      OP_STARI :
							 | 
						||
| 
								 | 
							
								                             OP_STAR;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*        Fill the character property list        *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Checks whether the code points to an opcode that can take part in auto-
							 | 
						||
| 
								 | 
							
								possessification, and if so, fills a list with its properties.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Arguments:
							 | 
						||
| 
								 | 
							
								  code        points to start of expression
							 | 
						||
| 
								 | 
							
								  utf         TRUE if in UTF mode
							 | 
						||
| 
								 | 
							
								  ucp         TRUE if in UCP mode
							 | 
						||
| 
								 | 
							
								  fcc         points to the case-flipping table
							 | 
						||
| 
								 | 
							
								  list        points to output list
							 | 
						||
| 
								 | 
							
								              list[0] will be filled with the opcode
							 | 
						||
| 
								 | 
							
								              list[1] will be non-zero if this opcode
							 | 
						||
| 
								 | 
							
								                can match an empty character string
							 | 
						||
| 
								 | 
							
								              list[2..7] depends on the opcode
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Returns:      points to the start of the next opcode if *code is accepted
							 | 
						||
| 
								 | 
							
								              NULL if *code is not accepted
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static PCRE2_SPTR
							 | 
						||
| 
								 | 
							
								get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
							 | 
						||
| 
								 | 
							
								  uint32_t *list)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								PCRE2_UCHAR c = *code;
							 | 
						||
| 
								 | 
							
								PCRE2_UCHAR base;
							 | 
						||
| 
								 | 
							
								PCRE2_SPTR end;
							 | 
						||
| 
								 | 
							
								uint32_t chr;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								uint32_t *clist_dest;
							 | 
						||
| 
								 | 
							
								const uint32_t *clist_src;
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								(void)utf;    /* Suppress "unused parameter" compiler warnings */
							 | 
						||
| 
								 | 
							
								(void)ucp;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								list[0] = c;
							 | 
						||
| 
								 | 
							
								list[1] = FALSE;
							 | 
						||
| 
								 | 
							
								code++;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  base = get_repeat_base(c);
							 | 
						||
| 
								 | 
							
								  c -= (base - OP_STAR);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
							 | 
						||
| 
								 | 
							
								    code += IMM2_SIZE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
							 | 
						||
| 
								 | 
							
								             c != OP_POSPLUS);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  switch(base)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    case OP_STAR:
							 | 
						||
| 
								 | 
							
								    list[0] = OP_CHAR;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_STARI:
							 | 
						||
| 
								 | 
							
								    list[0] = OP_CHARI;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_NOTSTAR:
							 | 
						||
| 
								 | 
							
								    list[0] = OP_NOT;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_NOTSTARI:
							 | 
						||
| 
								 | 
							
								    list[0] = OP_NOTI;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_TYPESTAR:
							 | 
						||
| 
								 | 
							
								    list[0] = *code;
							 | 
						||
| 
								 | 
							
								    code++;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  c = list[0];
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								switch(c)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  case OP_NOT_DIGIT:
							 | 
						||
| 
								 | 
							
								  case OP_DIGIT:
							 | 
						||
| 
								 | 
							
								  case OP_NOT_WHITESPACE:
							 | 
						||
| 
								 | 
							
								  case OP_WHITESPACE:
							 | 
						||
| 
								 | 
							
								  case OP_NOT_WORDCHAR:
							 | 
						||
| 
								 | 
							
								  case OP_WORDCHAR:
							 | 
						||
| 
								 | 
							
								  case OP_ANY:
							 | 
						||
| 
								 | 
							
								  case OP_ALLANY:
							 | 
						||
| 
								 | 
							
								  case OP_ANYNL:
							 | 
						||
| 
								 | 
							
								  case OP_NOT_HSPACE:
							 | 
						||
| 
								 | 
							
								  case OP_HSPACE:
							 | 
						||
| 
								 | 
							
								  case OP_NOT_VSPACE:
							 | 
						||
| 
								 | 
							
								  case OP_VSPACE:
							 | 
						||
| 
								 | 
							
								  case OP_EXTUNI:
							 | 
						||
| 
								 | 
							
								  case OP_EODN:
							 | 
						||
| 
								 | 
							
								  case OP_EOD:
							 | 
						||
| 
								 | 
							
								  case OP_DOLL:
							 | 
						||
| 
								 | 
							
								  case OP_DOLLM:
							 | 
						||
| 
								 | 
							
								  return code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case OP_CHAR:
							 | 
						||
| 
								 | 
							
								  case OP_NOT:
							 | 
						||
| 
								 | 
							
								  GETCHARINCTEST(chr, code);
							 | 
						||
| 
								 | 
							
								  list[2] = chr;
							 | 
						||
| 
								 | 
							
								  list[3] = NOTACHAR;
							 | 
						||
| 
								 | 
							
								  return code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case OP_CHARI:
							 | 
						||
| 
								 | 
							
								  case OP_NOTI:
							 | 
						||
| 
								 | 
							
								  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
							 | 
						||
| 
								 | 
							
								  GETCHARINCTEST(chr, code);
							 | 
						||
| 
								 | 
							
								  list[2] = chr;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								  if (chr < 128 || (chr < 256 && !utf && !ucp))
							 | 
						||
| 
								 | 
							
								    list[3] = fcc[chr];
							 | 
						||
| 
								 | 
							
								  else
							 | 
						||
| 
								 | 
							
								    list[3] = UCD_OTHERCASE(chr);
							 | 
						||
| 
								 | 
							
								#elif defined SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								  list[3] = (chr < 256) ? fcc[chr] : chr;
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								  list[3] = fcc[chr];
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* The othercase might be the same value. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (chr == list[3])
							 | 
						||
| 
								 | 
							
								    list[3] = NOTACHAR;
							 | 
						||
| 
								 | 
							
								  else
							 | 
						||
| 
								 | 
							
								    list[4] = NOTACHAR;
							 | 
						||
| 
								 | 
							
								  return code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								  case OP_PROP:
							 | 
						||
| 
								 | 
							
								  case OP_NOTPROP:
							 | 
						||
| 
								 | 
							
								  if (code[0] != PT_CLIST)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    list[2] = code[0];
							 | 
						||
| 
								 | 
							
								    list[3] = code[1];
							 | 
						||
| 
								 | 
							
								    return code + 2;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Convert only if we have enough space. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  clist_src = PRIV(ucd_caseless_sets) + code[1];
							 | 
						||
| 
								 | 
							
								  clist_dest = list + 2;
							 | 
						||
| 
								 | 
							
								  code += 2;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  do {
							 | 
						||
| 
								 | 
							
								     if (clist_dest >= list + 8)
							 | 
						||
| 
								 | 
							
								       {
							 | 
						||
| 
								 | 
							
								       /* Early return if there is not enough space. This should never
							 | 
						||
| 
								 | 
							
								       happen, since all clists are shorter than 5 character now. */
							 | 
						||
| 
								 | 
							
								       list[2] = code[0];
							 | 
						||
| 
								 | 
							
								       list[3] = code[1];
							 | 
						||
| 
								 | 
							
								       return code;
							 | 
						||
| 
								 | 
							
								       }
							 | 
						||
| 
								 | 
							
								     *clist_dest++ = *clist_src;
							 | 
						||
| 
								 | 
							
								     }
							 | 
						||
| 
								 | 
							
								  while(*clist_src++ != NOTACHAR);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* All characters are stored. The terminating NOTACHAR is copied from the
							 | 
						||
| 
								 | 
							
								  clist itself. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
							 | 
						||
| 
								 | 
							
								  return code;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  case OP_NCLASS:
							 | 
						||
| 
								 | 
							
								  case OP_CLASS:
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								  case OP_XCLASS:
							 | 
						||
| 
								 | 
							
								  if (c == OP_XCLASS)
							 | 
						||
| 
								 | 
							
								    end = code + GET(code, 0) - 1;
							 | 
						||
| 
								 | 
							
								  else
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								    end = code + 32 / sizeof(PCRE2_UCHAR);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  switch(*end)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    case OP_CRSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_CRMINSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_CRQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_CRMINQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_CRPOSSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_CRPOSQUERY:
							 | 
						||
| 
								 | 
							
								    list[1] = TRUE;
							 | 
						||
| 
								 | 
							
								    end++;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_CRPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_CRMINPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_CRPOSPLUS:
							 | 
						||
| 
								 | 
							
								    end++;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_CRRANGE:
							 | 
						||
| 
								 | 
							
								    case OP_CRMINRANGE:
							 | 
						||
| 
								 | 
							
								    case OP_CRPOSRANGE:
							 | 
						||
| 
								 | 
							
								    list[1] = (GET2(end, 1) == 0);
							 | 
						||
| 
								 | 
							
								    end += 1 + 2 * IMM2_SIZE;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  list[2] = (uint32_t)(end - code);
							 | 
						||
| 
								 | 
							
								  return end;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								return NULL;    /* Opcode not accepted */
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*    Scan further character sets for match       *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Checks whether the base and the current opcode have a common character, in
							 | 
						||
| 
								 | 
							
								which case the base cannot be possessified.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Arguments:
							 | 
						||
| 
								 | 
							
								  code        points to the byte code
							 | 
						||
| 
								 | 
							
								  utf         TRUE in UTF mode
							 | 
						||
| 
								 | 
							
								  ucp         TRUE in UCP mode
							 | 
						||
| 
								 | 
							
								  cb          compile data block
							 | 
						||
| 
								 | 
							
								  base_list   the data list of the base opcode
							 | 
						||
| 
								 | 
							
								  base_end    the end of the base opcode
							 | 
						||
| 
								 | 
							
								  rec_limit   points to recursion depth counter
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Returns:      TRUE if the auto-possessification is possible
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static BOOL
							 | 
						||
| 
								 | 
							
								compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
							 | 
						||
| 
								 | 
							
								  const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								PCRE2_UCHAR c;
							 | 
						||
| 
								 | 
							
								uint32_t list[8];
							 | 
						||
| 
								 | 
							
								const uint32_t *chr_ptr;
							 | 
						||
| 
								 | 
							
								const uint32_t *ochr_ptr;
							 | 
						||
| 
								 | 
							
								const uint32_t *list_ptr;
							 | 
						||
| 
								 | 
							
								PCRE2_SPTR next_code;
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								PCRE2_SPTR xclass_flags;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								const uint8_t *class_bitset;
							 | 
						||
| 
								 | 
							
								const uint8_t *set1, *set2, *set_end;
							 | 
						||
| 
								 | 
							
								uint32_t chr;
							 | 
						||
| 
								 | 
							
								BOOL accepted, invert_bits;
							 | 
						||
| 
								 | 
							
								BOOL entered_a_group = FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Note: the base_list[1] contains whether the current opcode has a greedy
							 | 
						||
| 
								 | 
							
								(represented by a non-zero value) quantifier. This is different from
							 | 
						||
| 
								 | 
							
								other character type lists, which store here that the character iterator
							 | 
						||
| 
								 | 
							
								matches to an empty string (also represented by a non-zero value). */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								for(;;)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  /* All operations move the code pointer forward.
							 | 
						||
| 
								 | 
							
								  Therefore infinite recursions are not possible. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  c = *code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Skip over callouts */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_CALLOUT)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_CALLOUT_STR)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    code += GET(code, 1 + 2*LINK_SIZE);
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* At the end of a branch, skip to the end of the group. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_ALT)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    do code += GET(code, 1); while (*code == OP_ALT);
							 | 
						||
| 
								 | 
							
								    c = *code;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Inspect the next opcode. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  switch(c)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    /* We can always possessify a greedy iterator at the end of the pattern,
							 | 
						||
| 
								 | 
							
								    which is reached after skipping over the final OP_KET. A non-greedy
							 | 
						||
| 
								 | 
							
								    iterator must never be possessified. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_END:
							 | 
						||
| 
								 | 
							
								    return base_list[1] != 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* When an iterator is at the end of certain kinds of group we can inspect
							 | 
						||
| 
								 | 
							
								    what follows the group by skipping over the closing ket. Note that this
							 | 
						||
| 
								 | 
							
								    does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
							 | 
						||
| 
								 | 
							
								    iteration is variable (could be another iteration or could be the next
							 | 
						||
| 
								 | 
							
								    item). As these two opcodes are not listed in the next switch, they will
							 | 
						||
| 
								 | 
							
								    end up as the next code to inspect, and return FALSE by virtue of being
							 | 
						||
| 
								 | 
							
								    unsupported. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_KET:
							 | 
						||
| 
								 | 
							
								    case OP_KETRPOS:
							 | 
						||
| 
								 | 
							
								    /* The non-greedy case cannot be converted to a possessive form. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (base_list[1] == 0) return FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* If the bracket is capturing it might be referenced by an OP_RECURSE
							 | 
						||
| 
								 | 
							
								    so its last iterator can never be possessified if the pattern contains
							 | 
						||
| 
								 | 
							
								    recursions. (This could be improved by keeping a list of group numbers that
							 | 
						||
| 
								 | 
							
								    are called by recursion.) */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    switch(*(code - GET(code, 1)))
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      case OP_CBRA:
							 | 
						||
| 
								 | 
							
								      case OP_SCBRA:
							 | 
						||
| 
								 | 
							
								      case OP_CBRAPOS:
							 | 
						||
| 
								 | 
							
								      case OP_SCBRAPOS:
							 | 
						||
| 
								 | 
							
								      if (cb->had_recurse) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      /* A script run might have to backtrack if the iterated item can match
							 | 
						||
| 
								 | 
							
								      characters from more than one script. So give up unless repeating an
							 | 
						||
| 
								 | 
							
								      explicit character. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_SCRIPT_RUN:
							 | 
						||
| 
								 | 
							
								      if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
							 | 
						||
| 
								 | 
							
								        return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      /* Atomic sub-patterns and assertions can always auto-possessify their
							 | 
						||
| 
								 | 
							
								      last iterator. However, if the group was entered as a result of checking
							 | 
						||
| 
								 | 
							
								      a previous iterator, this is not possible. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_ASSERT:
							 | 
						||
| 
								 | 
							
								      case OP_ASSERT_NOT:
							 | 
						||
| 
								 | 
							
								      case OP_ASSERTBACK:
							 | 
						||
| 
								 | 
							
								      case OP_ASSERTBACK_NOT:
							 | 
						||
| 
								 | 
							
								      case OP_ONCE:
							 | 
						||
| 
								 | 
							
								      return !entered_a_group;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      /* Non-atomic assertions - don't possessify last iterator. This needs
							 | 
						||
| 
								 | 
							
								      more thought. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_ASSERT_NA:
							 | 
						||
| 
								 | 
							
								      case OP_ASSERTBACK_NA:
							 | 
						||
| 
								 | 
							
								      return FALSE;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* Skip over the bracket and inspect what comes next. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* Handle cases where the next item is a group. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_ONCE:
							 | 
						||
| 
								 | 
							
								    case OP_BRA:
							 | 
						||
| 
								 | 
							
								    case OP_CBRA:
							 | 
						||
| 
								 | 
							
								    next_code = code + GET(code, 1);
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* Check each branch. We have to recurse a level for all but the last
							 | 
						||
| 
								 | 
							
								    branch. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    while (*next_code == OP_ALT)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
							 | 
						||
| 
								 | 
							
								        return FALSE;
							 | 
						||
| 
								 | 
							
								      code = next_code + 1 + LINK_SIZE;
							 | 
						||
| 
								 | 
							
								      next_code += GET(next_code, 1);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    entered_a_group = TRUE;
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_BRAZERO:
							 | 
						||
| 
								 | 
							
								    case OP_BRAMINZERO:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    next_code = code + 1;
							 | 
						||
| 
								 | 
							
								    if (*next_code != OP_BRA && *next_code != OP_CBRA &&
							 | 
						||
| 
								 | 
							
								        *next_code != OP_ONCE) return FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    next_code += 1 + LINK_SIZE;
							 | 
						||
| 
								 | 
							
								    if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
							 | 
						||
| 
								 | 
							
								         rec_limit))
							 | 
						||
| 
								 | 
							
								      return FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* The next opcode does not need special handling; fall through and use it
							 | 
						||
| 
								 | 
							
								    to see if the base can be possessified. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* We now have the next appropriate opcode to compare with the base. Check
							 | 
						||
| 
								 | 
							
								  for a supported opcode, and load its properties. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
							 | 
						||
| 
								 | 
							
								  if (code == NULL) return FALSE;    /* Unsupported */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* If either opcode is a small character list, set pointers for comparing
							 | 
						||
| 
								 | 
							
								  characters from that list with another list, or with a property. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (base_list[0] == OP_CHAR)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    chr_ptr = base_list + 2;
							 | 
						||
| 
								 | 
							
								    list_ptr = list;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  else if (list[0] == OP_CHAR)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    chr_ptr = list + 2;
							 | 
						||
| 
								 | 
							
								    list_ptr = base_list;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Character bitsets can also be compared to certain opcodes. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
							 | 
						||
| 
								 | 
							
								#if PCRE2_CODE_UNIT_WIDTH == 8
							 | 
						||
| 
								 | 
							
								      /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
							 | 
						||
| 
								 | 
							
								      || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      )
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								#if PCRE2_CODE_UNIT_WIDTH == 8
							 | 
						||
| 
								 | 
							
								    if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								    if (base_list[0] == OP_CLASS)
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      set1 = (uint8_t *)(base_end - base_list[2]);
							 | 
						||
| 
								 | 
							
								      list_ptr = list;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      set1 = (uint8_t *)(code - list[2]);
							 | 
						||
| 
								 | 
							
								      list_ptr = base_list;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    invert_bits = FALSE;
							 | 
						||
| 
								 | 
							
								    switch(list_ptr[0])
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      case OP_CLASS:
							 | 
						||
| 
								 | 
							
								      case OP_NCLASS:
							 | 
						||
| 
								 | 
							
								      set2 = (uint8_t *)
							 | 
						||
| 
								 | 
							
								        ((list_ptr == list ? code : base_end) - list_ptr[2]);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								      case OP_XCLASS:
							 | 
						||
| 
								 | 
							
								      xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
							 | 
						||
| 
								 | 
							
								      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      if ((*xclass_flags & XCL_MAP) == 0)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        /* No bits are set for characters < 256. */
							 | 
						||
| 
								 | 
							
								        if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
							 | 
						||
| 
								 | 
							
								        /* Might be an empty repeat. */
							 | 
						||
| 
								 | 
							
								        continue;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      set2 = (uint8_t *)(xclass_flags + 1);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_DIGIT:
							 | 
						||
| 
								 | 
							
								      invert_bits = TRUE;
							 | 
						||
| 
								 | 
							
								      /* Fall through */
							 | 
						||
| 
								 | 
							
								      case OP_DIGIT:
							 | 
						||
| 
								 | 
							
								      set2 = (uint8_t *)(cb->cbits + cbit_digit);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_WHITESPACE:
							 | 
						||
| 
								 | 
							
								      invert_bits = TRUE;
							 | 
						||
| 
								 | 
							
								      /* Fall through */
							 | 
						||
| 
								 | 
							
								      case OP_WHITESPACE:
							 | 
						||
| 
								 | 
							
								      set2 = (uint8_t *)(cb->cbits + cbit_space);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_WORDCHAR:
							 | 
						||
| 
								 | 
							
								      invert_bits = TRUE;
							 | 
						||
| 
								 | 
							
								      /* Fall through */
							 | 
						||
| 
								 | 
							
								      case OP_WORDCHAR:
							 | 
						||
| 
								 | 
							
								      set2 = (uint8_t *)(cb->cbits + cbit_word);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								      return FALSE;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* Because the bit sets are unaligned bytes, we need to perform byte
							 | 
						||
| 
								 | 
							
								    comparison here. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    set_end = set1 + 32;
							 | 
						||
| 
								 | 
							
								    if (invert_bits)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      do
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        if ((*set1++ & ~(*set2++)) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      while (set1 < set_end);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      do
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        if ((*set1++ & *set2++) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      while (set1 < set_end);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (list[1] == 0) return TRUE;
							 | 
						||
| 
								 | 
							
								    /* Might be an empty repeat. */
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Some property combinations also acceptable. Unicode property opcodes are
							 | 
						||
| 
								 | 
							
								  processed specially; the rest can be handled with a lookup table. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  else
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    uint32_t leftop, rightop;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    leftop = base_list[0];
							 | 
						||
| 
								 | 
							
								    rightop = list[0];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								    accepted = FALSE; /* Always set in non-unicode case. */
							 | 
						||
| 
								 | 
							
								    if (leftop == OP_PROP || leftop == OP_NOTPROP)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      if (rightop == OP_EOD)
							 | 
						||
| 
								 | 
							
								        accepted = TRUE;
							 | 
						||
| 
								 | 
							
								      else if (rightop == OP_PROP || rightop == OP_NOTPROP)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        int n;
							 | 
						||
| 
								 | 
							
								        const uint8_t *p;
							 | 
						||
| 
								 | 
							
								        BOOL same = leftop == rightop;
							 | 
						||
| 
								 | 
							
								        BOOL lisprop = leftop == OP_PROP;
							 | 
						||
| 
								 | 
							
								        BOOL risprop = rightop == OP_PROP;
							 | 
						||
| 
								 | 
							
								        BOOL bothprop = lisprop && risprop;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        /* There's a table that specifies how each combination is to be
							 | 
						||
| 
								 | 
							
								        processed:
							 | 
						||
| 
								 | 
							
								          0   Always return FALSE (never auto-possessify)
							 | 
						||
| 
								 | 
							
								          1   Character groups are distinct (possessify if both are OP_PROP)
							 | 
						||
| 
								 | 
							
								          2   Check character categories in the same group (general or particular)
							 | 
						||
| 
								 | 
							
								          3   Return TRUE if the two opcodes are not the same
							 | 
						||
| 
								 | 
							
								          ... see comments below
							 | 
						||
| 
								 | 
							
								        */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        n = propposstab[base_list[2]][list[2]];
							 | 
						||
| 
								 | 
							
								        switch(n)
							 | 
						||
| 
								 | 
							
								          {
							 | 
						||
| 
								 | 
							
								          case 0: break;
							 | 
						||
| 
								 | 
							
								          case 1: accepted = bothprop; break;
							 | 
						||
| 
								 | 
							
								          case 2: accepted = (base_list[3] == list[3]) != same; break;
							 | 
						||
| 
								 | 
							
								          case 3: accepted = !same; break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 4:  /* Left general category, right particular category */
							 | 
						||
| 
								 | 
							
								          accepted = risprop && catposstab[base_list[3]][list[3]] == same;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 5:  /* Right general category, left particular category */
							 | 
						||
| 
								 | 
							
								          accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          /* This code is logically tricky. Think hard before fiddling with it.
							 | 
						||
| 
								 | 
							
								          The posspropstab table has four entries per row. Each row relates to
							 | 
						||
| 
								 | 
							
								          one of PCRE's special properties such as ALNUM or SPACE or WORD.
							 | 
						||
| 
								 | 
							
								          Only WORD actually needs all four entries, but using repeats for the
							 | 
						||
| 
								 | 
							
								          others means they can all use the same code below.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          The first two entries in each row are Unicode general categories, and
							 | 
						||
| 
								 | 
							
								          apply always, because all the characters they include are part of the
							 | 
						||
| 
								 | 
							
								          PCRE character set. The third and fourth entries are a general and a
							 | 
						||
| 
								 | 
							
								          particular category, respectively, that include one or more relevant
							 | 
						||
| 
								 | 
							
								          characters. One or the other is used, depending on whether the check
							 | 
						||
| 
								 | 
							
								          is for a general or a particular category. However, in both cases the
							 | 
						||
| 
								 | 
							
								          category contains more characters than the specials that are defined
							 | 
						||
| 
								 | 
							
								          for the property being tested against. Therefore, it cannot be used
							 | 
						||
| 
								 | 
							
								          in a NOTPROP case.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
							 | 
						||
| 
								 | 
							
								          Underscore is covered by ucp_P or ucp_Po. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 6:  /* Left alphanum vs right general category */
							 | 
						||
| 
								 | 
							
								          case 7:  /* Left space vs right general category */
							 | 
						||
| 
								 | 
							
								          case 8:  /* Left word vs right general category */
							 | 
						||
| 
								 | 
							
								          p = posspropstab[n-6];
							 | 
						||
| 
								 | 
							
								          accepted = risprop && lisprop ==
							 | 
						||
| 
								 | 
							
								            (list[3] != p[0] &&
							 | 
						||
| 
								 | 
							
								             list[3] != p[1] &&
							 | 
						||
| 
								 | 
							
								            (list[3] != p[2] || !lisprop));
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 9:   /* Right alphanum vs left general category */
							 | 
						||
| 
								 | 
							
								          case 10:  /* Right space vs left general category */
							 | 
						||
| 
								 | 
							
								          case 11:  /* Right word vs left general category */
							 | 
						||
| 
								 | 
							
								          p = posspropstab[n-9];
							 | 
						||
| 
								 | 
							
								          accepted = lisprop && risprop ==
							 | 
						||
| 
								 | 
							
								            (base_list[3] != p[0] &&
							 | 
						||
| 
								 | 
							
								             base_list[3] != p[1] &&
							 | 
						||
| 
								 | 
							
								            (base_list[3] != p[2] || !risprop));
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 12:  /* Left alphanum vs right particular category */
							 | 
						||
| 
								 | 
							
								          case 13:  /* Left space vs right particular category */
							 | 
						||
| 
								 | 
							
								          case 14:  /* Left word vs right particular category */
							 | 
						||
| 
								 | 
							
								          p = posspropstab[n-12];
							 | 
						||
| 
								 | 
							
								          accepted = risprop && lisprop ==
							 | 
						||
| 
								 | 
							
								            (catposstab[p[0]][list[3]] &&
							 | 
						||
| 
								 | 
							
								             catposstab[p[1]][list[3]] &&
							 | 
						||
| 
								 | 
							
								            (list[3] != p[3] || !lisprop));
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 15:  /* Right alphanum vs left particular category */
							 | 
						||
| 
								 | 
							
								          case 16:  /* Right space vs left particular category */
							 | 
						||
| 
								 | 
							
								          case 17:  /* Right word vs left particular category */
							 | 
						||
| 
								 | 
							
								          p = posspropstab[n-15];
							 | 
						||
| 
								 | 
							
								          accepted = lisprop && risprop ==
							 | 
						||
| 
								 | 
							
								            (catposstab[p[0]][base_list[3]] &&
							 | 
						||
| 
								 | 
							
								             catposstab[p[1]][base_list[3]] &&
							 | 
						||
| 
								 | 
							
								            (base_list[3] != p[3] || !risprop));
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								#endif  /* SUPPORT_UNICODE */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
							 | 
						||
| 
								 | 
							
								           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
							 | 
						||
| 
								 | 
							
								           autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (!accepted) return FALSE;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (list[1] == 0) return TRUE;
							 | 
						||
| 
								 | 
							
								    /* Might be an empty repeat. */
							 | 
						||
| 
								 | 
							
								    continue;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Control reaches here only if one of the items is a small character list.
							 | 
						||
| 
								 | 
							
								  All characters are checked against the other side. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  do
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    chr = *chr_ptr;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    switch(list_ptr[0])
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      case OP_CHAR:
							 | 
						||
| 
								 | 
							
								      ochr_ptr = list_ptr + 2;
							 | 
						||
| 
								 | 
							
								      do
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        if (chr == *ochr_ptr) return FALSE;
							 | 
						||
| 
								 | 
							
								        ochr_ptr++;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      while(*ochr_ptr != NOTACHAR);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT:
							 | 
						||
| 
								 | 
							
								      ochr_ptr = list_ptr + 2;
							 | 
						||
| 
								 | 
							
								      do
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        if (chr == *ochr_ptr)
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								        ochr_ptr++;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      while(*ochr_ptr != NOTACHAR);
							 | 
						||
| 
								 | 
							
								      if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
							 | 
						||
| 
								 | 
							
								      set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_DIGIT:
							 | 
						||
| 
								 | 
							
								      if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_DIGIT:
							 | 
						||
| 
								 | 
							
								      if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_WHITESPACE:
							 | 
						||
| 
								 | 
							
								      if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_WHITESPACE:
							 | 
						||
| 
								 | 
							
								      if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_WORDCHAR:
							 | 
						||
| 
								 | 
							
								      if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_WORDCHAR:
							 | 
						||
| 
								 | 
							
								      if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_HSPACE:
							 | 
						||
| 
								 | 
							
								      switch(chr)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        HSPACE_CASES: return FALSE;
							 | 
						||
| 
								 | 
							
								        default: break;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_HSPACE:
							 | 
						||
| 
								 | 
							
								      switch(chr)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        HSPACE_CASES: break;
							 | 
						||
| 
								 | 
							
								        default: return FALSE;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_ANYNL:
							 | 
						||
| 
								 | 
							
								      case OP_VSPACE:
							 | 
						||
| 
								 | 
							
								      switch(chr)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        VSPACE_CASES: return FALSE;
							 | 
						||
| 
								 | 
							
								        default: break;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NOT_VSPACE:
							 | 
						||
| 
								 | 
							
								      switch(chr)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        VSPACE_CASES: break;
							 | 
						||
| 
								 | 
							
								        default: return FALSE;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_DOLL:
							 | 
						||
| 
								 | 
							
								      case OP_EODN:
							 | 
						||
| 
								 | 
							
								      switch (chr)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        case CHAR_CR:
							 | 
						||
| 
								 | 
							
								        case CHAR_LF:
							 | 
						||
| 
								 | 
							
								        case CHAR_VT:
							 | 
						||
| 
								 | 
							
								        case CHAR_FF:
							 | 
						||
| 
								 | 
							
								        case CHAR_NEL:
							 | 
						||
| 
								 | 
							
								#ifndef EBCDIC
							 | 
						||
| 
								 | 
							
								        case 0x2028:
							 | 
						||
| 
								 | 
							
								        case 0x2029:
							 | 
						||
| 
								 | 
							
								#endif  /* Not EBCDIC */
							 | 
						||
| 
								 | 
							
								        return FALSE;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_EOD:    /* Can always possessify before \z */
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_UNICODE
							 | 
						||
| 
								 | 
							
								      case OP_PROP:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPROP:
							 | 
						||
| 
								 | 
							
								      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
							 | 
						||
| 
								 | 
							
								            list_ptr[0] == OP_NOTPROP))
							 | 
						||
| 
								 | 
							
								        return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_NCLASS:
							 | 
						||
| 
								 | 
							
								      if (chr > 255) return FALSE;
							 | 
						||
| 
								 | 
							
								      /* Fall through */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_CLASS:
							 | 
						||
| 
								 | 
							
								      if (chr > 255) break;
							 | 
						||
| 
								 | 
							
								      class_bitset = (uint8_t *)
							 | 
						||
| 
								 | 
							
								        ((list_ptr == list ? code : base_end) - list_ptr[2]);
							 | 
						||
| 
								 | 
							
								      if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								      case OP_XCLASS:
							 | 
						||
| 
								 | 
							
								      if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
							 | 
						||
| 
								 | 
							
								          list_ptr[2] + LINK_SIZE, utf)) return FALSE;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								      return FALSE;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    chr_ptr++;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  while(*chr_ptr != NOTACHAR);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* At least one character must be matched from this opcode. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (list[1] == 0) return TRUE;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Control never reaches here. There used to be a fail-safe return FALSE; here,
							 | 
						||
| 
								 | 
							
								but some compilers complain about an unreachable statement. */
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*    Scan compiled regex for auto-possession     *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Replaces single character iterations with their possessive alternatives
							 | 
						||
| 
								 | 
							
								if appropriate. This function modifies the compiled opcode! Hitting a
							 | 
						||
| 
								 | 
							
								non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
							 | 
						||
| 
								 | 
							
								bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
							 | 
						||
| 
								 | 
							
								overly complicated or large patterns. In these cases, the check just stops,
							 | 
						||
| 
								 | 
							
								leaving the remainder of the pattern unpossessified.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Arguments:
							 | 
						||
| 
								 | 
							
								  code        points to start of the byte code
							 | 
						||
| 
								 | 
							
								  cb          compile data block
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Returns:      0 for success
							 | 
						||
| 
								 | 
							
								              -1 if a non-existent opcode is encountered
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								int
							 | 
						||
| 
								 | 
							
								PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								PCRE2_UCHAR c;
							 | 
						||
| 
								 | 
							
								PCRE2_SPTR end;
							 | 
						||
| 
								 | 
							
								PCRE2_UCHAR *repeat_opcode;
							 | 
						||
| 
								 | 
							
								uint32_t list[8];
							 | 
						||
| 
								 | 
							
								int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
							 | 
						||
| 
								 | 
							
								BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
							 | 
						||
| 
								 | 
							
								BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								for (;;)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  c = *code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    c -= get_repeat_base(c) - OP_STAR;
							 | 
						||
| 
								 | 
							
								    end = (c <= OP_MINUPTO) ?
							 | 
						||
| 
								 | 
							
								      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
							 | 
						||
| 
								 | 
							
								    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
							 | 
						||
| 
								 | 
							
								        &rec_limit))
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      switch(c)
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        case OP_STAR:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSSTAR - OP_STAR;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_MINSTAR:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSSTAR - OP_MINSTAR;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_PLUS:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSPLUS - OP_PLUS;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_MINPLUS:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSPLUS - OP_MINPLUS;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_QUERY:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSQUERY - OP_QUERY;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_MINQUERY:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSQUERY - OP_MINQUERY;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_UPTO:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSUPTO - OP_UPTO;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        case OP_MINUPTO:
							 | 
						||
| 
								 | 
							
								        *code += OP_POSUPTO - OP_MINUPTO;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    c = *code;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								    if (c == OP_XCLASS)
							 | 
						||
| 
								 | 
							
								      repeat_opcode = code + GET(code, 1);
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    c = *repeat_opcode;
							 | 
						||
| 
								 | 
							
								    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      /* The return from get_chr_property_list() will never be NULL when
							 | 
						||
| 
								 | 
							
								      *code (aka c) is one of the three class opcodes. However, gcc with
							 | 
						||
| 
								 | 
							
								      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
							 | 
						||
| 
								 | 
							
								      put in a check. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
							 | 
						||
| 
								 | 
							
								      list[1] = (c & 1) == 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      if (end != NULL &&
							 | 
						||
| 
								 | 
							
								          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
							 | 
						||
| 
								 | 
							
								        {
							 | 
						||
| 
								 | 
							
								        switch (c)
							 | 
						||
| 
								 | 
							
								          {
							 | 
						||
| 
								 | 
							
								          case OP_CRSTAR:
							 | 
						||
| 
								 | 
							
								          case OP_CRMINSTAR:
							 | 
						||
| 
								 | 
							
								          *repeat_opcode = OP_CRPOSSTAR;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case OP_CRPLUS:
							 | 
						||
| 
								 | 
							
								          case OP_CRMINPLUS:
							 | 
						||
| 
								 | 
							
								          *repeat_opcode = OP_CRPOSPLUS;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case OP_CRQUERY:
							 | 
						||
| 
								 | 
							
								          case OP_CRMINQUERY:
							 | 
						||
| 
								 | 
							
								          *repeat_opcode = OP_CRPOSQUERY;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case OP_CRRANGE:
							 | 
						||
| 
								 | 
							
								          case OP_CRMINRANGE:
							 | 
						||
| 
								 | 
							
								          *repeat_opcode = OP_CRPOSRANGE;
							 | 
						||
| 
								 | 
							
								          break;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    c = *code;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  switch(c)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    case OP_END:
							 | 
						||
| 
								 | 
							
								    return 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_TYPESTAR:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEMINSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEMINPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEMINQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEPOSSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEPOSPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEPOSQUERY:
							 | 
						||
| 
								 | 
							
								    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_TYPEUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEMINUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEEXACT:
							 | 
						||
| 
								 | 
							
								    case OP_TYPEPOSUPTO:
							 | 
						||
| 
								 | 
							
								    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
							 | 
						||
| 
								 | 
							
								      code += 2;
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_CALLOUT_STR:
							 | 
						||
| 
								 | 
							
								    code += GET(code, 1 + 2*LINK_SIZE);
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef SUPPORT_WIDE_CHARS
							 | 
						||
| 
								 | 
							
								    case OP_XCLASS:
							 | 
						||
| 
								 | 
							
								    code += GET(code, 1);
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case OP_MARK:
							 | 
						||
| 
								 | 
							
								    case OP_COMMIT_ARG:
							 | 
						||
| 
								 | 
							
								    case OP_PRUNE_ARG:
							 | 
						||
| 
								 | 
							
								    case OP_SKIP_ARG:
							 | 
						||
| 
								 | 
							
								    case OP_THEN_ARG:
							 | 
						||
| 
								 | 
							
								    code += code[1];
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Add in the fixed length from the table */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
							 | 
						||
| 
								 | 
							
								  followed by a multi-byte character. The length in the table is a minimum, so
							 | 
						||
| 
								 | 
							
								  we have to arrange to skip the extra code units. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef MAYBE_UTF_MULTI
							 | 
						||
| 
								 | 
							
								  if (utf) switch(c)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    case OP_CHAR:
							 | 
						||
| 
								 | 
							
								    case OP_CHARI:
							 | 
						||
| 
								 | 
							
								    case OP_NOT:
							 | 
						||
| 
								 | 
							
								    case OP_NOTI:
							 | 
						||
| 
								 | 
							
								    case OP_STAR:
							 | 
						||
| 
								 | 
							
								    case OP_MINSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_PLUS:
							 | 
						||
| 
								 | 
							
								    case OP_MINPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_QUERY:
							 | 
						||
| 
								 | 
							
								    case OP_MINQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_UPTO:
							 | 
						||
| 
								 | 
							
								    case OP_MINUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_EXACT:
							 | 
						||
| 
								 | 
							
								    case OP_POSSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_POSPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_POSQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_POSUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_STARI:
							 | 
						||
| 
								 | 
							
								    case OP_MINSTARI:
							 | 
						||
| 
								 | 
							
								    case OP_PLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_MINPLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_QUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_MINQUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_UPTOI:
							 | 
						||
| 
								 | 
							
								    case OP_MINUPTOI:
							 | 
						||
| 
								 | 
							
								    case OP_EXACTI:
							 | 
						||
| 
								 | 
							
								    case OP_POSSTARI:
							 | 
						||
| 
								 | 
							
								    case OP_POSPLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_POSQUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_POSUPTOI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_NOTQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_NOTUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_NOTEXACT:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSSTAR:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSPLUS:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSQUERY:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSUPTO:
							 | 
						||
| 
								 | 
							
								    case OP_NOTSTARI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINSTARI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINPLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTQUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINQUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTUPTOI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTMINUPTOI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTEXACTI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSSTARI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSPLUSI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSQUERYI:
							 | 
						||
| 
								 | 
							
								    case OP_NOTPOSUPTOI:
							 | 
						||
| 
								 | 
							
								    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
							 | 
						||
| 
								 | 
							
								    break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								  (void)(utf);  /* Keep compiler happy by referencing function argument */
							 | 
						||
| 
								 | 
							
								#endif  /* MAYBE_UTF_MULTI */
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* End of pcre2_auto_possess.c */
							 |