220 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			220 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
|  | 
 | ||
|  | 
 | ||
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE for a lookbehind. The function is called
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
|  | 
 | ||
|  | 
 | ||
|  | #ifdef HAVE_CONFIG_H
 | ||
|  | #include "config.h"
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #include "pcre2_internal.h"
 | ||
|  | 
 | ||
|  | 
 | ||
/*************************************************
*    Scan compiled regex for specific bracket    *
*************************************************/

/*
Arguments:
  code        points to start of expression
  utf         TRUE in UTF mode
  number      the required bracket number or negative to find a lookbehind

Returns:      pointer to the opcode for the bracket, or NULL if not found
*/
|  | 
 | ||
|  | PCRE2_SPTR | ||
|  | PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) | ||
|  | { | ||
|  | for (;;) | ||
|  |   { | ||
|  |   PCRE2_UCHAR c = *code; | ||
|  | 
 | ||
|  |   if (c == OP_END) return NULL; | ||
|  | 
 | ||
|  |   /* XCLASS is used for classes that cannot be represented just by a bit map.
 | ||
|  |   This includes negated single high-valued characters. CALLOUT_STR is used for | ||
|  |   callouts with string arguments. In both cases the length in the table is | ||
|  |   zero; the actual length is stored in the compiled code. */ | ||
|  | 
 | ||
|  |   if (c == OP_XCLASS) code += GET(code, 1); | ||
|  |     else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); | ||
|  | 
 | ||
|  |   /* Handle lookbehind */ | ||
|  | 
 | ||
|  |   else if (c == OP_REVERSE) | ||
|  |     { | ||
|  |     if (number < 0) return (PCRE2_UCHAR *)code; | ||
|  |     code += PRIV(OP_lengths)[c]; | ||
|  |     } | ||
|  | 
 | ||
|  |   /* Handle capturing bracket */ | ||
|  | 
 | ||
|  |   else if (c == OP_CBRA || c == OP_SCBRA || | ||
|  |            c == OP_CBRAPOS || c == OP_SCBRAPOS) | ||
|  |     { | ||
|  |     int n = (int)GET2(code, 1+LINK_SIZE); | ||
|  |     if (n == number) return (PCRE2_UCHAR *)code; | ||
|  |     code += PRIV(OP_lengths)[c]; | ||
|  |     } | ||
|  | 
 | ||
|  |   /* Otherwise, we can get the item's length from the table, except that for
 | ||
|  |   repeated character types, we have to test for \p and \P, which have an extra | ||
|  |   two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we | ||
|  |   must add in its length. */ | ||
|  | 
 | ||
|  |   else | ||
|  |     { | ||
|  |     switch(c) | ||
|  |       { | ||
|  |       case OP_TYPESTAR: | ||
|  |       case OP_TYPEMINSTAR: | ||
|  |       case OP_TYPEPLUS: | ||
|  |       case OP_TYPEMINPLUS: | ||
|  |       case OP_TYPEQUERY: | ||
|  |       case OP_TYPEMINQUERY: | ||
|  |       case OP_TYPEPOSSTAR: | ||
|  |       case OP_TYPEPOSPLUS: | ||
|  |       case OP_TYPEPOSQUERY: | ||
|  |       if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; | ||
|  |       break; | ||
|  | 
 | ||
|  |       case OP_TYPEUPTO: | ||
|  |       case OP_TYPEMINUPTO: | ||
|  |       case OP_TYPEEXACT: | ||
|  |       case OP_TYPEPOSUPTO: | ||
|  |       if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) | ||
|  |         code += 2; | ||
|  |       break; | ||
|  | 
 | ||
|  |       case OP_MARK: | ||
|  |       case OP_COMMIT_ARG: | ||
|  |       case OP_PRUNE_ARG: | ||
|  |       case OP_SKIP_ARG: | ||
|  |       case OP_THEN_ARG: | ||
|  |       code += code[1]; | ||
|  |       break; | ||
|  |       } | ||
|  | 
 | ||
|  |     /* Add in the fixed length from the table */ | ||
|  | 
 | ||
|  |     code += PRIV(OP_lengths)[c]; | ||
|  | 
 | ||
|  |   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
 | ||
|  |   followed by a multi-byte character. The length in the table is a minimum, so | ||
|  |   we have to arrange to skip the extra bytes. */ | ||
|  | 
 | ||
|  | #ifdef MAYBE_UTF_MULTI
 | ||
|  |     if (utf) switch(c) | ||
|  |       { | ||
|  |       case OP_CHAR: | ||
|  |       case OP_CHARI: | ||
|  |       case OP_NOT: | ||
|  |       case OP_NOTI: | ||
|  |       case OP_EXACT: | ||
|  |       case OP_EXACTI: | ||
|  |       case OP_NOTEXACT: | ||
|  |       case OP_NOTEXACTI: | ||
|  |       case OP_UPTO: | ||
|  |       case OP_UPTOI: | ||
|  |       case OP_NOTUPTO: | ||
|  |       case OP_NOTUPTOI: | ||
|  |       case OP_MINUPTO: | ||
|  |       case OP_MINUPTOI: | ||
|  |       case OP_NOTMINUPTO: | ||
|  |       case OP_NOTMINUPTOI: | ||
|  |       case OP_POSUPTO: | ||
|  |       case OP_POSUPTOI: | ||
|  |       case OP_NOTPOSUPTO: | ||
|  |       case OP_NOTPOSUPTOI: | ||
|  |       case OP_STAR: | ||
|  |       case OP_STARI: | ||
|  |       case OP_NOTSTAR: | ||
|  |       case OP_NOTSTARI: | ||
|  |       case OP_MINSTAR: | ||
|  |       case OP_MINSTARI: | ||
|  |       case OP_NOTMINSTAR: | ||
|  |       case OP_NOTMINSTARI: | ||
|  |       case OP_POSSTAR: | ||
|  |       case OP_POSSTARI: | ||
|  |       case OP_NOTPOSSTAR: | ||
|  |       case OP_NOTPOSSTARI: | ||
|  |       case OP_PLUS: | ||
|  |       case OP_PLUSI: | ||
|  |       case OP_NOTPLUS: | ||
|  |       case OP_NOTPLUSI: | ||
|  |       case OP_MINPLUS: | ||
|  |       case OP_MINPLUSI: | ||
|  |       case OP_NOTMINPLUS: | ||
|  |       case OP_NOTMINPLUSI: | ||
|  |       case OP_POSPLUS: | ||
|  |       case OP_POSPLUSI: | ||
|  |       case OP_NOTPOSPLUS: | ||
|  |       case OP_NOTPOSPLUSI: | ||
|  |       case OP_QUERY: | ||
|  |       case OP_QUERYI: | ||
|  |       case OP_NOTQUERY: | ||
|  |       case OP_NOTQUERYI: | ||
|  |       case OP_MINQUERY: | ||
|  |       case OP_MINQUERYI: | ||
|  |       case OP_NOTMINQUERY: | ||
|  |       case OP_NOTMINQUERYI: | ||
|  |       case OP_POSQUERY: | ||
|  |       case OP_POSQUERYI: | ||
|  |       case OP_NOTPOSQUERY: | ||
|  |       case OP_NOTPOSQUERYI: | ||
|  |       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); | ||
|  |       break; | ||
|  |       } | ||
|  | #else
 | ||
|  |     (void)(utf);  /* Keep compiler happy by referencing function argument */ | ||
|  | #endif  /* MAYBE_UTF_MULTI */
 | ||
|  |     } | ||
|  |   } | ||
|  | } | ||
|  | 
 | ||
/* End of pcre2_find_bracket.c */