220 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			220 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*      Perl-Compatible Regular Expressions       *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* PCRE is a library of functions to support regular expressions whose syntax
							 | 
						||
| 
								 | 
							
								and semantics are as close as possible to those of the Perl 5 language.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                       Written by Philip Hazel
							 | 
						||
| 
								 | 
							
								     Original API code Copyright (c) 1997-2012 University of Cambridge
							 | 
						||
| 
								 | 
							
								          New API code Copyright (c) 2016-2018 University of Cambridge
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								-----------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								Redistribution and use in source and binary forms, with or without
							 | 
						||
| 
								 | 
							
								modification, are permitted provided that the following conditions are met:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Redistributions of source code must retain the above copyright notice,
							 | 
						||
| 
								 | 
							
								      this list of conditions and the following disclaimer.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Redistributions in binary form must reproduce the above copyright
							 | 
						||
| 
								 | 
							
								      notice, this list of conditions and the following disclaimer in the
							 | 
						||
| 
								 | 
							
								      documentation and/or other materials provided with the distribution.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    * Neither the name of the University of Cambridge nor the names of its
							 | 
						||
| 
								 | 
							
								      contributors may be used to endorse or promote products derived from
							 | 
						||
| 
								 | 
							
								      this software without specific prior written permission.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
							 | 
						||
| 
								 | 
							
								AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
							 | 
						||
| 
								 | 
							
								IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
							 | 
						||
| 
								 | 
							
								ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
							 | 
						||
| 
								 | 
							
								LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
							 | 
						||
| 
								 | 
							
								CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
							 | 
						||
| 
								 | 
							
								SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
							 | 
						||
| 
								 | 
							
								INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
							 | 
						||
| 
								 | 
							
								CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
							 | 
						||
| 
								 | 
							
								ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
							 | 
						||
| 
								 | 
							
								POSSIBILITY OF SUCH DAMAGE.
							 | 
						||
| 
								 | 
							
								-----------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This module contains a single function that scans through a compiled pattern
							 | 
						||
| 
								 | 
							
								until it finds a capturing bracket with the given number, or, if the number is
							 | 
						||
| 
								 | 
							
								negative, an instance of OP_REVERSE for a lookbehind. The function is called
							 | 
						||
| 
								 | 
							
								from pcre2_compile.c and also from pcre2_study.c when finding the minimum
							 | 
						||
| 
								 | 
							
								matching length. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef HAVE_CONFIG_H
							 | 
						||
| 
								 | 
							
								#include "config.h"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "pcre2_internal.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*************************************************
							 | 
						||
| 
								 | 
							
								*    Scan compiled regex for specific bracket    *
							 | 
						||
| 
								 | 
							
								*************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								Arguments:
							 | 
						||
| 
								 | 
							
								  code        points to start of expression
							 | 
						||
| 
								 | 
							
								  utf         TRUE in UTF mode
							 | 
						||
| 
								 | 
							
								  number      the required bracket number or negative to find a lookbehind
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Returns:      pointer to the opcode for the bracket, or NULL if not found
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								PCRE2_SPTR
							 | 
						||
| 
								 | 
							
								PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								for (;;)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								  PCRE2_UCHAR c = *code;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_END) return NULL;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* XCLASS is used for classes that cannot be represented just by a bit map.
							 | 
						||
| 
								 | 
							
								  This includes negated single high-valued characters. CALLOUT_STR is used for
							 | 
						||
| 
								 | 
							
								  callouts with string arguments. In both cases the length in the table is
							 | 
						||
| 
								 | 
							
								  zero; the actual length is stored in the compiled code. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (c == OP_XCLASS) code += GET(code, 1);
							 | 
						||
| 
								 | 
							
								    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Handle lookbehind */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  else if (c == OP_REVERSE)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    if (number < 0) return (PCRE2_UCHAR *)code;
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Handle capturing bracket */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  else if (c == OP_CBRA || c == OP_SCBRA ||
							 | 
						||
| 
								 | 
							
								           c == OP_CBRAPOS || c == OP_SCBRAPOS)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    int n = (int)GET2(code, 1+LINK_SIZE);
							 | 
						||
| 
								 | 
							
								    if (n == number) return (PCRE2_UCHAR *)code;
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Otherwise, we can get the item's length from the table, except that for
							 | 
						||
| 
								 | 
							
								  repeated character types, we have to test for \p and \P, which have an extra
							 | 
						||
| 
								 | 
							
								  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
							 | 
						||
| 
								 | 
							
								  must add in its length. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  else
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    switch(c)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      case OP_TYPESTAR:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEMINSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEMINPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEMINQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEPOSSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEPOSPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEPOSQUERY:
							 | 
						||
| 
								 | 
							
								      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_TYPEUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEMINUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEEXACT:
							 | 
						||
| 
								 | 
							
								      case OP_TYPEPOSUPTO:
							 | 
						||
| 
								 | 
							
								      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
							 | 
						||
| 
								 | 
							
								        code += 2;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case OP_MARK:
							 | 
						||
| 
								 | 
							
								      case OP_COMMIT_ARG:
							 | 
						||
| 
								 | 
							
								      case OP_PRUNE_ARG:
							 | 
						||
| 
								 | 
							
								      case OP_SKIP_ARG:
							 | 
						||
| 
								 | 
							
								      case OP_THEN_ARG:
							 | 
						||
| 
								 | 
							
								      code += code[1];
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /* Add in the fixed length from the table */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    code += PRIV(OP_lengths)[c];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
							 | 
						||
| 
								 | 
							
								  followed by a multi-byte character. The length in the table is a minimum, so
							 | 
						||
| 
								 | 
							
								  we have to arrange to skip the extra bytes. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef MAYBE_UTF_MULTI
							 | 
						||
| 
								 | 
							
								    if (utf) switch(c)
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								      case OP_CHAR:
							 | 
						||
| 
								 | 
							
								      case OP_CHARI:
							 | 
						||
| 
								 | 
							
								      case OP_NOT:
							 | 
						||
| 
								 | 
							
								      case OP_NOTI:
							 | 
						||
| 
								 | 
							
								      case OP_EXACT:
							 | 
						||
| 
								 | 
							
								      case OP_EXACTI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTEXACT:
							 | 
						||
| 
								 | 
							
								      case OP_NOTEXACTI:
							 | 
						||
| 
								 | 
							
								      case OP_UPTO:
							 | 
						||
| 
								 | 
							
								      case OP_UPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_NOTUPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_MINUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_MINUPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINUPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_POSUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_POSUPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSUPTO:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSUPTOI:
							 | 
						||
| 
								 | 
							
								      case OP_STAR:
							 | 
						||
| 
								 | 
							
								      case OP_STARI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_NOTSTARI:
							 | 
						||
| 
								 | 
							
								      case OP_MINSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_MINSTARI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINSTARI:
							 | 
						||
| 
								 | 
							
								      case OP_POSSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_POSSTARI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSSTAR:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSSTARI:
							 | 
						||
| 
								 | 
							
								      case OP_PLUS:
							 | 
						||
| 
								 | 
							
								      case OP_PLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_MINPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_MINPLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINPLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_POSPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_POSPLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSPLUS:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSPLUSI:
							 | 
						||
| 
								 | 
							
								      case OP_QUERY:
							 | 
						||
| 
								 | 
							
								      case OP_QUERYI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_NOTQUERYI:
							 | 
						||
| 
								 | 
							
								      case OP_MINQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_MINQUERYI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_NOTMINQUERYI:
							 | 
						||
| 
								 | 
							
								      case OP_POSQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_POSQUERYI:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSQUERY:
							 | 
						||
| 
								 | 
							
								      case OP_NOTPOSQUERYI:
							 | 
						||
| 
								 | 
							
								      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								    (void)(utf);  /* Keep compiler happy by referencing function argument */
							 | 
						||
| 
								 | 
							
								#endif  /* MAYBE_UTF_MULTI */
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* End of pcre2_find_bracket.c */
							 |