287 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			287 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|  | /*************************************************
 | ||
|  | *      Perl-Compatible Regular Expressions       * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | /* PCRE is a library of functions to support regular expressions whose syntax
 | ||
|  | and semantics are as close as possible to those of the Perl 5 language. | ||
|  | 
 | ||
|  |                        Written by Philip Hazel | ||
|  |      Original API code Copyright (c) 1997-2012 University of Cambridge | ||
|  |           New API code Copyright (c) 2016-2020 University of Cambridge | ||
|  | 
 | ||
|  | ----------------------------------------------------------------------------- | ||
|  | Redistribution and use in source and binary forms, with or without | ||
|  | modification, are permitted provided that the following conditions are met: | ||
|  | 
 | ||
|  |     * Redistributions of source code must retain the above copyright notice, | ||
|  |       this list of conditions and the following disclaimer. | ||
|  | 
 | ||
|  |     * Redistributions in binary form must reproduce the above copyright | ||
|  |       notice, this list of conditions and the following disclaimer in the | ||
|  |       documentation and/or other materials provided with the distribution. | ||
|  | 
 | ||
|  |     * Neither the name of the University of Cambridge nor the names of its | ||
|  |       contributors may be used to endorse or promote products derived from | ||
|  |       this software without specific prior written permission. | ||
|  | 
 | ||
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
|  | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
|  | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
|  | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
|  | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
|  | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
|  | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
|  | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
|  | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
|  | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
|  | POSSIBILITY OF SUCH DAMAGE. | ||
|  | ----------------------------------------------------------------------------- | ||
|  | */ | ||
|  | 
 | ||
|  | /* This module contains functions for serializing and deserializing
 | ||
|  | a sequence of compiled codes. */ | ||
|  | 
 | ||
|  | 
 | ||
|  | #ifdef HAVE_CONFIG_H
 | ||
|  | #include "config.h"
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 
 | ||
|  | #include "pcre2_internal.h"
 | ||
|  | 
 | ||
|  | /* Magic number to provide a small check against being handed junk. */ | ||
|  | 
 | ||
|  | #define SERIALIZED_DATA_MAGIC 0x50523253u
 | ||
|  | 
 | ||
|  | /* Deserialization is limited to the current PCRE version and
 | ||
|  | character width. */ | ||
|  | 
 | ||
|  | #define SERIALIZED_DATA_VERSION \
 | ||
|  |   ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16)) | ||
|  | 
 | ||
|  | #define SERIALIZED_DATA_CONFIG \
 | ||
|  |   (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16)) | ||
|  | 
 | ||
|  | 
 | ||
|  | 
 | ||
|  | /*************************************************
 | ||
|  | *           Serialize compiled patterns          * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION | ||
|  | pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes, | ||
|  |    uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, | ||
|  |    pcre2_general_context *gcontext) | ||
|  | { | ||
|  | uint8_t *bytes; | ||
|  | uint8_t *dst_bytes; | ||
|  | int32_t i; | ||
|  | PCRE2_SIZE total_size; | ||
|  | const pcre2_real_code *re; | ||
|  | const uint8_t *tables; | ||
|  | pcre2_serialized_data *data; | ||
|  | 
 | ||
|  | const pcre2_memctl *memctl = (gcontext != NULL) ? | ||
|  |   &gcontext->memctl : &PRIV(default_compile_context).memctl; | ||
|  | 
 | ||
|  | if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL) | ||
|  |   return PCRE2_ERROR_NULL; | ||
|  | 
 | ||
|  | if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; | ||
|  | 
 | ||
|  | /* Compute total size. */ | ||
|  | total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH; | ||
|  | tables = NULL; | ||
|  | 
 | ||
|  | for (i = 0; i < number_of_codes; i++) | ||
|  |   { | ||
|  |   if (codes[i] == NULL) return PCRE2_ERROR_NULL; | ||
|  |   re = (const pcre2_real_code *)(codes[i]); | ||
|  |   if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; | ||
|  |   if (tables == NULL) | ||
|  |     tables = re->tables; | ||
|  |   else if (tables != re->tables) | ||
|  |     return PCRE2_ERROR_MIXEDTABLES; | ||
|  |   total_size += re->blocksize; | ||
|  |   } | ||
|  | 
 | ||
|  | /* Initialize the byte stream. */ | ||
|  | bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data); | ||
|  | if (bytes == NULL) return PCRE2_ERROR_NOMEMORY; | ||
|  | 
 | ||
|  | /* The controller is stored as a hidden parameter. */ | ||
|  | memcpy(bytes, memctl, sizeof(pcre2_memctl)); | ||
|  | bytes += sizeof(pcre2_memctl); | ||
|  | 
 | ||
|  | data = (pcre2_serialized_data *)bytes; | ||
|  | data->magic = SERIALIZED_DATA_MAGIC; | ||
|  | data->version = SERIALIZED_DATA_VERSION; | ||
|  | data->config = SERIALIZED_DATA_CONFIG; | ||
|  | data->number_of_codes = number_of_codes; | ||
|  | 
 | ||
|  | /* Copy all compiled code data. */ | ||
|  | dst_bytes = bytes + sizeof(pcre2_serialized_data); | ||
|  | memcpy(dst_bytes, tables, TABLES_LENGTH); | ||
|  | dst_bytes += TABLES_LENGTH; | ||
|  | 
 | ||
|  | for (i = 0; i < number_of_codes; i++) | ||
|  |   { | ||
|  |   re = (const pcre2_real_code *)(codes[i]); | ||
|  |   (void)memcpy(dst_bytes, (char *)re, re->blocksize); | ||
|  |    | ||
|  |   /* Certain fields in the compiled code block are re-set during 
 | ||
|  |   deserialization. In order to ensure that the serialized data stream is always  | ||
|  |   the same for the same pattern, set them to zero here. We can't assume the  | ||
|  |   copy of the pattern is correctly aligned for accessing the fields as part of  | ||
|  |   a structure. Note the use of sizeof(void *) in the second of these, to | ||
|  |   specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a  | ||
|  |   pointer to uint8_t), gcc gives a warning because the first argument is also a  | ||
|  |   pointer to uint8_t. Casting the first argument to (void *) can stop this, but  | ||
|  |   it didn't stop Coverity giving the same complaint. */ | ||
|  |    | ||
|  |   (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,  | ||
|  |     sizeof(pcre2_memctl)); | ||
|  |   (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,  | ||
|  |     sizeof(void *)); | ||
|  |   (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0, | ||
|  |     sizeof(void *));         | ||
|  |   | ||
|  |   dst_bytes += re->blocksize; | ||
|  |   } | ||
|  | 
 | ||
|  | *serialized_bytes = bytes; | ||
|  | *serialized_size = total_size; | ||
|  | return number_of_codes; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | /*************************************************
 | ||
|  | *          Deserialize compiled patterns         * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION | ||
|  | pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes, | ||
|  |    const uint8_t *bytes, pcre2_general_context *gcontext) | ||
|  | { | ||
|  | const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; | ||
|  | const pcre2_memctl *memctl = (gcontext != NULL) ? | ||
|  |   &gcontext->memctl : &PRIV(default_compile_context).memctl; | ||
|  | 
 | ||
|  | const uint8_t *src_bytes; | ||
|  | pcre2_real_code *dst_re; | ||
|  | uint8_t *tables; | ||
|  | int32_t i, j; | ||
|  | 
 | ||
|  | /* Sanity checks. */ | ||
|  | 
 | ||
|  | if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL; | ||
|  | if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; | ||
|  | if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA; | ||
|  | if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; | ||
|  | if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; | ||
|  | if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; | ||
|  | 
 | ||
|  | if (number_of_codes > data->number_of_codes) | ||
|  |   number_of_codes = data->number_of_codes; | ||
|  | 
 | ||
|  | src_bytes = bytes + sizeof(pcre2_serialized_data); | ||
|  | 
 | ||
|  | /* Decode tables. The reference count for the tables is stored immediately
 | ||
|  | following them. */ | ||
|  | 
 | ||
|  | tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data); | ||
|  | if (tables == NULL) return PCRE2_ERROR_NOMEMORY; | ||
|  | 
 | ||
|  | memcpy(tables, src_bytes, TABLES_LENGTH); | ||
|  | *(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes; | ||
|  | src_bytes += TABLES_LENGTH; | ||
|  | 
 | ||
|  | /* Decode the byte stream. We must not try to read the size from the compiled
 | ||
|  | code block in the stream, because it might be unaligned, which causes errors on | ||
|  | hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type | ||
|  | of the blocksize field is given its own name to ensure that it is the same here | ||
|  | as in the block. */ | ||
|  | 
 | ||
|  | for (i = 0; i < number_of_codes; i++) | ||
|  |   { | ||
|  |   CODE_BLOCKSIZE_TYPE blocksize; | ||
|  |   memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize), | ||
|  |     sizeof(CODE_BLOCKSIZE_TYPE)); | ||
|  |   if (blocksize <= sizeof(pcre2_real_code)) | ||
|  |     return PCRE2_ERROR_BADSERIALIZEDDATA; | ||
|  | 
 | ||
|  |   /* The allocator provided by gcontext replaces the original one. */ | ||
|  | 
 | ||
|  |   dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize, | ||
|  |     (pcre2_memctl *)gcontext); | ||
|  |   if (dst_re == NULL) | ||
|  |     { | ||
|  |     memctl->free(tables, memctl->memory_data); | ||
|  |     for (j = 0; j < i; j++) | ||
|  |       { | ||
|  |       memctl->free(codes[j], memctl->memory_data); | ||
|  |       codes[j] = NULL; | ||
|  |       } | ||
|  |     return PCRE2_ERROR_NOMEMORY; | ||
|  |     } | ||
|  | 
 | ||
|  |   /* The new allocator must be preserved. */ | ||
|  | 
 | ||
|  |   memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl), | ||
|  |     src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl)); | ||
|  |   if (dst_re->magic_number != MAGIC_NUMBER || | ||
|  |       dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || | ||
|  |       dst_re->name_count > MAX_NAME_COUNT) | ||
|  |     {    | ||
|  |     memctl->free(dst_re, memctl->memory_data);  | ||
|  |     return PCRE2_ERROR_BADSERIALIZEDDATA; | ||
|  |     }  | ||
|  | 
 | ||
|  |   /* At the moment only one table is supported. */ | ||
|  | 
 | ||
|  |   dst_re->tables = tables; | ||
|  |   dst_re->executable_jit = NULL; | ||
|  |   dst_re->flags |= PCRE2_DEREF_TABLES; | ||
|  | 
 | ||
|  |   codes[i] = dst_re; | ||
|  |   src_bytes += blocksize; | ||
|  |   } | ||
|  | 
 | ||
|  | return number_of_codes; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | /*************************************************
 | ||
|  | *    Get the number of serialized patterns       * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION | ||
|  | pcre2_serialize_get_number_of_codes(const uint8_t *bytes) | ||
|  | { | ||
|  | const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; | ||
|  | 
 | ||
|  | if (data == NULL) return PCRE2_ERROR_NULL; | ||
|  | if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; | ||
|  | if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; | ||
|  | if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; | ||
|  | 
 | ||
|  | return data->number_of_codes; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | /*************************************************
 | ||
|  | *            Free the allocated stream           * | ||
|  | *************************************************/ | ||
|  | 
 | ||
|  | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION | ||
|  | pcre2_serialize_free(uint8_t *bytes) | ||
|  | { | ||
|  | if (bytes != NULL) | ||
|  |   { | ||
|  |   pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl)); | ||
|  |   memctl->free(memctl, memctl->memory_data); | ||
|  |   } | ||
|  | } | ||
|  | 
 | ||
|  | /* End of pcre2_serialize.c */ |