mirror of
https://github.com/google/pebble.git
synced 2025-05-28 14:03:12 +00:00
Import of the watch repository from Pebble
This commit is contained in:
commit
3b92768480
10334 changed files with 2564465 additions and 0 deletions
514
third_party/jerryscript/jerry-core/lit/lit-char-helpers.c
vendored
Normal file
514
third_party/jerryscript/jerry-core/lit/lit-char-helpers.c
vendored
Normal file
|
@ -0,0 +1,514 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "lit-char-helpers.h"
|
||||
#include "lit/lit-unicode-ranges.inc.h"
|
||||
#include "lit-strings.h"
|
||||
|
||||
/* Element count of a real array object (sizeof-based, so it must not be applied to a pointer). */
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
||||
|
||||
/**
|
||||
* Binary search algorithm that searches the a
|
||||
* character in the given char array.
|
||||
*
|
||||
* @return true - if the character is in the given array
|
||||
* false - otherwise
|
||||
*/
|
||||
static bool
|
||||
search_char_in_char_array (ecma_char_t c, /**< code unit */
|
||||
const ecma_char_t *array, /**< array */
|
||||
int size_of_array) /**< length of the array */
|
||||
{
|
||||
int bottom = 0;
|
||||
int top = size_of_array - 1;
|
||||
|
||||
while (bottom <= top)
|
||||
{
|
||||
int middle = (bottom + top) / 2;
|
||||
ecma_char_t current = array[middle];
|
||||
|
||||
if (current == c)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (c < current)
|
||||
{
|
||||
top = middle - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
bottom = middle + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
} /* search_char_in_char_array */
|
||||
|
||||
/**
|
||||
* Binary search algorithm that searches a character in the given intervals.
|
||||
* Intervals specifed by two arrays. The first one contains the starting points
|
||||
* of the intervals, the second one contains the length of them.
|
||||
*
|
||||
* @return true - if the the character is included (inclusively) in one of the intervals in the given array
|
||||
* false - otherwise
|
||||
*/
|
||||
static bool
|
||||
search_char_in_interval_array (ecma_char_t c, /**< code unit */
|
||||
const ecma_char_t *array_sp, /**< array of interval starting points */
|
||||
const uint8_t *lengths, /**< array of interval lengths */
|
||||
int size_of_array) /**< length of the array */
|
||||
{
|
||||
int bottom = 0;
|
||||
int top = size_of_array - 1;
|
||||
|
||||
while (bottom <= top)
|
||||
{
|
||||
int middle = (bottom + top) / 2;
|
||||
ecma_char_t current_sp = array_sp[middle];
|
||||
|
||||
if (current_sp <= c && c <= current_sp + lengths[middle])
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (c > current_sp)
|
||||
{
|
||||
bottom = middle + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
top = middle - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
} /* search_char_in_interval_array */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of the Format-Control characters
|
||||
*
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 1,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_format_control (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c == LIT_CHAR_ZWNJ
|
||||
|| c == LIT_CHAR_ZWJ
|
||||
|| c == LIT_CHAR_BOM);
|
||||
} /* lit_char_is_format_control */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of the Whitespace characters including those
|
||||
* that fall into "Space, Separator" ("Zs") Unicode character category.
|
||||
*
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return (c == LIT_CHAR_TAB
|
||||
|| c == LIT_CHAR_VTAB
|
||||
|| c == LIT_CHAR_FF
|
||||
|| c == LIT_CHAR_SP);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (c == LIT_CHAR_NBSP
|
||||
|| c == LIT_CHAR_BOM
|
||||
|| (c >= unicode_separator_char_interv_sps[0]
|
||||
&& c <= unicode_separator_char_interv_sps[0] + unicode_separator_char_interv_lens[0])
|
||||
|| search_char_in_char_array (c, unicode_separator_chars, NUM_OF_ELEMENTS (unicode_separator_chars)));
|
||||
}
|
||||
} /* lit_char_is_white_space */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of LineTerminator characters
|
||||
*
|
||||
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c == LIT_CHAR_LF
|
||||
|| c == LIT_CHAR_CR
|
||||
|| c == LIT_CHAR_LS
|
||||
|| c == LIT_CHAR_PS);
|
||||
} /* lit_char_is_line_terminator */
|
||||
|
||||
/**
|
||||
* Check if specified character is a unicode letter
|
||||
*
|
||||
* Note:
|
||||
* Unicode letter is a character, included into one of the following categories:
|
||||
* - Uppercase letter (Lu);
|
||||
* - Lowercase letter (Ll);
|
||||
* - Titlecase letter (Lt);
|
||||
* - Modifier letter (Lm);
|
||||
* - Other letter (Lo);
|
||||
* - Letter number (Nl).
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into one of the listed categories,
|
||||
* false - otherwise.
|
||||
*/
|
||||
static bool
|
||||
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (search_char_in_interval_array (c, unicode_letter_interv_sps, unicode_letter_interv_lens,
|
||||
NUM_OF_ELEMENTS (unicode_letter_interv_sps))
|
||||
|| search_char_in_char_array (c, unicode_letter_chars, NUM_OF_ELEMENTS (unicode_letter_chars)));
|
||||
} /* lit_char_is_unicode_letter */
|
||||
|
||||
/**
|
||||
* Check if specified character is a non-letter character and can be used as a
|
||||
* non-first character of an identifier.
|
||||
* These characters coverd by the following unicode categories:
|
||||
* - digit (Nd)
|
||||
* - punctuation mark (Mn, Mc)
|
||||
* - connector punctuation (Pc)
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v5, 7.6
|
||||
*
|
||||
* @return true - if specified character falls into one of the listed categories,
|
||||
* false - otherwise.
|
||||
*/
|
||||
static bool
|
||||
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (search_char_in_interval_array (c, unicode_non_letter_ident_part_interv_sps,
|
||||
unicode_non_letter_ident_part_interv_lens,
|
||||
NUM_OF_ELEMENTS (unicode_non_letter_ident_part_interv_sps))
|
||||
|| search_char_in_char_array (c, unicode_non_letter_ident_part_chars,
|
||||
NUM_OF_ELEMENTS (unicode_non_letter_ident_part_chars)));
|
||||
} /* lit_char_is_unicode_non_letter_ident_part */
|
||||
|
||||
/**
|
||||
* Checks whether the next UTF8 character is a valid identifier start.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_start (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
||||
{
|
||||
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return lit_char_is_identifier_start_character (*src_p);
|
||||
}
|
||||
|
||||
return lit_char_is_identifier_start_character (lit_utf8_peek_next (src_p));
|
||||
} /* lit_char_is_identifier_start */
|
||||
|
||||
/**
|
||||
* Checks whether the character is a valid identifier start.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_start_character (uint16_t chr) /**< EcmaScript character */
|
||||
{
|
||||
// Fast path for ASCII-defined letters
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| chr == LIT_CHAR_DOLLAR_SIGN
|
||||
|| chr == LIT_CHAR_UNDERSCORE);
|
||||
}
|
||||
|
||||
return lit_char_is_unicode_letter (chr);
|
||||
} /* lit_char_is_identifier_start_character */
|
||||
|
||||
/**
|
||||
* Checks whether the next UTF8 character is a valid identifier part.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
||||
{
|
||||
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return lit_char_is_identifier_part_character (*src_p);
|
||||
}
|
||||
|
||||
return lit_char_is_identifier_part_character (lit_utf8_peek_next (src_p));
|
||||
} /* lit_char_is_identifier_part */
|
||||
|
||||
/**
|
||||
* Checks whether the character is a valid identifier part.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
|
||||
{
|
||||
// Fast path for ASCII-defined letters
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| (chr >= LIT_CHAR_0 && chr <= LIT_CHAR_9)
|
||||
|| chr == LIT_CHAR_DOLLAR_SIGN
|
||||
|| chr == LIT_CHAR_UNDERSCORE);
|
||||
}
|
||||
|
||||
return (lit_char_is_unicode_letter (chr)
|
||||
|| lit_char_is_unicode_non_letter_ident_part (chr));
|
||||
} /* lit_char_is_identifier_part_character */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
|
||||
*
|
||||
* @return true / false
|
||||
*/
|
||||
bool
|
||||
lit_char_is_octal_digit (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c >= LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_OCTAL_DIGITS_END);
|
||||
} /* lit_char_is_octal_digit */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of DecimalDigit characters (ECMA-262 v5, 7.8.3)
|
||||
*
|
||||
* @return true / false
|
||||
*/
|
||||
bool
|
||||
lit_char_is_decimal_digit (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END);
|
||||
} /* lit_char_is_decimal_digit */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of HexDigit characters (ECMA-262 v5, 7.8.3)
|
||||
*
|
||||
* @return true / false
|
||||
*/
|
||||
bool
|
||||
lit_char_is_hex_digit (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return ((c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
|| (LEXER_TO_ASCII_LOWERCASE (c) >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
|
||||
&& LEXER_TO_ASCII_LOWERCASE (c) <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END));
|
||||
} /* lit_char_is_hex_digit */
|
||||
|
||||
/**
|
||||
* Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
|
||||
*
|
||||
* @return digit value, corresponding to the hex char
|
||||
*/
|
||||
uint32_t
|
||||
lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
|
||||
* one of HexDigit characters */
|
||||
{
|
||||
JERRY_ASSERT (lit_char_is_hex_digit (c));
|
||||
|
||||
if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
{
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
||||
}
|
||||
else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10);
|
||||
}
|
||||
} /* lit_char_hex_to_int */
|
||||
|
||||
/**
|
||||
* Converts a character to UTF8 bytes.
|
||||
*
|
||||
* @return length of the UTF8 representation.
|
||||
*/
|
||||
size_t
|
||||
lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
|
||||
ecma_char_t chr) /**< EcmaScript character */
|
||||
{
|
||||
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
||||
{
|
||||
/* 00000000 0xxxxxxx -> 0xxxxxxx */
|
||||
*dst_p = (uint8_t) chr;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
||||
{
|
||||
/* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
return 2;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (!(chr & ~LIT_UTF8_3_BYTE_CODE_POINT_MAX));
|
||||
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((chr >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
return 3;
|
||||
} /* lit_char_to_utf8_bytes */
|
||||
|
||||
/**
|
||||
* Returns the length of the UTF8 representation of a character.
|
||||
*
|
||||
* @return length of the UTF8 representation.
|
||||
*/
|
||||
size_t
|
||||
lit_char_get_utf8_length (ecma_char_t chr) /**< EcmaScript character */
|
||||
{
|
||||
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
||||
{
|
||||
/* 00000000 0xxxxxxx */
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
||||
{
|
||||
/* 00000yyy yyxxxxxx */
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* zzzzyyyy yyxxxxxx */
|
||||
JERRY_ASSERT (!(chr & ~LIT_UTF8_3_BYTE_CODE_POINT_MAX));
|
||||
return 3;
|
||||
} /* lit_char_get_utf8_length */
|
||||
|
||||
/**
|
||||
* Parse the next number_of_characters hexadecimal character,
|
||||
* and construct a code unit from them. The buffer must
|
||||
* be zero terminated.
|
||||
*
|
||||
* @return true if decoding was successful, false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
|
||||
ecma_char_ptr_t out_code_unit_p) /**< [out] decoded result */
|
||||
{
|
||||
ecma_char_t code_unit = LIT_CHAR_NULL;
|
||||
|
||||
JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);
|
||||
|
||||
for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
|
||||
{
|
||||
code_unit = (ecma_char_t) (code_unit << 4u);
|
||||
|
||||
if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
||||
}
|
||||
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
|
||||
}
|
||||
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
|
||||
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
|
||||
{
|
||||
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
buf_p++;
|
||||
}
|
||||
|
||||
*out_code_unit_p = code_unit;
|
||||
return true;
|
||||
} /* lit_read_code_unit_from_hex */
|
||||
|
||||
/**
|
||||
* Check if specified character is a word character (part of IsWordChar abstract operation)
|
||||
*
|
||||
* See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
|
||||
*
|
||||
* @return true - if the character is a word character
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
||||
{
|
||||
return ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
||||
|| c == LIT_CHAR_UNDERSCORE);
|
||||
} /* lit_char_is_word_char */
|
||||
|
||||
/**
|
||||
* Returns the lowercase character sequence of an ecma character.
|
||||
*
|
||||
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
||||
*
|
||||
* @return the length of the lowercase character sequence
|
||||
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
||||
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
||||
ecma_length_t buffer_size) /**< buffer size */
|
||||
{
|
||||
/* TODO: Needs a proper lower case implementation. See issue #323. */
|
||||
|
||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
|
||||
{
|
||||
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||
return 1;
|
||||
}
|
||||
|
||||
output_buffer_p[0] = character;
|
||||
return 1;
|
||||
} /* lit_char_to_lower_case */
|
||||
|
||||
/**
|
||||
* Returns the uppercase character sequence of an ecma character.
|
||||
*
|
||||
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
||||
*
|
||||
* @return the length of the uppercase character sequence
|
||||
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
||||
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
||||
ecma_length_t buffer_size) /**< buffer size */
|
||||
{
|
||||
/* TODO: Needs a proper upper case implementation. See issue #323. */
|
||||
|
||||
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
|
||||
{
|
||||
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||
return 1;
|
||||
}
|
||||
|
||||
output_buffer_p[0] = character;
|
||||
return 1;
|
||||
} /* lit_char_to_upper_case */
|
249
third_party/jerryscript/jerry-core/lit/lit-char-helpers.h
vendored
Normal file
249
third_party/jerryscript/jerry-core/lit/lit-char-helpers.h
vendored
Normal file
|
@ -0,0 +1,249 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LIT_CHAR_HELPERS_H
|
||||
#define LIT_CHAR_HELPERS_H
|
||||
|
||||
#include "lit-globals.h"
|
||||
|
||||
#define LIT_CHAR_UNDEF ((ecma_char_t) 0xFFFF) /* undefined character */
|
||||
|
||||
/*
|
||||
* Format control characters (ECMA-262 v5, Table 1)
|
||||
*/
|
||||
#define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner */
|
||||
#define LIT_CHAR_ZWJ ((ecma_char_t) 0x200D) /* zero width joiner */
|
||||
#define LIT_CHAR_BOM ((ecma_char_t) 0xFEFF) /* byte order mark */
|
||||
|
||||
extern bool lit_char_is_format_control (ecma_char_t); /* true for ZWNJ, ZWJ and BOM */
|
||||
|
||||
/*
|
||||
* Whitespace characters (ECMA-262 v5, Table 2)
|
||||
*/
|
||||
#define LIT_CHAR_TAB ((ecma_char_t) 0x0009) /* tab */
|
||||
#define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */
|
||||
#define LIT_CHAR_FF ((ecma_char_t) 0x000C) /* form feed */
|
||||
#define LIT_CHAR_SP ((ecma_char_t) 0x0020) /* space */
|
||||
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
|
||||
/* LIT_CHAR_BOM is defined above */
|
||||
|
||||
extern bool lit_char_is_white_space (ecma_char_t); /* true for TAB, VTAB, FF, SP, NBSP, BOM and the unicode separator tables */
|
||||
|
||||
/*
|
||||
* Line terminator characters (ECMA-262 v5, Table 3)
|
||||
*/
|
||||
#define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */
|
||||
#define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */
|
||||
#define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */
|
||||
#define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */
|
||||
|
||||
extern bool lit_char_is_line_terminator (ecma_char_t); /* true for LF, CR, LS and PS */
|
||||
|
||||
/*
|
||||
* String Single Character Escape Sequences (ECMA-262 v5, Table 4)
|
||||
*/
|
||||
#define LIT_CHAR_BS ((ecma_char_t) 0x0008) /* backspace */
|
||||
/* LIT_CHAR_TAB is defined above */
|
||||
/* LIT_CHAR_LF is defined above */
|
||||
/* LIT_CHAR_VTAB is defined above */
|
||||
/* LIT_CHAR_FF is defined above */
|
||||
/* LIT_CHAR_CR is defined above */
|
||||
#define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */
|
||||
#define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */
|
||||
#define LIT_CHAR_BACKSLASH ((ecma_char_t) '\\') /* reverse solidus (backslash) */
|
||||
|
||||
/*
|
||||
* Comment characters (ECMA-262 v5, 7.4)
|
||||
*/
|
||||
#define LIT_CHAR_SLASH ((ecma_char_t) '/') /* solidus */
|
||||
#define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */
|
||||
|
||||
/*
|
||||
* Identifier name characters (ECMA-262 v5, 7.6)
|
||||
*/
|
||||
#define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$') /* dollar sign */
|
||||
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
|
||||
/* LIT_CHAR_BACKSLASH defined above */
|
||||
|
||||
extern bool lit_char_is_identifier_start (const uint8_t *); /* argument points to a UTF8 encoded character */
|
||||
extern bool lit_char_is_identifier_part (const uint8_t *); /* argument points to a UTF8 encoded character */
|
||||
extern bool lit_char_is_identifier_start_character (ecma_char_t); /* argument is an already decoded character */
|
||||
extern bool lit_char_is_identifier_part_character (ecma_char_t); /* argument is an already decoded character */
|
||||
|
||||
/*
|
||||
* Punctuator characters (ECMA-262 v5, 7.7)
|
||||
*/
|
||||
#define LIT_CHAR_LEFT_BRACE ((ecma_char_t) '{') /* left curly bracket */
|
||||
#define LIT_CHAR_RIGHT_BRACE ((ecma_char_t) '}') /* right curly bracket */
|
||||
#define LIT_CHAR_LEFT_PAREN ((ecma_char_t) '(') /* left parenthesis */
|
||||
#define LIT_CHAR_RIGHT_PAREN ((ecma_char_t) ')') /* right parenthesis */
|
||||
#define LIT_CHAR_LEFT_SQUARE ((ecma_char_t) '[') /* left square bracket */
|
||||
#define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */
|
||||
#define LIT_CHAR_DOT ((ecma_char_t) '.') /* dot */
|
||||
#define LIT_CHAR_SEMICOLON ((ecma_char_t) ';') /* semicolon */
|
||||
#define LIT_CHAR_COMMA ((ecma_char_t) ',') /* comma */
|
||||
#define LIT_CHAR_LESS_THAN ((ecma_char_t) '<') /* less-than sign */
|
||||
#define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */
|
||||
#define LIT_CHAR_EQUALS ((ecma_char_t) '=') /* equals sign */
|
||||
#define LIT_CHAR_PLUS ((ecma_char_t) '+') /* plus sign */
|
||||
#define LIT_CHAR_MINUS ((ecma_char_t) '-') /* hyphen-minus */
|
||||
/* LIT_CHAR_ASTERISK is defined above */
|
||||
#define LIT_CHAR_PERCENT ((ecma_char_t) '%') /* percent sign */
|
||||
#define LIT_CHAR_AMPERSAND ((ecma_char_t) '&') /* ampersand */
|
||||
#define LIT_CHAR_VLINE ((ecma_char_t) '|') /* vertical line */
|
||||
#define LIT_CHAR_CIRCUMFLEX ((ecma_char_t) '^') /* circumflex accent */
|
||||
#define LIT_CHAR_EXCLAMATION ((ecma_char_t) '!') /* exclamation mark */
|
||||
#define LIT_CHAR_TILDE ((ecma_char_t) '~') /* tilde */
|
||||
#define LIT_CHAR_QUESTION ((ecma_char_t) '?') /* question mark */
|
||||
#define LIT_CHAR_COLON ((ecma_char_t) ':') /* colon */
|
||||
|
||||
/*
|
||||
* Special characters for String.prototype.replace.
|
||||
*/
|
||||
#define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */
|
||||
|
||||
/**
|
||||
* Uppercase ASCII letters
|
||||
*/
|
||||
#define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A')
|
||||
#define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B')
|
||||
#define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C')
|
||||
#define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D')
|
||||
#define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E')
|
||||
#define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F')
|
||||
#define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G')
|
||||
#define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H')
|
||||
#define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I')
|
||||
#define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J')
|
||||
#define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K')
|
||||
#define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L')
|
||||
#define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M')
|
||||
#define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N')
|
||||
#define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O')
|
||||
#define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P')
|
||||
#define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q')
|
||||
#define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R')
|
||||
#define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S')
|
||||
#define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T')
|
||||
#define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U')
|
||||
#define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V')
|
||||
#define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W')
|
||||
#define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X')
|
||||
#define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y')
|
||||
#define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z')
|
||||
|
||||
/**
|
||||
* Lowercase ASCII letters
|
||||
*/
|
||||
#define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a')
|
||||
#define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b')
|
||||
#define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c')
|
||||
#define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd')
|
||||
#define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e')
|
||||
#define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f')
|
||||
#define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g')
|
||||
#define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h')
|
||||
#define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i')
|
||||
#define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j')
|
||||
#define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k')
|
||||
#define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l')
|
||||
#define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm')
|
||||
#define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n')
|
||||
#define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o')
|
||||
#define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p')
|
||||
#define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q')
|
||||
#define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r')
|
||||
#define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's')
|
||||
#define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't')
|
||||
#define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u')
|
||||
#define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v')
|
||||
#define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w')
|
||||
#define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x')
|
||||
#define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y')
|
||||
#define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z')
|
||||
|
||||
/**
|
||||
* ASCII decimal digits
|
||||
*/
|
||||
#define LIT_CHAR_0 ((ecma_char_t) '0')
|
||||
#define LIT_CHAR_1 ((ecma_char_t) '1')
|
||||
#define LIT_CHAR_2 ((ecma_char_t) '2')
|
||||
#define LIT_CHAR_3 ((ecma_char_t) '3')
|
||||
#define LIT_CHAR_4 ((ecma_char_t) '4')
|
||||
#define LIT_CHAR_5 ((ecma_char_t) '5')
|
||||
#define LIT_CHAR_6 ((ecma_char_t) '6')
|
||||
#define LIT_CHAR_7 ((ecma_char_t) '7')
|
||||
#define LIT_CHAR_8 ((ecma_char_t) '8')
|
||||
#define LIT_CHAR_9 ((ecma_char_t) '9')
|
||||
|
||||
/**
|
||||
* ASCII character ranges
|
||||
*/
|
||||
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters range */
|
||||
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END LIT_CHAR_UPPERCASE_Z
|
||||
|
||||
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters range */
|
||||
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END LIT_CHAR_LOWERCASE_Z
|
||||
|
||||
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters for
|
||||
* hexadecimal digits range */
|
||||
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END LIT_CHAR_UPPERCASE_F
|
||||
|
||||
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters for
|
||||
* hexadecimal digits range */
|
||||
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END LIT_CHAR_LOWERCASE_F
|
||||
|
||||
#define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN LIT_CHAR_0 /* octal digits range */
|
||||
#define LIT_CHAR_ASCII_OCTAL_DIGITS_END LIT_CHAR_7
|
||||
|
||||
#define LIT_CHAR_ASCII_DIGITS_BEGIN LIT_CHAR_0 /* decimal digits range */
|
||||
#define LIT_CHAR_ASCII_DIGITS_END LIT_CHAR_9
|
||||
|
||||
/*
 * Sets the LIT_CHAR_SP (0x0020) bit of the character: 'A'..'Z' become 'a'..'z'
 * and 'a'..'z' stay as they are. Other characters may be changed as well
 * (e.g. '@' becomes '`'), so the result is only meaningful when it is
 * range checked against lowercase letters afterwards.
 */
#define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP)
|
||||
|
||||
extern bool lit_char_is_octal_digit (ecma_char_t); /* true for '0'..'7' */
|
||||
extern bool lit_char_is_decimal_digit (ecma_char_t); /* true for '0'..'9' */
|
||||
extern bool lit_char_is_hex_digit (ecma_char_t); /* true for '0'..'9', 'a'..'f' and 'A'..'F' */
|
||||
extern uint32_t lit_char_hex_to_int (ecma_char_t); /* numeric value of a hex digit, the argument is asserted to be one */
|
||||
extern size_t lit_char_to_utf8_bytes (uint8_t *, ecma_char_t); /* stores 1 to 3 bytes, returns the number of bytes stored */
|
||||
extern size_t lit_char_get_utf8_length (ecma_char_t); /* returns 1, 2 or 3 */
|
||||
|
||||
/*
 * Read a hex encoded code unit from a zero terminated buffer.
 * Arguments: buffer, number of hex characters to read (asserted to be 2 to 4), [out] decoded code unit.
 * Returns false, leaving the output untouched, when a non-hex character is met.
 */
|
||||
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t);
|
||||
|
||||
/**
|
||||
* Null character
|
||||
*/
|
||||
#define LIT_CHAR_NULL ((ecma_char_t) '\0')
|
||||
|
||||
/*
|
||||
* Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
|
||||
*/
|
||||
extern bool lit_char_is_word_char (ecma_char_t); /* true for ASCII letters, decimal digits and '_' */
|
||||
|
||||
/*
|
||||
* Utility functions for uppercasing / lowercasing
|
||||
*/
|
||||
|
||||
/**
|
||||
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
|
||||
*/
|
||||
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
|
||||
|
||||
ecma_length_t lit_char_to_lower_case (ecma_char_t, ecma_char_t *, ecma_length_t); /* input, [out] buffer, buffer size */
|
||||
ecma_length_t lit_char_to_upper_case (ecma_char_t, ecma_char_t *, ecma_length_t); /* input, [out] buffer, buffer size */
|
||||
|
||||
#endif /* !LIT_CHAR_HELPERS_H */
|
147
third_party/jerryscript/jerry-core/lit/lit-globals.h
vendored
Normal file
147
third_party/jerryscript/jerry-core/lit/lit-globals.h
vendored
Normal file
|
@ -0,0 +1,147 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LIT_GLOBALS_H
|
||||
#define LIT_GLOBALS_H
|
||||
|
||||
#include "jrt.h"
|
||||
|
||||
/**
|
||||
* ECMAScript standard defines terms "code unit" and "character" as 16-bit unsigned value
|
||||
* used to represent 16-bit unit of text, this is the same as code unit in UTF-16 (See ECMA-262 5.1 Chapter 6).
|
||||
*
|
||||
* The term "code point" or "Unicode character" is used to refer to a single Unicode scalar value (may be longer
|
||||
* than 16 bits: 0x0 - 0x10FFFF). One code point could be represented with one or two 16-bit code units.
|
||||
*
|
||||
* According to the standard all strings and source text are assumed to be a sequence of code units.
|
||||
* Length of a string equals to number of code units in the string, which is not the same as number of Unicode
|
||||
* characters in a string.
|
||||
*
|
||||
* Internally JerryScript engine uses UTF-8 representation of strings to reduce memory overhead. Unicode character
|
||||
* occupies from one to four bytes in UTF-8 representation.
|
||||
*
|
||||
* Unicode scalar value | Bytes in UTF-8 | Bytes in UTF-16
|
||||
* | (internal representation) |
|
||||
* ----------------------------------------------------------------------
|
||||
* 0x0 - 0x7F | 1 byte | 2 bytes
|
||||
* 0x80 - 0x7FF | 2 bytes | 2 bytes
|
||||
* 0x800 - 0xFFFF | 3 bytes | 2 bytes
|
||||
* 0x10000 - 0x10FFFF | 4 bytes | 4 bytes
|
||||
*
|
||||
* Scalar values from 0xD800 to 0xDFFF are permanently reserved by Unicode standard to encode high and low
|
||||
* surrogates in UTF-16 (Code points 0x10000 - 0x10FFFF are encoded via pair of surrogates in UTF-16).
|
||||
* Despite that the official Unicode standard says that no UTF forms can encode these code points, we allow
|
||||
* them to be encoded inside strings. The reason for that is compatibility with ECMA standard.
|
||||
*
|
||||
* For example, assume a string which consists of one Unicode character: 0x1D700 (Mathematical Italic Small Epsilon).
|
||||
* It has the following representation in UTF-16: 0xD835 0xDF00.
|
||||
*
|
||||
* ECMA standard allows extracting a substring from this string:
|
||||
* > var str = String.fromCharCode (0xD835, 0xDF00); // Create a string containing one character: 0x1D700
|
||||
* > str.length; // 2
|
||||
* > var str1 = str.substring (0, 1);
|
||||
* > str1.length; // 1
|
||||
* > str1.charCodeAt (0); // 55349 (this equals to 0xD835)
|
||||
*
|
||||
* Internally original string would be represented in UTF-8 as the following byte sequence: 0xF0 0x9D 0x9C 0x80.
|
||||
* After substring extraction high surrogate 0xD835 should be encoded via UTF-8: 0xED 0xA0 0xB5.
|
||||
*
|
||||
* Pair of low and high surrogates encoded separately should never occur in internal string representation,
|
||||
* it should be encoded as any code point and occupy 4 bytes. So, when constructing a string from two surrogates,
|
||||
* it should be processed gracefully;
|
||||
* > var str1 = String.fromCharCode (0xD835); // 0xED 0xA0 0xB5 - internal representation
|
||||
* > var str2 = String.fromCharCode (0xDF00); // 0xED 0xBC 0x80 - internal representation
|
||||
* > var str = str1 + str2; // 0xF0 0x9D 0x9C 0x80 - internal representation,
|
||||
* // !!! not 0xED 0xA0 0xB5 0xED 0xBC 0x80
|
||||
*/
|
||||
|
||||
/**
|
||||
* Description of an ecma-character, which represents 16-bit code unit,
|
||||
* which is equal to UTF-16 character (see Chapter 6 from ECMA-262 5.1)
|
||||
*/
|
||||
typedef uint16_t ecma_char_t;
|
||||
|
||||
/**
|
||||
* Description of a collection's/string's length
|
||||
*/
|
||||
typedef uint32_t ecma_length_t;
|
||||
|
||||
/**
|
||||
* Description of an ecma-character pointer
|
||||
*/
|
||||
typedef ecma_char_t *ecma_char_ptr_t;
|
||||
|
||||
/**
|
||||
* Max bytes needed to represent a code unit (utf-16 char) via utf-8 encoding
|
||||
*/
|
||||
#define LIT_UTF8_MAX_BYTES_IN_CODE_UNIT (3)
|
||||
|
||||
/**
|
||||
* Max bytes needed to represent a code point (Unicode character) via utf-8 encoding
|
||||
*/
|
||||
#define LIT_UTF8_MAX_BYTES_IN_CODE_POINT (4)
|
||||
|
||||
/**
|
||||
* Max bytes needed to represent a code unit (utf-16 char) via cesu-8 encoding
|
||||
*/
|
||||
#define LIT_CESU8_MAX_BYTES_IN_CODE_UNIT (3)
|
||||
|
||||
/**
|
||||
* Max bytes needed to represent a code point (Unicode character) via cesu-8 encoding
|
||||
*/
|
||||
#define LIT_CESU8_MAX_BYTES_IN_CODE_POINT (6)
|
||||
|
||||
/**
|
||||
* A byte of utf-8 string
|
||||
*/
|
||||
typedef uint8_t lit_utf8_byte_t;
|
||||
|
||||
/**
|
||||
* Size of a utf-8 string in bytes
|
||||
*/
|
||||
typedef uint32_t lit_utf8_size_t;
|
||||
|
||||
/**
|
||||
* Size of a magic string in bytes
|
||||
*/
|
||||
typedef uint8_t lit_magic_size_t;
|
||||
|
||||
/**
|
||||
* Unicode code point
|
||||
*/
|
||||
typedef uint32_t lit_code_point_t;
|
||||
|
||||
/**
|
||||
* ECMA string hash
|
||||
*/
|
||||
typedef uint16_t lit_string_hash_t;
|
||||
|
||||
/**
|
||||
* Maximum value of ECMA string hash + 1
|
||||
*
|
||||
* Note:
|
||||
* On ARM, this constant can be encoded as an immediate value
|
||||
* while 0xffffu cannot be. Hence using this constant reduces
|
||||
* binary size and improves performance.
|
||||
*/
|
||||
#define LIT_STRING_HASH_LIMIT 0x10000u
|
||||
|
||||
/**
|
||||
* Hash of the frequently used "length" string.
|
||||
*/
|
||||
#define LIT_STRING_LENGTH_HASH 0x3615u
|
||||
|
||||
#endif /* !LIT_GLOBALS_H */
|
261
third_party/jerryscript/jerry-core/lit/lit-magic-strings.c
vendored
Normal file
261
third_party/jerryscript/jerry-core/lit/lit-magic-strings.c
vendored
Normal file
|
@ -0,0 +1,261 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "jcontext.h"
|
||||
#include "lit-magic-strings.h"
|
||||
#include "lit-strings.h"
|
||||
|
||||
/**
|
||||
* Get number of external magic strings
|
||||
*
|
||||
* @return number of the strings, if there were registered,
|
||||
* zero - otherwise.
|
||||
*/
|
||||
uint32_t
|
||||
lit_get_magic_string_ex_count (void)
|
||||
{
|
||||
return JERRY_CONTEXT (lit_magic_string_ex_count);
|
||||
} /* lit_get_magic_string_ex_count */
|
||||
|
||||
/**
|
||||
* Get specified magic string as zero-terminated string
|
||||
*
|
||||
* @return pointer to zero-terminated magic string
|
||||
*/
|
||||
const lit_utf8_byte_t *
|
||||
lit_get_magic_string_utf8 (lit_magic_string_id_t id) /**< magic string id */
|
||||
{
|
||||
static const lit_utf8_byte_t * const magic_strings[] JERRY_CONST_DATA =
|
||||
{
|
||||
#define LIT_MAGIC_STRING_DEF(id, utf8_string) \
|
||||
(const lit_utf8_byte_t *) utf8_string,
|
||||
#include "lit-magic-strings.inc.h"
|
||||
#undef LIT_MAGIC_STRING_DEF
|
||||
};
|
||||
|
||||
JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
|
||||
|
||||
return magic_strings[id];
|
||||
} /* lit_get_magic_string_utf8 */
|
||||
|
||||
/**
|
||||
* Get size of specified magic string
|
||||
*
|
||||
* @return size in bytes
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_get_magic_string_size (lit_magic_string_id_t id) /**< magic string id */
|
||||
{
|
||||
static const lit_magic_size_t lit_magic_string_sizes[] JERRY_CONST_DATA =
|
||||
{
|
||||
#define LIT_MAGIC_STRING_DEF(id, utf8_string) \
|
||||
sizeof(utf8_string) - 1,
|
||||
#include "lit-magic-strings.inc.h"
|
||||
#undef LIT_MAGIC_STRING_DEF
|
||||
};
|
||||
|
||||
JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
|
||||
|
||||
return lit_magic_string_sizes[id];
|
||||
} /* lit_get_magic_string_size */
|
||||
|
||||
/**
|
||||
* Get specified magic string as zero-terminated string from external table
|
||||
*
|
||||
* @return pointer to zero-terminated magic string
|
||||
*/
|
||||
const lit_utf8_byte_t *
|
||||
lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t id) /**< extern magic string id */
|
||||
{
|
||||
if (JERRY_CONTEXT (lit_magic_string_ex_array) && id < JERRY_CONTEXT (lit_magic_string_ex_count))
|
||||
{
|
||||
return JERRY_CONTEXT (lit_magic_string_ex_array)[id];
|
||||
}
|
||||
|
||||
JERRY_UNREACHABLE ();
|
||||
} /* lit_get_magic_string_ex_utf8 */
|
||||
|
||||
/**
|
||||
* Get size of specified external magic string
|
||||
*
|
||||
* @return size in bytes
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_get_magic_string_ex_size (lit_magic_string_ex_id_t id) /**< external magic string id */
|
||||
{
|
||||
return JERRY_CONTEXT (lit_magic_string_ex_sizes)[id];
|
||||
} /* lit_get_magic_string_ex_size */
|
||||
|
||||
/**
|
||||
* Register external magic strings
|
||||
*/
|
||||
void
|
||||
lit_magic_strings_ex_set (const lit_utf8_byte_t **ex_str_items, /**< character arrays, representing
|
||||
* external magic strings' contents */
|
||||
uint32_t count, /**< number of the strings */
|
||||
const lit_utf8_size_t *ex_str_sizes) /**< sizes of the strings */
|
||||
{
|
||||
JERRY_ASSERT (ex_str_items != NULL);
|
||||
JERRY_ASSERT (count > 0);
|
||||
JERRY_ASSERT (ex_str_sizes != NULL);
|
||||
|
||||
JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_array) == NULL);
|
||||
JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_count) == 0);
|
||||
JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes) == NULL);
|
||||
|
||||
/* Set external magic strings information */
|
||||
JERRY_CONTEXT (lit_magic_string_ex_array) = ex_str_items;
|
||||
JERRY_CONTEXT (lit_magic_string_ex_count) = count;
|
||||
JERRY_CONTEXT (lit_magic_string_ex_sizes) = ex_str_sizes;
|
||||
|
||||
#ifndef JERRY_NDEBUG
|
||||
for (lit_magic_string_ex_id_t id = (lit_magic_string_ex_id_t) 0;
|
||||
id < JERRY_CONTEXT (lit_magic_string_ex_count);
|
||||
id = (lit_magic_string_ex_id_t) (id + 1))
|
||||
{
|
||||
lit_utf8_size_t string_size = lit_zt_utf8_string_size (lit_get_magic_string_ex_utf8 (id));
|
||||
JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes)[id] == string_size);
|
||||
JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes)[id] <= LIT_MAGIC_STRING_LENGTH_LIMIT);
|
||||
}
|
||||
#endif /* !JERRY_NDEBUG */
|
||||
} /* lit_magic_strings_ex_set */
|
||||
|
||||
|
||||
/**
|
||||
* Check if passed cesu-8 string equals to one of magic strings
|
||||
* and if equal magic string was found, return it's id in 'out_id_p' argument.
|
||||
*
|
||||
* @return true - if magic string equal to passed string was found,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_is_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_id_t *out_id_p) /**< [out] magic string's id */
|
||||
{
|
||||
/* TODO: Improve performance of search */
|
||||
|
||||
for (lit_magic_string_id_t id = (lit_magic_string_id_t) 0;
|
||||
id < LIT_MAGIC_STRING__COUNT;
|
||||
id = (lit_magic_string_id_t) (id + 1))
|
||||
{
|
||||
if (lit_compare_utf8_string_and_magic_string (string_p, string_size, id))
|
||||
{
|
||||
*out_id_p = id;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
*out_id_p = LIT_MAGIC_STRING__COUNT;
|
||||
|
||||
return false;
|
||||
} /* lit_is_utf8_string_magic */
|
||||
|
||||
/**
|
||||
* Check if passed utf-8 string equals to one of external magic strings
|
||||
* and if equal magic string was found, return it's id in 'out_id_p' argument.
|
||||
*
|
||||
* @return true - if external magic string equal to passed string was found,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_ex_id_t *out_id_p) /**< [out] magic string's id */
|
||||
{
|
||||
/* TODO: Improve performance of search */
|
||||
|
||||
for (lit_magic_string_ex_id_t id = (lit_magic_string_ex_id_t) 0;
|
||||
id < JERRY_CONTEXT (lit_magic_string_ex_count);
|
||||
id = (lit_magic_string_ex_id_t) (id + 1))
|
||||
{
|
||||
if (lit_compare_utf8_string_and_magic_string_ex (string_p, string_size, id))
|
||||
{
|
||||
*out_id_p = id;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
*out_id_p = JERRY_CONTEXT (lit_magic_string_ex_count);
|
||||
|
||||
return false;
|
||||
} /* lit_is_ex_utf8_string_magic */
|
||||
|
||||
/**
|
||||
* Compare utf-8 string and magic string for equality
|
||||
*
|
||||
* @return true if strings are equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_id_t magic_string_id) /**< magic string's id */
|
||||
{
|
||||
return lit_compare_utf8_strings (string_p,
|
||||
string_size,
|
||||
lit_get_magic_string_utf8 (magic_string_id),
|
||||
lit_get_magic_string_size (magic_string_id));
|
||||
} /* lit_compare_utf8_string_and_magic_string */
|
||||
|
||||
/**
|
||||
* Compare utf-8 string and external magic string for equality
|
||||
*
|
||||
* @return true if strings are equal
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *string_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string_size, /**< string size in bytes */
|
||||
lit_magic_string_ex_id_t magic_string_ex_id) /**< external magic string's
|
||||
* id */
|
||||
{
|
||||
return lit_compare_utf8_strings (string_p,
|
||||
string_size,
|
||||
lit_get_magic_string_ex_utf8 (magic_string_ex_id),
|
||||
lit_get_magic_string_ex_size (magic_string_ex_id));
|
||||
} /* lit_compare_utf8_string_and_magic_string_ex */
|
||||
|
||||
/**
|
||||
* Copy magic string to buffer
|
||||
*
|
||||
* Warning:
|
||||
* the routine requires that buffer size is enough
|
||||
*
|
||||
* @return pointer to the byte next to the last copied in the buffer
|
||||
*/
|
||||
extern lit_utf8_byte_t *
|
||||
lit_copy_magic_string_to_buffer (lit_magic_string_id_t id, /**< magic string id */
|
||||
lit_utf8_byte_t *buffer_p, /**< destination buffer */
|
||||
lit_utf8_size_t buffer_size) /**< size of buffer */
|
||||
{
|
||||
const lit_utf8_byte_t *magic_string_bytes_p = lit_get_magic_string_utf8 (id);
|
||||
lit_utf8_size_t magic_string_bytes_count = lit_get_magic_string_size (id);
|
||||
|
||||
const lit_utf8_byte_t *str_iter_p = magic_string_bytes_p;
|
||||
lit_utf8_byte_t *buf_iter_p = buffer_p;
|
||||
lit_utf8_size_t bytes_copied = 0;
|
||||
|
||||
while (magic_string_bytes_count--)
|
||||
{
|
||||
bytes_copied ++;
|
||||
JERRY_ASSERT (bytes_copied <= buffer_size);
|
||||
|
||||
*buf_iter_p++ = *str_iter_p++;
|
||||
}
|
||||
|
||||
return buf_iter_p;
|
||||
} /* lit_copy_magic_string_to_buffer */
|
66
third_party/jerryscript/jerry-core/lit/lit-magic-strings.h
vendored
Normal file
66
third_party/jerryscript/jerry-core/lit/lit-magic-strings.h
vendored
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LIT_MAGIC_STRINGS_H
|
||||
#define LIT_MAGIC_STRINGS_H
|
||||
|
||||
#include "lit-globals.h"
|
||||
|
||||
/**
|
||||
* Limit for magic string length
|
||||
*/
|
||||
#define LIT_MAGIC_STRING_LENGTH_LIMIT 32
|
||||
|
||||
/**
 * Identifiers of ECMA and implementation-defined magic string constants
 *
 * One enumerator is generated for each LIT_MAGIC_STRING_DEF entry of
 * lit-magic-strings.inc.h, in the order the entries appear in that file.
 */
typedef enum
{
#define LIT_MAGIC_STRING_DEF(id, ascii_zt_string) id,
#include "lit-magic-strings.inc.h"
#undef LIT_MAGIC_STRING_DEF

  LIT_MAGIC_STRING__COUNT, /**< number of magic strings */
  LIT_MAGIC_STRING__FORCE_LARGE = INT32_MAX, /**< NOTE(review): presumably present only to force
                                              *   a wide underlying type for the enum - confirm */
} lit_magic_string_id_t;
|
||||
|
||||
/**
|
||||
* Identifiers of implementation-defined external magic string constants
|
||||
*/
|
||||
typedef uint32_t lit_magic_string_ex_id_t;
|
||||
|
||||
extern uint32_t lit_get_magic_string_ex_count (void);
|
||||
|
||||
extern const lit_utf8_byte_t *lit_get_magic_string_utf8 (lit_magic_string_id_t);
|
||||
extern lit_utf8_size_t lit_get_magic_string_size (lit_magic_string_id_t);
|
||||
|
||||
extern const lit_utf8_byte_t *lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t);
|
||||
extern lit_utf8_size_t lit_get_magic_string_ex_size (lit_magic_string_ex_id_t);
|
||||
|
||||
extern void lit_magic_strings_ex_set (const lit_utf8_byte_t **, uint32_t, const lit_utf8_size_t *);
|
||||
|
||||
extern bool lit_is_utf8_string_magic (const lit_utf8_byte_t *, lit_utf8_size_t, lit_magic_string_id_t *);
|
||||
extern bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *, lit_utf8_size_t, lit_magic_string_ex_id_t *);
|
||||
|
||||
extern bool lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *, lit_utf8_size_t,
|
||||
lit_magic_string_id_t);
|
||||
|
||||
extern bool lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *, lit_utf8_size_t,
|
||||
lit_magic_string_ex_id_t);
|
||||
|
||||
extern lit_utf8_byte_t *lit_copy_magic_string_to_buffer (lit_magic_string_id_t, lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
#endif /* !LIT_MAGIC_STRINGS_H */
|
260
third_party/jerryscript/jerry-core/lit/lit-magic-strings.inc.h
vendored
Normal file
260
third_party/jerryscript/jerry-core/lit/lit-magic-strings.inc.h
vendored
Normal file
|
@ -0,0 +1,260 @@
|
|||
/* Copyright 2014-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* List of ECMA magic strings
|
||||
*
|
||||
* These strings must be ascii strings. If non-ascii strings
|
||||
* will be ever needed, a divider will be added to separate
|
||||
* ascii and non-ascii groups.
|
||||
*/
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARGUMENTS, "arguments")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVAL, "eval")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PROTOTYPE, "prototype")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONSTRUCTOR, "constructor")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALLER, "caller")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALLEE, "callee")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNDEFINED, "undefined")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NULL, "null")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FALSE, "false")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TRUE, "true")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BOOLEAN, "boolean")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NUMBER, "number")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING, "string")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_OBJECT, "object")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FUNCTION, "function")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LENGTH, "length")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOURCE, "source")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GLOBAL, "global")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IGNORECASE_UL, "ignoreCase")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MULTILINE, "multiline")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX, "index")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INPUT, "input")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LASTINDEX_UL, "lastIndex")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NAN, "NaN")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INFINITY_UL, "Infinity")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEGATIVE_INFINITY_UL, "-Infinity")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNDEFINED_UL, "Undefined")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NULL_UL, "Null")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_OBJECT_UL, "Object")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FUNCTION_UL, "Function")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARRAY_UL, "Array")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARGUMENTS_UL, "Arguments")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING_UL, "String")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BOOLEAN_UL, "Boolean")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NUMBER_UL, "Number")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DATE_UL, "Date")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REGEXP_UL, "RegExp")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REGEXP_SOURCE_UL, "Source")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ERROR_UL, "Error")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVAL_ERROR_UL, "EvalError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RANGE_ERROR_UL, "RangeError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REFERENCE_ERROR_UL, "ReferenceError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SYNTAX_ERROR_UL, "SyntaxError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TYPE_ERROR_UL, "TypeError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_URI_ERROR_UL, "URIError")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MATH_UL, "Math")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JSON_U, "JSON")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRINGIFY, "stringify")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE, "parse")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE_INT, "parseInt")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE_FLOAT, "parseFloat")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_NAN, "isNaN")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_FINITE, "isFinite")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI, "decodeURI")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI_COMPONENT, "decodeURIComponent")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI, "encodeURI")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI_COMPONENT, "encodeURIComponent")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNESCAPE, "unescape")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_PROTOTYPE_OF_UL, "getPrototypeOf")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_DESCRIPTOR_UL, "getOwnPropertyDescriptor")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_NAMES_UL, "getOwnPropertyNames")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CREATE, "create")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DEFINE_PROPERTY_UL, "defineProperty")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DEFINE_PROPERTIES_UL, "defineProperties")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SEAL, "seal")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FREEZE, "freeze")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PREVENT_EXTENSIONS_UL, "preventExtensions")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_SEALED_UL, "isSealed")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_FROZEN_UL, "isFrozen")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_EXTENSIBLE, "isExtensible")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_KEYS, "keys")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_WRITABLE, "writable")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENUMERABLE, "enumerable")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONFIGURABLE, "configurable")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_VALUE, "value")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET, "get")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET, "set")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_E_U, "E")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LN10_U, "LN10")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LN2_U, "LN2")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG2E_U, "LOG2E")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG10E_U, "LOG10E")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PI_U, "PI")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT1_2_U, "SQRT1_2")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT2_U, "SQRT2")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ABS, "abs")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ACOS, "acos")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ASIN, "asin")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ATAN, "atan")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ATAN2, "atan2")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CEIL, "ceil")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COS, "cos")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EXP, "exp")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLOOR, "floor")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG, "log")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAX, "max")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MIN, "min")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POW, "pow")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RANDOM, "random")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ROUND, "round")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SIN, "sin")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT, "sqrt")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TAN, "tan")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FROM_CHAR_CODE_UL, "fromCharCode")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_ARRAY_UL, "isArray")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_STRING_UL, "toString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_VALUE_OF_UL, "valueOf")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_STRING_UL, "toLocaleString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_HAS_OWN_PROPERTY_UL, "hasOwnProperty")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_PROTOTYPE_OF_UL, "isPrototypeOf")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PROPERTY_IS_ENUMERABLE_UL, "propertyIsEnumerable")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONCAT, "concat")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POP, "pop")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JOIN, "join")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PUSH, "push")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REVERSE, "reverse")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SHIFT, "shift")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLICE, "slice")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SORT, "sort")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLICE, "splice")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNSHIFT, "unshift")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX_OF_UL, "indexOf")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LAST_INDEX_OF_UL, "lastIndexOf")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVERY, "every")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOME, "some")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FOR_EACH_UL, "forEach")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAP, "map")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FILTER, "filter")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REDUCE, "reduce")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REDUCE_RIGHT_UL, "reduceRight")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CHAR_AT_UL, "charAt")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CHAR_CODE_AT_UL, "charCodeAt")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, "localeCompare")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MATCH, "match")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REPLACE, "replace")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SEARCH, "search")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLIT, "split")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SUBSTR, "substr")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SUBSTRING, "substring")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, "toLowerCase")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_LOWER_CASE_UL, "toLocaleLowerCase")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_UPPER_CASE_UL, "toUpperCase")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_UPPER_CASE_UL, "toLocaleUpperCase")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TRIM, "trim")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_FIXED_UL, "toFixed")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_EXPONENTIAL_UL, "toExponential")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_PRECISION_UL, "toPrecision")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NOW, "now")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_DATE_STRING_UL, "toDateString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_TIME_STRING_UL, "toTimeString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_DATE_STRING_UL, "toLocaleDateString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_TIME_STRING_UL, "toLocaleTimeString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_TIME_UL, "getTime")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_FULL_YEAR_UL, "getFullYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UTC_U, "UTC")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_FULL_YEAR_UL, "getUTCFullYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_YEAR_UL, "getYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MONTH_UL, "getMonth")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MONTH_UL, "getUTCMonth")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DATE_UL, "getDate")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_DATE_UL, "getUTCDate")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DAY_UL, "getDay")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_DAY_UL, "getUTCDay")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_HOURS_UL, "getHours")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_HOURS_UL, "getUTCHours")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MINUTES_UL, "getMinutes")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MINUTES_UL, "getUTCMinutes")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_SECONDS_UL, "getSeconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_SECONDS_UL, "getUTCSeconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MILLISECONDS_UL, "getMilliseconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MILLISECONDS_UL, "getUTCMilliseconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_TIMEZONE_OFFSET_UL, "getTimezoneOffset")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_TIME_UL, "setTime")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MILLISECONDS_UL, "setMilliseconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MILLISECONDS_UL, "setUTCMilliseconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_SECONDS_UL, "setSeconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_SECONDS_UL, "setUTCSeconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MINUTES_UL, "setMinutes")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MINUTES_UL, "setUTCMinutes")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_HOURS_UL, "setHours")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_HOURS_UL, "setUTCHours")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_DATE_UL, "setDate")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_DATE_UL, "setUTCDate")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MONTH_UL, "setMonth")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MONTH_UL, "setUTCMonth")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_FULL_YEAR_UL, "setFullYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_FULL_YEAR_UL, "setUTCFullYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_YEAR_UL, "setYear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_UTC_STRING_UL, "toUTCString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_ISO_STRING_UL, "toISOString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_GMT_STRING_UL, "toGMTString")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_JSON_UL, "toJSON")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAX_VALUE_U, "MAX_VALUE")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MIN_VALUE_U, "MIN_VALUE")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POSITIVE_INFINITY_U, "POSITIVE_INFINITY")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEGATIVE_INFINITY_U, "NEGATIVE_INFINITY")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INVALID_DATE_UL, "Invalid Date")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_APPLY, "apply")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALL, "call")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BIND, "bind")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMPILE, "compile")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EXEC, "exec")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TEST, "test")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NAME, "name")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MESSAGE, "message")
|
||||
#ifndef CONFIG_DISABLE_PRINT_BUILTIN
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PRINT, "print")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_G_CHAR, "g")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_I_CHAR, "i")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_M_CHAR, "m")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TIME_SEP_U, "T")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_Z_CHAR, "Z")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLASH_CHAR, "/")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BACKSLASH_CHAR, "\\")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP, "(?:)")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_SQUARE_CHAR, "[")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_SQUARE_CHAR, "]")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_BRACE_CHAR, "{")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_BRACE_CHAR, "}")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_PARENTHESIS_CHAR, "(")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_PARENTHESIS_CHAR, ")")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MINUS_CHAR, "-")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COLON_CHAR, ":")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMMA_CHAR, ",")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOT_CHAR, ".")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR, "\"")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEW_LINE_CHAR, "\n")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPACE_CHAR, " ")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING__EMPTY, "")
|
||||
|
||||
/*
|
||||
* Implementation-defined magic strings
|
||||
*/
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JERRY_UL, "Jerry")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING__FUNCTION_TO_STRING, "function(){/* ecmascript */}")
|
966
third_party/jerryscript/jerry-core/lit/lit-strings.c
vendored
Normal file
966
third_party/jerryscript/jerry-core/lit/lit-strings.c
vendored
Normal file
|
@ -0,0 +1,966 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "lit-strings.h"
|
||||
|
||||
#include "jrt-libc-includes.h"
|
||||
|
||||
/**
|
||||
* Validate utf-8 string
|
||||
*
|
||||
* NOTE:
|
||||
* Isolated surrogates are allowed.
|
||||
* Correct pair of surrogates is not allowed, it should be represented as 4-byte utf-8 character.
|
||||
*
|
||||
* @return true if utf-8 string is well-formed
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t buf_size) /**< string size */
|
||||
{
|
||||
lit_utf8_size_t idx = 0;
|
||||
|
||||
bool is_prev_code_point_high_surrogate = false;
|
||||
while (idx < buf_size)
|
||||
{
|
||||
lit_utf8_byte_t c = utf8_buf_p[idx++];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
is_prev_code_point_high_surrogate = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
lit_code_point_t code_point = 0;
|
||||
lit_code_point_t min_code_point = 0;
|
||||
lit_utf8_size_t extra_bytes_count;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 1;
|
||||
min_code_point = LIT_UTF8_2_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 2;
|
||||
min_code_point = LIT_UTF8_3_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 3;
|
||||
min_code_point = LIT_UTF8_4_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_3_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* utf-8 string could not contain 5- and 6-byte sequences. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (idx + extra_bytes_count > buf_size)
|
||||
{
|
||||
/* utf-8 string breaks in the middle */
|
||||
return false;
|
||||
}
|
||||
|
||||
for (lit_utf8_size_t offset = 0; offset < extra_bytes_count; ++offset)
|
||||
{
|
||||
c = utf8_buf_p[idx + offset];
|
||||
if ((c & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
|
||||
{
|
||||
/* invalid continuation byte */
|
||||
return false;
|
||||
}
|
||||
code_point <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
code_point |= (c & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
if (code_point < min_code_point
|
||||
|| code_point > LIT_UNICODE_CODE_POINT_MAX)
|
||||
{
|
||||
/* utf-8 string doesn't encode valid unicode code point */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (code_point >= LIT_UTF16_HIGH_SURROGATE_MIN
|
||||
&& code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
||||
{
|
||||
is_prev_code_point_high_surrogate = true;
|
||||
}
|
||||
else if (code_point >= LIT_UTF16_LOW_SURROGATE_MIN
|
||||
&& code_point <= LIT_UTF16_LOW_SURROGATE_MAX
|
||||
&& is_prev_code_point_high_surrogate)
|
||||
{
|
||||
/* sequence of high and low surrogate is not allowed */
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
is_prev_code_point_high_surrogate = false;
|
||||
}
|
||||
|
||||
idx += extra_bytes_count;
|
||||
}
|
||||
|
||||
return true;
|
||||
} /* lit_is_utf8_string_valid */
|
||||
|
||||
/**
|
||||
* Validate cesu-8 string
|
||||
*
|
||||
* @return true if cesu-8 string is well-formed
|
||||
* false otherwise
|
||||
*/
|
||||
bool
|
||||
lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t buf_size) /**< string size */
|
||||
{
|
||||
lit_utf8_size_t idx = 0;
|
||||
|
||||
while (idx < buf_size)
|
||||
{
|
||||
lit_utf8_byte_t c = utf8_buf_p[idx++];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
lit_code_point_t code_point = 0;
|
||||
lit_code_point_t min_code_point = 0;
|
||||
lit_utf8_size_t extra_bytes_count;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 1;
|
||||
min_code_point = LIT_UTF8_2_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
extra_bytes_count = 2;
|
||||
min_code_point = LIT_UTF8_3_BYTE_CODE_POINT_MIN;
|
||||
code_point = ((uint32_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (idx + extra_bytes_count > buf_size)
|
||||
{
|
||||
/* cesu-8 string breaks in the middle */
|
||||
return false;
|
||||
}
|
||||
|
||||
for (lit_utf8_size_t offset = 0; offset < extra_bytes_count; ++offset)
|
||||
{
|
||||
c = utf8_buf_p[idx + offset];
|
||||
if ((c & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
|
||||
{
|
||||
/* invalid continuation byte */
|
||||
return false;
|
||||
}
|
||||
code_point <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
code_point |= (c & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
if (code_point < min_code_point)
|
||||
{
|
||||
/* cesu-8 string doesn't encode valid unicode code point */
|
||||
return false;
|
||||
}
|
||||
|
||||
idx += extra_bytes_count;
|
||||
}
|
||||
|
||||
return true;
|
||||
} /* lit_is_cesu8_string_valid */
|
||||
|
||||
/**
|
||||
* Check if the code point is UTF-16 low surrogate
|
||||
*
|
||||
* @return true / false
|
||||
*/
|
||||
bool
|
||||
lit_is_code_point_utf16_low_surrogate (lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
return LIT_UTF16_LOW_SURROGATE_MIN <= code_point && code_point <= LIT_UTF16_LOW_SURROGATE_MAX;
|
||||
} /* lit_is_code_point_utf16_low_surrogate */
|
||||
|
||||
/**
|
||||
* Check if the code point is UTF-16 high surrogate
|
||||
*
|
||||
* @return true / false
|
||||
*/
|
||||
bool
|
||||
lit_is_code_point_utf16_high_surrogate (lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
return LIT_UTF16_HIGH_SURROGATE_MIN <= code_point && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX;
|
||||
} /* lit_is_code_point_utf16_high_surrogate */
|
||||
|
||||
/**
|
||||
* Represents code point (>0xFFFF) as surrogate pair and returns its lower part
|
||||
*
|
||||
* @return lower code_unit of the surrogate pair
|
||||
*/
|
||||
static ecma_char_t
|
||||
convert_code_point_to_low_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
|
||||
{
|
||||
JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);
|
||||
|
||||
ecma_char_t code_unit_bits;
|
||||
code_unit_bits = (ecma_char_t) (code_point & LIT_UTF16_LAST_10_BITS_MASK);
|
||||
|
||||
return (ecma_char_t) (LIT_UTF16_LOW_SURROGATE_MARKER | code_unit_bits);
|
||||
} /* convert_code_point_to_low_surrogate */
|
||||
|
||||
/**
|
||||
* Represents code point (>0xFFFF) as surrogate pair and returns its higher part
|
||||
*
|
||||
* @return higher code_unit of the surrogate pair
|
||||
*/
|
||||
static ecma_char_t
|
||||
convert_code_point_to_high_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
|
||||
{
|
||||
JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);
|
||||
JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);
|
||||
|
||||
ecma_char_t code_unit_bits;
|
||||
code_unit_bits = (ecma_char_t) ((code_point - LIT_UTF16_FIRST_SURROGATE_CODE_POINT) >> LIT_UTF16_BITS_IN_SURROGATE);
|
||||
|
||||
return (LIT_UTF16_HIGH_SURROGATE_MARKER | code_unit_bits);
|
||||
} /* convert_code_point_to_high_surrogate */
|
||||
|
||||
/**
|
||||
*
|
||||
* @return byte count required to represent the code point
|
||||
*/
|
||||
|
||||
lit_utf8_size_t
|
||||
lit_get_codepoint_utf8_size(lit_code_point_t code_point) {
|
||||
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_3_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MAX);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle surrogate code point in CESU-8 string and increase the pointer by one code unit.
|
||||
* When a complete surrogate code point is found, the callback will be called.
|
||||
*
|
||||
* NOTE:
|
||||
* Half surrogate pairs will be ignored and "dropped" silently.
|
||||
*
|
||||
* @return required size for the UTF-8 buffer
|
||||
*/
|
||||
static lit_utf8_size_t
|
||||
lit_cesu8_inc_and_handle_surrogate(const lit_utf8_byte_t **cesu8_str_p,
|
||||
ecma_char_t *high_surrogate_ch,
|
||||
void(*surrogate_pair_cb)(const lit_code_point_t cp, void *ctx),
|
||||
void(*non_surrogate_ch_cb)(const lit_code_point_t cp, void *ctx),
|
||||
void *ctx) {
|
||||
ecma_char_t ch = 0;
|
||||
lit_utf8_size_t surrogate_size = 0;
|
||||
const lit_utf8_size_t codepoint_sz = lit_read_code_unit_from_utf8(*cesu8_str_p, &ch);
|
||||
*cesu8_str_p += codepoint_sz;
|
||||
if (lit_is_code_point_utf16_high_surrogate(ch)) {
|
||||
*high_surrogate_ch = ch;
|
||||
return codepoint_sz;
|
||||
}
|
||||
else if (lit_is_code_point_utf16_low_surrogate(ch)) {
|
||||
surrogate_size = codepoint_sz;
|
||||
if (0 != *high_surrogate_ch) {
|
||||
const lit_code_point_t cp = lit_convert_surrogate_pair_to_code_point(*high_surrogate_ch, ch);
|
||||
surrogate_pair_cb(cp, ctx);
|
||||
}
|
||||
} else if (non_surrogate_ch_cb) {
|
||||
non_surrogate_ch_cb((lit_code_point_t)ch, ctx);
|
||||
}
|
||||
*high_surrogate_ch = 0;
|
||||
return surrogate_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper/callback for lit_utf8_string_size_from_cesu8_string() implementation.
|
||||
*/
|
||||
static void lit_cesu8_handle_surrogates_utf8_size_callback(const lit_code_point_t cp,
|
||||
void *ctx) {
|
||||
lit_utf8_size_t *utf8_buf_size = (lit_utf8_size_t *)ctx;
|
||||
*utf8_buf_size += lit_get_codepoint_utf8_size(cp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the required size for a buffer to contain the UTF-8 encoded data, given a CESU-8
|
||||
* encoded string.
|
||||
*
|
||||
* NOTE:
|
||||
* Half surrogate pairs will be ignored and "dropped" silently.
|
||||
*
|
||||
* @return required size for the UTF-8 buffer
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_utf8_string_size_from_cesu8_string (const lit_utf8_byte_t *cesu8_str_p, /**< cesu-8 string */
|
||||
lit_utf8_size_t cesu8_buf_size) /**< cesu-8 string size */
|
||||
{
|
||||
lit_utf8_size_t utf8_buf_size = cesu8_buf_size;
|
||||
ecma_char_t high_surrogate_ch = LIT_UNICODE_CODE_POINT_NULL;
|
||||
const lit_utf8_byte_t *const end = cesu8_str_p + cesu8_buf_size;
|
||||
while (cesu8_str_p < end) {
|
||||
if ((*cesu8_str_p & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER) {
|
||||
++cesu8_str_p;
|
||||
high_surrogate_ch = 0;
|
||||
continue;
|
||||
}
|
||||
utf8_buf_size -=
|
||||
lit_cesu8_inc_and_handle_surrogate(&cesu8_str_p, &high_surrogate_ch,
|
||||
lit_cesu8_handle_surrogates_utf8_size_callback, NULL,
|
||||
&utf8_buf_size);
|
||||
}
|
||||
return utf8_buf_size;
|
||||
} /** lit_utf8_string_size_from_cesu8_string */
|
||||
|
||||
typedef struct {
|
||||
lit_utf8_byte_t **utf8_str_ptr_ptr;
|
||||
const lit_utf8_byte_t *utf8_end;
|
||||
bool is_buffer_too_small;
|
||||
} Cesu8ToUtf8CbData;
|
||||
|
||||
/**
|
||||
* Helper/callback for lit_utf8_string_convert_from_cesu8() implementation.
|
||||
*/
|
||||
static void lit_cesu8_handle_surrogates_utf8_convert_callback(const lit_code_point_t cp,
|
||||
void *ctx) {
|
||||
Cesu8ToUtf8CbData *cb_data = (Cesu8ToUtf8CbData *)ctx;
|
||||
lit_utf8_byte_t **utf8_str_p = cb_data->utf8_str_ptr_ptr;
|
||||
if (*utf8_str_p + lit_get_codepoint_utf8_size(cp) > cb_data->utf8_end) {
|
||||
cb_data->is_buffer_too_small = true;
|
||||
return;
|
||||
}
|
||||
*utf8_str_p += lit_code_point_to_utf8(cp, *utf8_str_p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy & convert a CESU-8 encoded string into a UTF-8 encoded string.
|
||||
*
|
||||
* NOTE:
|
||||
* Half surrogate pairs will be ignored and "dropped" silently.
|
||||
*
|
||||
* @return number of bytes of UTF-8 data written
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_utf8_string_convert_from_cesu8 (const lit_utf8_byte_t *cesu8_str_p, /**< cesu-8 string */
|
||||
lit_utf8_size_t cesu8_buf_size, /**< cesu-8 buffer size */
|
||||
lit_utf8_byte_t *utf8_str_p, /**< [out] utf-8 buffer */
|
||||
lit_utf8_size_t utf8_buf_size) /**< utf-8 buffer size */
|
||||
{
|
||||
ecma_char_t high_surrogate_ch = LIT_UNICODE_CODE_POINT_NULL;
|
||||
const lit_utf8_byte_t *const cesu8_end = cesu8_str_p + cesu8_buf_size;
|
||||
lit_utf8_byte_t *const begin = utf8_str_p;
|
||||
const lit_utf8_byte_t *const utf8_end = utf8_str_p + utf8_buf_size;
|
||||
Cesu8ToUtf8CbData cb_data = {
|
||||
.utf8_str_ptr_ptr = &utf8_str_p,
|
||||
.utf8_end = utf8_end,
|
||||
.is_buffer_too_small = false,
|
||||
};
|
||||
while (cesu8_str_p < cesu8_end && *cesu8_str_p) {
|
||||
const lit_utf8_byte_t byte = *cesu8_str_p;
|
||||
if ((byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER) {
|
||||
if (utf8_str_p >= utf8_end) {
|
||||
return 0;
|
||||
}
|
||||
*(utf8_str_p++) = byte;
|
||||
++cesu8_str_p;
|
||||
high_surrogate_ch = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
lit_cesu8_inc_and_handle_surrogate(&cesu8_str_p, &high_surrogate_ch,
|
||||
lit_cesu8_handle_surrogates_utf8_convert_callback,
|
||||
lit_cesu8_handle_surrogates_utf8_convert_callback,
|
||||
&cb_data);
|
||||
if (cb_data.is_buffer_too_small) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return (lit_utf8_size_t) (utf8_str_p - begin);
|
||||
} /** lit_utf8_string_size_from_cesu8_string */
|
||||
|
||||
/**
|
||||
* Calculate size of a zero-terminated utf-8 string
|
||||
*
|
||||
* NOTE:
|
||||
* string should not contain zero characters in the middel
|
||||
*
|
||||
* @return size of a string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_zt_utf8_string_size (const lit_utf8_byte_t *utf8_str_p) /**< zero-terminated utf-8 string */
|
||||
{
|
||||
return (lit_utf8_size_t) strlen ((const char *) utf8_str_p);
|
||||
} /* lit_zt_utf8_string_size */
|
||||
|
||||
/**
|
||||
* Calculate length of a cesu-8 encoded string
|
||||
*
|
||||
* @return UTF-16 code units count
|
||||
*/
|
||||
ecma_length_t
|
||||
lit_utf8_string_length (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t utf8_buf_size) /**< string size */
|
||||
{
|
||||
ecma_length_t length = 0;
|
||||
lit_utf8_size_t size = 0;
|
||||
|
||||
while (size < utf8_buf_size)
|
||||
{
|
||||
size += lit_get_unicode_char_size_by_utf8_first_byte (*(utf8_buf_p + size));
|
||||
length++;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (size == utf8_buf_size);
|
||||
|
||||
return length;
|
||||
} /* lit_utf8_string_length */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code point from non-empty utf-8-encoded buffer
|
||||
*
|
||||
* @return number of bytes occupied by code point in the string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_read_code_point_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
lit_utf8_size_t buf_size, /**< size of the buffer in bytes */
|
||||
lit_code_point_t *code_point) /**< [out] code point */
|
||||
{
|
||||
JERRY_ASSERT (buf_p && buf_size);
|
||||
|
||||
lit_utf8_byte_t c = buf_p[0];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
*code_point = (lit_code_point_t) (c & LIT_UTF8_LAST_7_BITS_MASK);
|
||||
return 1;
|
||||
}
|
||||
|
||||
lit_code_point_t ret = LIT_UNICODE_CODE_POINT_NULL;
|
||||
ecma_length_t bytes_count = 0;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 2;
|
||||
ret = ((lit_code_point_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 3;
|
||||
ret = ((lit_code_point_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
else if ((c & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 4;
|
||||
ret = ((lit_code_point_t) (c & LIT_UTF8_LAST_3_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (false);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (buf_size >= bytes_count);
|
||||
|
||||
for (uint32_t i = 1; i < bytes_count; ++i)
|
||||
{
|
||||
ret <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
ret |= (buf_p[i] & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
*code_point = ret;
|
||||
return bytes_count;
|
||||
} /* lit_read_code_point_from_utf8 */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return number of bytes occupied by code point in the string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
ecma_char_t *code_point) /**< [out] code point */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
|
||||
lit_utf8_byte_t c = buf_p[0];
|
||||
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
*code_point = (ecma_char_t) (c & LIT_UTF8_LAST_7_BITS_MASK);
|
||||
return 1;
|
||||
}
|
||||
|
||||
lit_code_point_t ret = LIT_UNICODE_CODE_POINT_NULL;
|
||||
ecma_length_t bytes_count;
|
||||
if ((c & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
bytes_count = 2;
|
||||
ret = ((lit_code_point_t) (c & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT ((c & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
|
||||
bytes_count = 3;
|
||||
ret = ((lit_code_point_t) (c & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
}
|
||||
|
||||
for (uint32_t i = 1; i < bytes_count; ++i)
|
||||
{
|
||||
ret <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
ret |= (buf_p[i] & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (ret <= LIT_UTF16_CODE_UNIT_MAX);
|
||||
*code_point = (ecma_char_t) ret;
|
||||
return bytes_count;
|
||||
} /* lit_read_code_unit_from_utf8 */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return number of bytes occupied by code point in the string
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
||||
ecma_char_t *code_point) /**< [out] code point */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
|
||||
lit_utf8_decr (&buf_p);
|
||||
return lit_read_code_unit_from_utf8 (buf_p, code_point);
|
||||
} /* lit_read_prev_code_unit_from_utf8 */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
ecma_char_t ch;
|
||||
|
||||
*buf_p += lit_read_code_unit_from_utf8 (*buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_read_next */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return previous code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
ecma_char_t ch;
|
||||
|
||||
lit_utf8_decr (buf_p);
|
||||
lit_read_code_unit_from_utf8 (*buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_read_prev */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
ecma_char_t ch;
|
||||
|
||||
lit_read_code_unit_from_utf8 (buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_peek_next */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
*
|
||||
* @return previous code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
ecma_char_t ch;
|
||||
|
||||
lit_read_prev_code_unit_from_utf8 (buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_peek_prev */
|
||||
|
||||
/**
|
||||
* Increase cesu-8 encoded string pointer by one code unit.
|
||||
*/
|
||||
void
|
||||
lit_utf8_incr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
|
||||
*buf_p += lit_get_unicode_char_size_by_utf8_first_byte (**buf_p);
|
||||
} /* lit_utf8_incr */
|
||||
|
||||
/**
|
||||
* Decrease cesu-8 encoded string pointer by one code unit.
|
||||
*/
|
||||
void
|
||||
lit_utf8_decr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
const lit_utf8_byte_t *current_p = *buf_p;
|
||||
|
||||
do
|
||||
{
|
||||
current_p--;
|
||||
}
|
||||
while ((*(current_p) & LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_EXTRA_BYTE_MARKER);
|
||||
|
||||
*buf_p = current_p;
|
||||
} /* lit_utf8_decr */
|
||||
|
||||
/**
|
||||
* Calc hash using the specified hash_basis.
|
||||
*
|
||||
* NOTE:
|
||||
* This is implementation of FNV-1a hash function, which is released into public domain.
|
||||
* Constants used, are carefully picked primes by the authors.
|
||||
* More info: http://www.isthe.com/chongo/tech/comp/fnv/
|
||||
*
|
||||
* @return ecma-string's hash
|
||||
*/
|
||||
inline lit_string_hash_t __attr_always_inline___
|
||||
lit_utf8_string_hash_combine (lit_string_hash_t hash_basis, /**< hash to be combined with */
|
||||
const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */
|
||||
lit_utf8_size_t utf8_buf_size) /**< number of characters in the buffer */
|
||||
{
|
||||
JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0);
|
||||
|
||||
uint32_t hash = hash_basis;
|
||||
|
||||
for (uint32_t i = 0; i < utf8_buf_size; i++)
|
||||
{
|
||||
// 16777619 is 32 bit FNV_prime = 2^24 + 2^8 + 0x93 = 16777619
|
||||
hash = (hash ^ utf8_buf_p[i]) * 16777619;
|
||||
}
|
||||
|
||||
return (lit_string_hash_t) hash;
|
||||
} /* lit_utf8_string_hash_combine */
|
||||
|
||||
/**
|
||||
* Calculate hash from the buffer.
|
||||
*
|
||||
* @return ecma-string's hash
|
||||
*/
|
||||
inline lit_string_hash_t __attr_always_inline___
|
||||
lit_utf8_string_calc_hash (const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */
|
||||
lit_utf8_size_t utf8_buf_size) /**< number of characters in the buffer */
|
||||
{
|
||||
JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0);
|
||||
|
||||
// 32 bit offset_basis for FNV = 2166136261
|
||||
return lit_utf8_string_hash_combine ((lit_string_hash_t) 2166136261, utf8_buf_p, utf8_buf_size);
|
||||
} /* lit_utf8_string_calc_hash */
|
||||
|
||||
/**
|
||||
* Return code unit at the specified position in string
|
||||
*
|
||||
* NOTE:
|
||||
* code_unit_offset should be less then string's length
|
||||
*
|
||||
* @return code unit value
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
|
||||
lit_utf8_size_t utf8_buf_size, /**< string size in bytes */
|
||||
ecma_length_t code_unit_offset) /**< ofset of a code_unit */
|
||||
{
|
||||
lit_utf8_byte_t *current_p = (lit_utf8_byte_t *) utf8_buf_p;
|
||||
ecma_char_t code_unit;
|
||||
|
||||
do
|
||||
{
|
||||
JERRY_ASSERT (current_p < utf8_buf_p + utf8_buf_size);
|
||||
current_p += lit_read_code_unit_from_utf8 (current_p, &code_unit);
|
||||
}
|
||||
while (code_unit_offset--);
|
||||
|
||||
return code_unit;
|
||||
} /* lit_utf8_string_code_unit_at */
|
||||
|
||||
/**
|
||||
* Get CESU-8 encoded size of character
|
||||
*
|
||||
* @return number of bytes occupied in CESU-8
|
||||
*/
|
||||
inline lit_utf8_size_t __attr_always_inline___
|
||||
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< buffer with characters */
|
||||
{
|
||||
if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if ((first_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
|
||||
return 3;
|
||||
}
|
||||
} /* lit_get_unicode_char_size_by_utf8_first_byte */
|
||||
|
||||
/**
|
||||
* Convert code unit to cesu-8 representation
|
||||
*
|
||||
* @return byte count required to represent the code unit
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_code_unit_to_utf8 (ecma_char_t code_unit, /**< code unit */
|
||||
lit_utf8_byte_t *buf_p) /**< buffer where to store the result,
|
||||
* its size should be at least MAX_BYTES_IN_CODE_UNIT */
|
||||
{
|
||||
if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
buf_p[0] = (lit_utf8_byte_t) code_unit;
|
||||
return 1;
|
||||
}
|
||||
else if (code_unit <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
uint32_t code_unit_bits = code_unit;
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_unit_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_unit_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_unit_bits & LIT_UTF8_LAST_5_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_unit_bits);
|
||||
|
||||
buf_p[0] = LIT_UTF8_2_BYTE_MARKER | first_byte_bits;
|
||||
buf_p[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t code_unit_bits = code_unit;
|
||||
lit_utf8_byte_t third_byte_bits = (lit_utf8_byte_t) (code_unit_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_unit_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_unit_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_unit_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_unit_bits & LIT_UTF8_LAST_4_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_unit_bits);
|
||||
|
||||
buf_p[0] = LIT_UTF8_3_BYTE_MARKER | first_byte_bits;
|
||||
buf_p[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
buf_p[2] = LIT_UTF8_EXTRA_BYTE_MARKER | third_byte_bits;
|
||||
return 3;
|
||||
}
|
||||
} /* lit_code_unit_to_utf8 */
|
||||
|
||||
/**
|
||||
* Convert code point to cesu-8 representation
|
||||
*
|
||||
* @return byte count required to represent the code point
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_code_point_to_cesu8 (lit_code_point_t code_point, /**< code point */
|
||||
lit_utf8_byte_t *buf) /**< buffer where to store the result,
|
||||
* its size should be at least 6 bytes */
|
||||
{
|
||||
if (code_point <= LIT_UTF16_CODE_UNIT_MAX)
|
||||
{
|
||||
return lit_code_unit_to_utf8 ((ecma_char_t) code_point, buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
lit_utf8_size_t offset = lit_code_unit_to_utf8 (convert_code_point_to_high_surrogate (code_point), buf);
|
||||
offset += lit_code_unit_to_utf8 (convert_code_point_to_low_surrogate (code_point), buf + offset);
|
||||
return offset;
|
||||
}
|
||||
} /* lit_code_point_to_cesu8 */
|
||||
|
||||
/**
|
||||
* Convert code point to utf-8 representation
|
||||
*
|
||||
* @return byte count required to represent the code point
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
|
||||
lit_utf8_byte_t *buf) /**< buffer where to store the result,
|
||||
* its size should be at least 4 bytes */
|
||||
{
|
||||
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
buf[0] = (lit_utf8_byte_t) code_point;
|
||||
return 1;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_5_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_2_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
return 2;
|
||||
}
|
||||
else if (code_point <= LIT_UTF8_3_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t third_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_4_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_3_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
buf[2] = LIT_UTF8_EXTRA_BYTE_MARKER | third_byte_bits;
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MAX);
|
||||
|
||||
uint32_t code_point_bits = code_point;
|
||||
lit_utf8_byte_t fourth_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t third_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t second_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
|
||||
|
||||
lit_utf8_byte_t first_byte_bits = (lit_utf8_byte_t) (code_point_bits & LIT_UTF8_LAST_3_BITS_MASK);
|
||||
JERRY_ASSERT (first_byte_bits == code_point_bits);
|
||||
|
||||
buf[0] = LIT_UTF8_4_BYTE_MARKER | first_byte_bits;
|
||||
buf[1] = LIT_UTF8_EXTRA_BYTE_MARKER | second_byte_bits;
|
||||
buf[2] = LIT_UTF8_EXTRA_BYTE_MARKER | third_byte_bits;
|
||||
buf[3] = LIT_UTF8_EXTRA_BYTE_MARKER | fourth_byte_bits;
|
||||
return 4;
|
||||
}
|
||||
} /* lit_code_point_to_utf8 */
|
||||
|
||||
/**
|
||||
* Convert surrogate pair to code point
|
||||
*
|
||||
* @return code point
|
||||
*/
|
||||
lit_code_point_t
|
||||
lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, /**< high surrogate code point */
|
||||
ecma_char_t low_surrogate) /**< low surrogate code point */
|
||||
{
|
||||
JERRY_ASSERT (lit_is_code_point_utf16_high_surrogate (high_surrogate));
|
||||
JERRY_ASSERT (lit_is_code_point_utf16_low_surrogate (low_surrogate));
|
||||
|
||||
lit_code_point_t code_point;
|
||||
code_point = (uint16_t) (high_surrogate - LIT_UTF16_HIGH_SURROGATE_MIN);
|
||||
code_point <<= LIT_UTF16_BITS_IN_SURROGATE;
|
||||
|
||||
code_point += LIT_UTF16_FIRST_SURROGATE_CODE_POINT;
|
||||
|
||||
code_point |= (uint16_t) (low_surrogate - LIT_UTF16_LOW_SURROGATE_MIN);
|
||||
return code_point;
|
||||
} /* lit_convert_surrogate_pair_to_code_point */
|
||||
|
||||
/**
|
||||
* Compare cesu-8 string to cesu-8 string
|
||||
*
|
||||
* @return true - if strings are equal;
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool
|
||||
lit_compare_utf8_strings (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string1_size, /**< string size */
|
||||
const lit_utf8_byte_t *string2_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string2_size) /**< string size */
|
||||
{
|
||||
if (string1_size != string2_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return memcmp (string1_p, string2_p, string1_size) == 0;
|
||||
} /* lit_compare_utf8_strings */
|
||||
|
||||
/**
|
||||
* Relational compare of cesu-8 strings
|
||||
*
|
||||
* First string is less than second string if:
|
||||
* - strings are not equal;
|
||||
* - first string is prefix of second or is lexicographically less than second.
|
||||
*
|
||||
* @return true - if first string is less than second string,
|
||||
* false - otherwise.
|
||||
*/
|
||||
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string1_size, /**< string size */
|
||||
const lit_utf8_byte_t *string2_p, /**< utf-8 string */
|
||||
lit_utf8_size_t string2_size) /**< string size */
|
||||
{
|
||||
lit_utf8_byte_t *string1_pos = (lit_utf8_byte_t *) string1_p;
|
||||
lit_utf8_byte_t *string2_pos = (lit_utf8_byte_t *) string2_p;
|
||||
const lit_utf8_byte_t *string1_end_p = string1_p + string1_size;
|
||||
const lit_utf8_byte_t *string2_end_p = string2_p + string2_size;
|
||||
|
||||
while (string1_pos < string1_end_p && string2_pos < string2_end_p)
|
||||
{
|
||||
ecma_char_t ch1, ch2;
|
||||
string1_pos += lit_read_code_unit_from_utf8 (string1_pos, &ch1);
|
||||
string2_pos += lit_read_code_unit_from_utf8 (string2_pos, &ch2);
|
||||
|
||||
if (ch1 < ch2)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (ch1 > ch2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return (string1_pos >= string1_end_p && string2_pos < string2_end_p);
|
||||
} /* lit_compare_utf8_strings_relational */
|
143
third_party/jerryscript/jerry-core/lit/lit-strings.h
vendored
Normal file
143
third_party/jerryscript/jerry-core/lit/lit-strings.h
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LIT_STRINGS_H
|
||||
#define LIT_STRINGS_H
|
||||
|
||||
#include "jrt.h"
|
||||
#include "lit-globals.h"
|
||||
|
||||
/**
|
||||
* Null character (used in a few cases as a utf-8 string end marker)
|
||||
*/
|
||||
#define LIT_BYTE_NULL (0)
|
||||
|
||||
/**
|
||||
* For the formal definition of Unicode transformation formats (UTF) see Section 3.9, Unicode Encoding Forms in The
|
||||
* Unicode Standard (http://www.unicode.org/versions/Unicode3.0.0/ch03.pdf#G7404).
|
||||
*/
|
||||
#define LIT_UNICODE_CODE_POINT_NULL (0x0)
|
||||
#define LIT_UNICODE_CODE_POINT_MAX (0x10FFFF)
|
||||
|
||||
#define LIT_UTF16_CODE_UNIT_MAX (0xFFFF)
|
||||
#define LIT_UTF16_FIRST_SURROGATE_CODE_POINT (0x10000)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MARKER (0xDC00)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MARKER (0xD800)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MIN (0xD800)
|
||||
#define LIT_UTF16_HIGH_SURROGATE_MAX (0xDBFF)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MIN (0xDC00)
|
||||
#define LIT_UTF16_LOW_SURROGATE_MAX (0xDFFF)
|
||||
#define LIT_UTF16_BITS_IN_SURROGATE (10)
|
||||
#define LIT_UTF16_LAST_10_BITS_MASK (0x3FF)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_MARKER (0x00)
|
||||
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
|
||||
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
|
||||
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
|
||||
#define LIT_UTF8_5_BYTE_MARKER (0xF8)
|
||||
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_MASK (0x80)
|
||||
#define LIT_UTF8_2_BYTE_MASK (0xE0)
|
||||
#define LIT_UTF8_3_BYTE_MASK (0xF0)
|
||||
#define LIT_UTF8_4_BYTE_MASK (0xF8)
|
||||
#define LIT_UTF8_EXTRA_BYTE_MASK (0xC0)
|
||||
|
||||
#define LIT_UTF8_LAST_7_BITS_MASK (0x7F)
|
||||
#define LIT_UTF8_LAST_6_BITS_MASK (0x3F)
|
||||
#define LIT_UTF8_LAST_5_BITS_MASK (0x1F)
|
||||
#define LIT_UTF8_LAST_4_BITS_MASK (0x0F)
|
||||
#define LIT_UTF8_LAST_3_BITS_MASK (0x07)
|
||||
#define LIT_UTF8_LAST_2_BITS_MASK (0x03)
|
||||
#define LIT_UTF8_LAST_1_BIT_MASK (0x01)
|
||||
|
||||
#define LIT_UTF8_BITS_IN_EXTRA_BYTES (6)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_CODE_POINT_MAX (0x7F)
|
||||
#define LIT_UTF8_2_BYTE_CODE_POINT_MIN (0x80)
|
||||
#define LIT_UTF8_2_BYTE_CODE_POINT_MAX (0x7FF)
|
||||
#define LIT_UTF8_3_BYTE_CODE_POINT_MIN (0x800)
|
||||
#define LIT_UTF8_3_BYTE_CODE_POINT_MAX (LIT_UTF16_CODE_UNIT_MAX)
|
||||
#define LIT_UTF8_4_BYTE_CODE_POINT_MIN (0x10000)
|
||||
#define LIT_UTF8_4_BYTE_CODE_POINT_MAX (LIT_UNICODE_CODE_POINT_MAX)
|
||||
|
||||
/**
|
||||
* Difference between byte count needed to represent code point greater than 0xFFFF
|
||||
* in common UTF-8 (4 bytes required) and CESU-8 (6 bytes required)
|
||||
*/
|
||||
#define LIT_UTF8_CESU8_SURROGATE_SIZE_DIF (2 * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT - LIT_UTF8_MAX_BYTES_IN_CODE_POINT)
|
||||
|
||||
/**
|
||||
* Byte values >= LIT_UTF8_FIRST_BYTE_MAX are not allowed in internal strings
|
||||
*/
|
||||
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
|
||||
|
||||
/* validation */
|
||||
bool lit_is_utf8_string_valid (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
bool lit_is_cesu8_string_valid (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* checks */
|
||||
bool lit_is_code_point_utf16_low_surrogate (lit_code_point_t);
|
||||
bool lit_is_code_point_utf16_high_surrogate (lit_code_point_t);
|
||||
|
||||
/* size */
|
||||
lit_utf8_size_t lit_zt_utf8_string_size (const lit_utf8_byte_t *);
|
||||
lit_utf8_size_t lit_get_codepoint_utf8_size(lit_code_point_t code_point);
|
||||
|
||||
/* length */
|
||||
ecma_length_t lit_utf8_string_length (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* hash */
|
||||
lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
lit_string_hash_t lit_utf8_string_hash_combine (lit_string_hash_t, const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
/* code unit access */
|
||||
ecma_char_t lit_utf8_string_code_unit_at (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t);
|
||||
lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t);
|
||||
|
||||
/* conversion */
|
||||
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t, lit_utf8_byte_t *);
|
||||
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t, lit_utf8_byte_t *);
|
||||
lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t, lit_utf8_byte_t *);
|
||||
lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t, ecma_char_t);
|
||||
|
||||
/* cesu-8 <=> utf-8 conversion */
|
||||
lit_utf8_size_t lit_utf8_string_size_from_cesu8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
lit_utf8_size_t lit_utf8_string_convert_from_cesu8 (const lit_utf8_byte_t *, lit_utf8_size_t, lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
bool lit_compare_utf8_strings (const lit_utf8_byte_t *, lit_utf8_size_t,
|
||||
const lit_utf8_byte_t *, lit_utf8_size_t);
|
||||
|
||||
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, lit_utf8_size_t,
|
||||
const lit_utf8_byte_t *string2_p, lit_utf8_size_t);
|
||||
|
||||
/* read code point from buffer */
|
||||
lit_utf8_size_t lit_read_code_point_from_utf8 (const lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *);
|
||||
|
||||
lit_utf8_size_t lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *,
|
||||
ecma_char_t *);
|
||||
|
||||
lit_utf8_size_t lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *,
|
||||
ecma_char_t *);
|
||||
|
||||
ecma_char_t lit_utf8_read_next (const lit_utf8_byte_t **);
|
||||
ecma_char_t lit_utf8_read_prev (const lit_utf8_byte_t **);
|
||||
ecma_char_t lit_utf8_peek_next (const lit_utf8_byte_t *);
|
||||
ecma_char_t lit_utf8_peek_prev (const lit_utf8_byte_t *);
|
||||
void lit_utf8_incr (const lit_utf8_byte_t **);
|
||||
void lit_utf8_decr (const lit_utf8_byte_t **);
|
||||
|
||||
#endif /* !LIT_STRINGS_H */
|
211
third_party/jerryscript/jerry-core/lit/lit-unicode-ranges.inc.h
vendored
Normal file
211
third_party/jerryscript/jerry-core/lit/lit-unicode-ranges.inc.h
vendored
Normal file
|
@ -0,0 +1,211 @@
|
|||
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
|
||||
* Copyright 2015-2016 University of Szeged.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*
|
||||
* Unicode characters and ranges generated by tools/print-unicode-ranges.sh
|
||||
* from UnicodeData-3.0.0.txt.
|
||||
* See also:
|
||||
* http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
|
||||
* http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
|
||||
*/
|
||||
|
||||
#ifndef LIT_UNICODE_RANGES_INC_H_
|
||||
#define LIT_UNICODE_RANGES_INC_H_
|
||||
|
||||
/**
|
||||
* Character interval starting points for the unicode letters.
|
||||
*
|
||||
* The characters covered by these intervals are from
|
||||
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
|
||||
*/
|
||||
static const uint16_t unicode_letter_interv_sps[] JERRY_CONST_DATA =
|
||||
{
|
||||
/*
|
||||
* these are handled separately
|
||||
* 0x0041, len 25
|
||||
* 0x0061, len 25
|
||||
*/
|
||||
0x00C0, 0x00D8, 0XF8, 0X1F8, 0x0222, 0x0250, 0x02B0, 0x02BB,
|
||||
0x02D0, 0x02E0, 0x0388, 0x038E, 0x03A3, 0x03D0, 0x03DA, 0x0400, 0x048C, 0x04C7,
|
||||
0x04CB, 0x04D0, 0x04F8, 0x0531, 0x0561, 0x05D0, 0x05F0, 0x0621, 0x0640, 0x0671,
|
||||
0x06E5, 0x06FA, 0x0712, 0x0780, 0x0905, 0x0958, 0x0985, 0x098F, 0x0993, 0x09AA,
|
||||
0x09B6, 0x09DC, 0x09DF, 0x09F0, 0x0A05, 0x0A0F, 0x0A13, 0x0A2A, 0x0A32, 0x0A35,
|
||||
0x0A38, 0x0A59, 0x0A72, 0x0A85, 0x0A8F, 0x0A93, 0x0AAA, 0x0AB2, 0x0AB5, 0x0B05,
|
||||
0x0B0F, 0x0B13, 0x0B2A, 0x0B32, 0x0B36, 0x0B5C, 0x0B5F, 0x0B85, 0x0B8E, 0x0B92,
|
||||
0x0B99, 0x0B9E, 0x0BA3, 0x0BA8, 0x0BAE, 0x0BB7, 0x0C05, 0x0C0E, 0x0C12, 0x0C2A,
|
||||
0x0C35, 0x0C60, 0x0C85, 0x0C8E, 0x0C92, 0x0CAA, 0x0CB5, 0x0CE0, 0x0D05, 0x0D0E,
|
||||
0x0D12, 0x0D2A, 0x0D60, 0x0D85, 0x0D9A, 0x0DB3, 0x0DC0, 0x0E01, 0x0E32, 0x0E40,
|
||||
0x0E81, 0x0E87, 0x0E94, 0x0E99, 0x0EA1, 0x0EAA, 0x0EAD, 0x0EB2, 0x0EC0, 0x0EDC,
|
||||
0x0F40, 0x0F49, 0x0F88, 0x1000, 0x1023, 0x1029, 0x1050, 0x10A0, 0x10D0, 0x1100,
|
||||
0x115F, 0x11A8, 0x1200, 0x1208, 0x124A, 0x1250, 0x125A, 0x1260, 0x128A, 0x1290,
|
||||
0x12B2, 0x12B8, 0x12C2, 0x12C8, 0x12D0, 0x12D8, 0x12F0, 0x1312, 0x1318, 0x1320,
|
||||
0x1348, 0x13A0, 0X1401, 0X1501, 0X1601, 0x166F, 0x1681, 0x16A0, 0x1780, 0x1820,
|
||||
0x1880, 0x1E00, 0x1EA0, 0x1F00, 0x1F18, 0x1F20, 0x1F48, 0x1F50, 0x1F5F, 0x1F80,
|
||||
0x1FB6, 0x1FC2, 0x1FC6, 0x1FD0, 0x1FD6, 0x1FE0, 0x1FF2, 0x1FF6, 0x210A, 0x2119,
|
||||
0x212A, 0x212F, 0x2133, 0x2160, 0x3005, 0x3021, 0x3031, 0x3038, 0x3041, 0x309D,
|
||||
0x30A1, 0x30FC, 0x3105, 0x3131, 0x31A0, 0XA000, 0XA100, 0XA200, 0XA300, 0XA400,
|
||||
0XF900, 0XFA00, 0xFB00, 0xFB13, 0xFB1F, 0xFB2A, 0xFB38, 0xFB40, 0xFB43, 0xFB46,
|
||||
0XFBD3, 0XFCD3, 0xFD50, 0xFD92, 0xFDF0, 0xFE70, 0xFE76, 0xFF21, 0xFF41, 0xFF66,
|
||||
0xFFC2, 0xFFCA, 0xFFD2, 0xFFDA
|
||||
};
|
||||
|
||||
/**
|
||||
* Character lengths for the unicode letters.
|
||||
*
|
||||
* The characters covered by these intervals are from
|
||||
* the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
|
||||
*/
|
||||
static const uint8_t unicode_letter_interv_lens[] JERRY_CONST_DATA =
|
||||
{
|
||||
22, 30, 255, 39, 17, 93, 8, 6,
|
||||
1, 4, 2, 19, 43, 7, 25, 129, 56, 1,
|
||||
1, 37, 1, 37, 38, 26, 2, 25, 10, 98,
|
||||
1, 2, 26, 37, 52, 9, 7, 1, 21, 6,
|
||||
3, 1, 2, 1, 5, 1, 21, 6, 1, 1,
|
||||
1, 3, 2, 6, 2, 21, 6, 1, 4, 7,
|
||||
1, 21, 6, 1, 3, 1, 2, 5, 2, 3,
|
||||
1, 1, 1, 2, 7, 2, 7, 2, 22, 9,
|
||||
4, 1, 7, 2, 22, 9, 4, 1, 7, 2,
|
||||
22, 15, 1, 17, 23, 8, 6, 47, 1, 6,
|
||||
1, 1, 3, 6, 2, 1, 3, 1, 4, 1,
|
||||
7, 33, 3, 33, 4, 1, 5, 37, 38, 89,
|
||||
67, 81, 6, 62, 3, 6, 3, 38, 3, 30,
|
||||
3, 6, 3, 6, 6, 22, 30, 3, 6, 38,
|
||||
18, 84, 255, 255, 107, 7, 25, 74, 51, 87,
|
||||
40, 155, 89, 21, 5, 37, 5, 7, 30, 52,
|
||||
6, 2, 6, 3, 5, 12, 2, 6, 9, 4,
|
||||
3, 2, 6, 35, 2, 8, 4, 2, 83, 1,
|
||||
89, 2, 39, 93, 23, 255, 255, 255, 255, 140,
|
||||
255, 45, 6, 4, 9, 12, 4, 1, 1, 107,
|
||||
255, 106, 63, 53, 11, 2, 134, 25, 25, 88,
|
||||
5, 5, 5, 2
|
||||
};
|
||||
|
||||
/**
|
||||
* Those unicode letter characters that are not inside any of
|
||||
* the intervals specified by unicode_letter_interv_sps and unicode_letter_interv_lens.
|
||||
*
|
||||
* The characters are from the following Unicode categories:
|
||||
* Lu, Ll, Lt, Lm, Lo, Nl
|
||||
*/
|
||||
static const uint16_t unicode_letter_chars[] JERRY_CONST_DATA =
|
||||
{
|
||||
0x00AA, 0x00B5, 0x00BA, 0x02EE, 0x037A, 0x0386, 0x038C, 0x0559, 0x06D5, 0x0710,
|
||||
0x093D, 0x0950, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C,
|
||||
0x0CDE, 0x0DBD, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EBD, 0x0EC6, 0x0F00,
|
||||
0x1248, 0x1258, 0x1288, 0x12B0, 0x12C0, 0x1310, 0x1F59, 0x1F5B, 0x1F5D, 0x1FBE,
|
||||
0x207F, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3400, 0x4DB5, 0x4E00,
|
||||
0x9FA5, 0xAC00, 0xD7A3, 0xFB1D, 0xFB3E, 0xFE74
|
||||
};
|
||||
|
||||
/**
|
||||
* Character interval starting points for non-letter character
|
||||
* that can be used as a non-first character of an identifier.
|
||||
*
|
||||
* The characters covered by these intervals are from
|
||||
* the following Unicode categories: Nd, Mn, Mc, Pc
|
||||
*/
|
||||
static const uint16_t unicode_non_letter_ident_part_interv_sps[] JERRY_CONST_DATA =
|
||||
{
|
||||
/*
|
||||
* decimal digits: handled separately
|
||||
* 0x0030, len: 9
|
||||
*/
|
||||
0x0300, 0x0360, 0x0483, 0x0591, 0x05A3, 0x05BB, 0x05C1, 0x064B, 0x0660,
|
||||
0x06D6, 0x06DF, 0x06E7, 0x06EA, 0x06F0, 0x0730, 0x07A6, 0x0901, 0x093E, 0x0951,
|
||||
0x0962, 0x0966, 0x0981, 0x09BE, 0x09C7, 0x09CB, 0x09E2, 0x09E6, 0x0A3E, 0x0A47,
|
||||
0x0A4B, 0x0A66, 0x0A81, 0x0ABE, 0x0AC7, 0x0ACB, 0x0AE6, 0x0B01, 0x0B3E, 0x0B47,
|
||||
0x0B4B, 0x0B56, 0x0B66, 0x0B82, 0x0BBE, 0x0BC6, 0x0BCA, 0x0BE7, 0x0C01, 0x0C3E,
|
||||
0x0C46, 0x0C4A, 0x0C55, 0x0C66, 0x0C82, 0x0CBE, 0x0CC6, 0x0CCA, 0x0CD5, 0x0CE6,
|
||||
0x0D02, 0x0D3E, 0x0D46, 0x0D4A, 0x0D66, 0x0D82, 0x0DCF, 0x0DD8, 0x0DF2, 0x0E34,
|
||||
0x0E47, 0x0E50, 0x0EB4, 0x0EBB, 0x0EC8, 0x0ED0, 0x0F18, 0x0F20, 0x0F3E, 0x0F71,
|
||||
0x0F86, 0x0F90, 0x0F99, 0x102C, 0x1036, 0x1040, 0x1056, 0x1369, 0x17B4, 0x17E0,
|
||||
0x1810, 0x203F, 0x20D0, 0x302A, 0x3099, 0xFE20, 0xFE33, 0xFE4D, 0xFF10
|
||||
};
|
||||
|
||||
/**
|
||||
* Character interval lengths for non-letter character
|
||||
* that can be used as a non-first character of an identifier.
|
||||
*
|
||||
* The characters covered by these intervals are from
|
||||
* the following Unicode categories: Nd, Mn, Mc, Pc
|
||||
*/
|
||||
static const uint8_t unicode_non_letter_ident_part_interv_lens[] =
|
||||
{
|
||||
78, 2, 3, 16, 22, 2, 1, 10, 9,
|
||||
6, 5, 1, 3, 9, 26, 10, 2, 15, 3,
|
||||
1, 9, 2, 6, 1, 2, 1, 9, 4, 1,
|
||||
2, 11, 2, 7, 2, 2, 9, 2, 5, 1,
|
||||
2, 1, 9, 1, 4, 2, 3, 8, 2, 6,
|
||||
2, 3, 1, 9, 1, 6, 2, 3, 1, 9,
|
||||
1, 5, 2, 3, 9, 1, 5, 7, 1, 6,
|
||||
7, 9, 5, 1, 5, 9, 1, 9, 1, 19,
|
||||
1, 7, 35, 6, 3, 9, 3, 8, 31, 9,
|
||||
9, 1, 12, 5, 1, 3, 1, 2, 9
|
||||
};
|
||||
|
||||
/**
|
||||
* Those non-letter characters that can be used as a non-first
|
||||
* character of an identifier and not included in any of the intervals
|
||||
* specified by unicode_non_letter_ident_part_interv_sps and unicode_non_letter_ident_part_interv_lens.
|
||||
*
|
||||
* The characters are from the following Unicode categories:
|
||||
* Nd, Mn, Mc, Pc
|
||||
*/
|
||||
static const uint16_t unicode_non_letter_ident_part_chars[] =
|
||||
{
|
||||
0x005F, 0x05BF, 0x05C4, 0x0670, 0x0711, 0x093C, 0x09BC, 0x09D7, 0x0A02, 0x0A3C,
|
||||
0x0ABC, 0x0B3C, 0x0BD7, 0x0D57, 0x0DCA, 0x0DD6, 0x0E31, 0x0EB1, 0x0F35, 0x0F37,
|
||||
0x0F39, 0x0FC6, 0x18A9, 0x20E1, 0x30FB, 0xFB1E, 0xFF3F, 0xFF65
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Unicode separator character interval starting points from Unicode category: Zs
|
||||
*/
|
||||
static const uint16_t unicode_separator_char_interv_sps[] =
|
||||
{
|
||||
0x2000
|
||||
};
|
||||
|
||||
/**
|
||||
* Unicode separator character interval lengths from Unicode category: Zs
|
||||
*/
|
||||
static const uint8_t unicode_separator_char_interv_lens[] =
|
||||
{
|
||||
11
|
||||
};
|
||||
|
||||
/**
|
||||
* Unicode separator characters that are not in the
|
||||
* unicode_separator_char_interv_sps / unicode_separator_char_interv_lens intervals.
|
||||
*
|
||||
* Unicode category: Zs
|
||||
*/
|
||||
static const uint16_t unicode_separator_chars[] =
|
||||
{
|
||||
/*
|
||||
* these two chars are handled separately @see lit_char_is_space_separator
|
||||
* 0x0020, space
|
||||
* 0x00A0, non-breaking space
|
||||
*/
|
||||
0x1680, \
|
||||
0x180E, /* manually added */ \
|
||||
0x202F, /* manually added */ \
|
||||
0x205F, \
|
||||
0x3000
|
||||
};
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue