Import of the watch repository from Pebble

This commit is contained in:
Matthieu Jeanson 2024-12-12 16:43:03 -08:00 committed by Katharine Berry
commit 3b92768480
10334 changed files with 2564465 additions and 0 deletions

View file

@ -0,0 +1,514 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lit-char-helpers.h"
#include "lit/lit-unicode-ranges.inc.h"
#include "lit-strings.h"
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
/**
 * Binary search for a code unit in an array of code units.
 *
 * Note:
 *      The array has to be sorted in ascending order, otherwise the halving
 *      steps below may skip over a matching element.
 *
 * @return true - if the code unit is present in the array
 *         false - otherwise
 */
static bool
search_char_in_char_array (ecma_char_t c, /**< code unit */
                           const ecma_char_t *array, /**< array */
                           int size_of_array) /**< length of the array */
{
  /* Closed search range [low, high]; it is empty once low > high. */
  int low = 0;
  int high = size_of_array - 1;

  while (low <= high)
  {
    const int mid = (low + high) / 2;
    const ecma_char_t probe = array[mid];

    if (c < probe)
    {
      high = mid - 1;
    }
    else if (c > probe)
    {
      low = mid + 1;
    }
    else
    {
      return true;
    }
  }

  return false;
} /* search_char_in_char_array */
/**
 * Binary search for a code unit in a list of intervals.
 *
 * The intervals are specified by two parallel arrays: the first one holds the
 * starting point of each interval, the second one holds its length. An interval
 * covers every code unit from its starting point up to and including
 * (starting point + length).
 *
 * Note:
 *      The starting points have to be sorted in ascending order for the
 *      search to be correct.
 *
 * @return true - if the code unit falls (inclusively) into one of the intervals
 *         false - otherwise
 */
static bool
search_char_in_interval_array (ecma_char_t c, /**< code unit */
                               const ecma_char_t *array_sp, /**< array of interval starting points */
                               const uint8_t *lengths, /**< array of interval lengths */
                               int size_of_array) /**< length of the array */
{
  int low = 0;
  int high = size_of_array - 1;

  while (low <= high)
  {
    const int mid = (low + high) / 2;
    const ecma_char_t interval_start = array_sp[mid];

    if (c < interval_start)
    {
      /* The code unit can only be in an interval that starts earlier. */
      high = mid - 1;
      continue;
    }

    /* From here on interval_start <= c holds. */
    if (c <= interval_start + lengths[mid])
    {
      return true;
    }

    /* The code unit is past the end of this interval. */
    low = mid + 1;
  }

  return false;
} /* search_char_in_interval_array */
/**
 * Check if the specified code unit is a Format-Control character
 * (zero width non-joiner, zero width joiner or byte order mark).
 *
 * @return true - if the character is one of the characters listed in ECMA-262 v5, Table 1,
 *         false - otherwise.
 */
bool
lit_char_is_format_control (ecma_char_t c) /**< code unit */
{
  switch (c)
  {
    case LIT_CHAR_ZWNJ:
    case LIT_CHAR_ZWJ:
    case LIT_CHAR_BOM:
    {
      return true;
    }
    default:
    {
      return false;
    }
  }
} /* lit_char_is_format_control */
/**
 * Check if the specified code unit is a WhiteSpace character, including those
 * that fall into the "Space, Separator" ("Zs") Unicode character category.
 *
 * @return true - if the character is one of the characters listed in ECMA-262 v5, Table 2,
 *         false - otherwise.
 */
bool
lit_char_is_white_space (ecma_char_t c) /**< code unit */
{
  /* Single byte code points: only tab, vertical tab, form feed and space qualify. */
  if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
  {
    switch (c)
    {
      case LIT_CHAR_TAB:
      case LIT_CHAR_VTAB:
      case LIT_CHAR_FF:
      case LIT_CHAR_SP:
      {
        return true;
      }
      default:
      {
        return false;
      }
    }
  }

  if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM)
  {
    return true;
  }

  /* Only the first separator interval is consulted before the single character table. */
  if (c >= unicode_separator_char_interv_sps[0]
      && c <= unicode_separator_char_interv_sps[0] + unicode_separator_char_interv_lens[0])
  {
    return true;
  }

  return search_char_in_char_array (c, unicode_separator_chars, NUM_OF_ELEMENTS (unicode_separator_chars));
} /* lit_char_is_white_space */
/**
 * Check if the specified code unit is a LineTerminator character
 * (line feed, carriage return, line separator or paragraph separator).
 *
 * @return true - if the character is one of the characters listed in ECMA-262 v5, Table 3,
 *         false - otherwise.
 */
bool
lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
{
  switch (c)
  {
    case LIT_CHAR_LF:
    case LIT_CHAR_CR:
    case LIT_CHAR_LS:
    case LIT_CHAR_PS:
    {
      return true;
    }
    default:
    {
      return false;
    }
  }
} /* lit_char_is_line_terminator */
/**
 * Check if the specified code unit is a unicode letter.
 *
 * Note:
 *      A unicode letter is a character that belongs to one of the following categories:
 *       - Uppercase letter (Lu);
 *       - Lowercase letter (Ll);
 *       - Titlecase letter (Lt);
 *       - Modifier letter (Lm);
 *       - Other letter (Lo);
 *       - Letter number (Nl).
 *
 * See also:
 *          ECMA-262 v5, 7.6
 *
 * @return true - if the character is found in the letter interval table or the letter character table,
 *         false - otherwise.
 */
static bool
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
{
  /* The interval table is consulted first, the single character table only on a miss. */
  if (search_char_in_interval_array (c,
                                     unicode_letter_interv_sps,
                                     unicode_letter_interv_lens,
                                     NUM_OF_ELEMENTS (unicode_letter_interv_sps)))
  {
    return true;
  }

  return search_char_in_char_array (c, unicode_letter_chars, NUM_OF_ELEMENTS (unicode_letter_chars));
} /* lit_char_is_unicode_letter */
/**
 * Check if the specified code unit is a non-letter character that can be used
 * as a non-first character of an identifier.
 *
 * These characters are covered by the following unicode categories:
 *  - digit (Nd)
 *  - combining mark (Mn, Mc)
 *  - connector punctuation (Pc)
 *
 * See also:
 *          ECMA-262 v5, 7.6
 *
 * @return true - if the character is found in the non-letter identifier part tables,
 *         false - otherwise.
 */
static bool
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
{
  /* The interval table is consulted first, the single character table only on a miss. */
  if (search_char_in_interval_array (c,
                                     unicode_non_letter_ident_part_interv_sps,
                                     unicode_non_letter_ident_part_interv_lens,
                                     NUM_OF_ELEMENTS (unicode_non_letter_ident_part_interv_sps)))
  {
    return true;
  }

  return search_char_in_char_array (c,
                                    unicode_non_letter_ident_part_chars,
                                    NUM_OF_ELEMENTS (unicode_non_letter_ident_part_chars));
} /* lit_char_is_unicode_non_letter_ident_part */
/**
* Checks whether the next UTF8 character is a valid identifier start.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_start (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
{
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return lit_char_is_identifier_start_character (*src_p);
}
return lit_char_is_identifier_start_character (lit_utf8_peek_next (src_p));
} /* lit_char_is_identifier_start */
/**
* Checks whether the character is a valid identifier start.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_start_character (uint16_t chr) /**< EcmaScript character */
{
// Fast path for ASCII-defined letters
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|| chr == LIT_CHAR_DOLLAR_SIGN
|| chr == LIT_CHAR_UNDERSCORE);
}
return lit_char_is_unicode_letter (chr);
} /* lit_char_is_identifier_start_character */
/**
* Checks whether the next UTF8 character is a valid identifier part.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
{
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return lit_char_is_identifier_part_character (*src_p);
}
return lit_char_is_identifier_part_character (lit_utf8_peek_next (src_p));
} /* lit_char_is_identifier_part */
/**
* Checks whether the character is a valid identifier part.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
{
// Fast path for ASCII-defined letters
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|| (chr >= LIT_CHAR_0 && chr <= LIT_CHAR_9)
|| chr == LIT_CHAR_DOLLAR_SIGN
|| chr == LIT_CHAR_UNDERSCORE);
}
return (lit_char_is_unicode_letter (chr)
|| lit_char_is_unicode_non_letter_ident_part (chr));
} /* lit_char_is_identifier_part_character */
/**
 * Check if the specified code unit is an OctalDigit character (ECMA-262 v5, B.1.2)
 *
 * @return true - if the code unit is in the '0' - '7' range,
 *         false - otherwise.
 */
bool
lit_char_is_octal_digit (ecma_char_t c) /**< code unit */
{
  if (c < LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN)
  {
    return false;
  }

  return (c <= LIT_CHAR_ASCII_OCTAL_DIGITS_END);
} /* lit_char_is_octal_digit */
/**
 * Check if the specified code unit is a DecimalDigit character (ECMA-262 v5, 7.8.3)
 *
 * @return true - if the code unit is in the '0' - '9' range,
 *         false - otherwise.
 */
bool
lit_char_is_decimal_digit (ecma_char_t c) /**< code unit */
{
  const bool is_outside_range = (c < LIT_CHAR_ASCII_DIGITS_BEGIN || c > LIT_CHAR_ASCII_DIGITS_END);

  return !is_outside_range;
} /* lit_char_is_decimal_digit */
/**
 * Check if the specified code unit is a HexDigit character (ECMA-262 v5, 7.8.3)
 *
 * @return true - if the code unit is a decimal digit or a letter in the
 *                'a' - 'f' / 'A' - 'F' ranges,
 *         false - otherwise.
 */
bool
lit_char_is_hex_digit (ecma_char_t c) /**< code unit */
{
  if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
  {
    return true;
  }

  /* Fold to lowercase so that one range check handles both letter cases. */
  const ecma_char_t folded = (ecma_char_t) LEXER_TO_ASCII_LOWERCASE (c);

  return (folded >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
          && folded <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END);
} /* lit_char_is_hex_digit */
/**
 * Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
 *
 * Note:
 *      The argument must be a hex digit; this is only checked by an assertion.
 *
 * @return digit value, corresponding to the hex char
 */
uint32_t
lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
                                     *   one of HexDigit characters */
{
  JERRY_ASSERT (lit_char_is_hex_digit (c));

  if (c <= LIT_CHAR_ASCII_DIGITS_END && c >= LIT_CHAR_ASCII_DIGITS_BEGIN)
  {
    return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
  }

  /* Letters: everything that is not a lowercase hex letter is treated as uppercase. */
  ecma_char_t letter_base = LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN;

  if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
  {
    letter_base = LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN;
  }

  return (uint32_t) (c - letter_base + 10);
} /* lit_char_hex_to_int */
/**
 * Converts a character to UTF8 bytes.
 *
 * Note:
 *      The destination buffer has to be able to hold the bytes written,
 *      which is at most three for a single code unit.
 *
 * @return length of the UTF8 representation (1, 2 or 3).
 */
size_t
lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
                        ecma_char_t chr) /**< EcmaScript character */
{
  if ((chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX) == 0)
  {
    /* 00000000 0xxxxxxx -> 0xxxxxxx */
    dst_p[0] = (uint8_t) chr;
    return 1;
  }

  if ((chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX) == 0)
  {
    /* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
    dst_p[0] = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
    dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
    return 2;
  }

  JERRY_ASSERT (!(chr & ~LIT_UTF8_3_BYTE_CODE_POINT_MAX));

  /* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
  dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((chr >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
  dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
  dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
  return 3;
} /* lit_char_to_utf8_bytes */
/**
 * Returns the length of the UTF8 representation of a character,
 * without producing the bytes themselves.
 *
 * @return length of the UTF8 representation (1, 2 or 3).
 */
size_t
lit_char_get_utf8_length (ecma_char_t chr) /**< EcmaScript character */
{
  size_t length;

  if ((chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX) == 0)
  {
    /* 00000000 0xxxxxxx */
    length = 1;
  }
  else if ((chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX) == 0)
  {
    /* 00000yyy yyxxxxxx */
    length = 2;
  }
  else
  {
    /* zzzzyyyy yyxxxxxx */
    JERRY_ASSERT (!(chr & ~LIT_UTF8_3_BYTE_CODE_POINT_MAX));
    length = 3;
  }

  return length;
} /* lit_char_get_utf8_length */
/**
 * Parse the next number_of_characters hexadecimal characters
 * and construct a code unit from them. The buffer must
 * be zero terminated.
 *
 * Note:
 *      The output is written only if every character read is a hex digit;
 *      on failure *out_code_unit_p is left untouched.
 *
 * @return true if decoding was successful, false otherwise
 */
bool
lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
                             lit_utf8_size_t number_of_characters, /**< number of characters to be read */
                             ecma_char_ptr_t out_code_unit_p) /**< [out] decoded result */
{
  JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);

  ecma_char_t result = LIT_CHAR_NULL;

  for (lit_utf8_size_t idx = 0; idx < number_of_characters; idx++)
  {
    const lit_utf8_byte_t byte = buf_p[idx];
    ecma_char_t nibble;

    if (byte >= LIT_CHAR_ASCII_DIGITS_BEGIN
        && byte <= LIT_CHAR_ASCII_DIGITS_END)
    {
      nibble = (ecma_char_t) (byte - LIT_CHAR_ASCII_DIGITS_BEGIN);
    }
    else if (byte >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
             && byte <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
    {
      nibble = (ecma_char_t) (byte - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
    }
    else if (byte >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
             && byte <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
    {
      nibble = (ecma_char_t) (byte - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
    }
    else
    {
      /* Not a hex digit; this also covers the terminating zero of a short buffer. */
      return false;
    }

    /* Make room for the new digit in the lowest four bits. */
    result = (ecma_char_t) ((result << 4u) | nibble);
  }

  *out_code_unit_p = result;
  return true;
} /* lit_read_code_unit_from_hex */
/**
 * Check if the specified code unit is a word character (part of the IsWordChar abstract operation),
 * i.e. an ASCII letter, an ASCII digit or an underscore.
 *
 * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
 *
 * @return true - if the character is a word character
 *         false - otherwise.
 */
bool
lit_char_is_word_char (ecma_char_t c) /**< code unit */
{
  if (c == LIT_CHAR_UNDERSCORE)
  {
    return true;
  }

  if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
  {
    return true;
  }

  if (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
  {
    return true;
  }

  return (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END);
} /* lit_char_is_word_char */
/**
 * Returns the lowercase character sequence of an ecma character.
 *
 * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
 *
 * Note: only the ASCII letters 'A' - 'Z' are converted at the moment, every
 *       other character is copied to the output unchanged.
 *
 * @return the length of the lowercase character sequence
 *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH
 *         (the current implementation always returns 1).
 */
ecma_length_t
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
                        ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
                        ecma_length_t buffer_size) /**< buffer size */
{
  /* TODO: Needs a proper lower case implementation. See issue #323. */

  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);

  ecma_char_t converted = character;

  if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
  {
    converted = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
  }

  output_buffer_p[0] = converted;
  return 1;
} /* lit_char_to_lower_case */
/**
 * Returns the uppercase character sequence of an ecma character.
 *
 * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
 *
 * Note: only the ASCII letters 'a' - 'z' are converted at the moment, every
 *       other character is copied to the output unchanged.
 *
 * @return the length of the uppercase character sequence
 *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH
 *         (the current implementation always returns 1).
 */
ecma_length_t
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
                        ecma_char_t *output_buffer_p, /**< buffer for the result characters */
                        ecma_length_t buffer_size) /**< buffer size */
{
  /* TODO: Needs a proper upper case implementation. See issue #323. */

  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);

  ecma_char_t converted = character;

  if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
  {
    converted = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
  }

  output_buffer_p[0] = converted;
  return 1;
} /* lit_char_to_upper_case */

View file

@ -0,0 +1,249 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIT_CHAR_HELPERS_H
#define LIT_CHAR_HELPERS_H
#include "lit-globals.h"
#define LIT_CHAR_UNDEF ((ecma_char_t) 0xFFFF) /* undefined character */
/*
* Format control characters (ECMA-262 v5, Table 1)
*/
#define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner */
#define LIT_CHAR_ZWJ ((ecma_char_t) 0x200D) /* zero width joiner */
#define LIT_CHAR_BOM ((ecma_char_t) 0xFEFF) /* byte order mark */
extern bool lit_char_is_format_control (ecma_char_t);
/*
* Whitespace characters (ECMA-262 v5, Table 2)
*/
#define LIT_CHAR_TAB ((ecma_char_t) 0x0009) /* tab */
#define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */
#define LIT_CHAR_FF ((ecma_char_t) 0x000C) /* form feed */
#define LIT_CHAR_SP ((ecma_char_t) 0x0020) /* space */
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
/* LIT_CHAR_BOM is defined above */
extern bool lit_char_is_white_space (ecma_char_t);
/*
* Line terminator characters (ECMA-262 v5, Table 3)
*/
#define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */
#define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */
#define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */
#define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */
extern bool lit_char_is_line_terminator (ecma_char_t);
/*
* String Single Character Escape Sequences (ECMA-262 v5, Table 4)
*/
#define LIT_CHAR_BS ((ecma_char_t) 0x0008) /* backspace */
/* LIT_CHAR_TAB is defined above */
/* LIT_CHAR_LF is defined above */
/* LIT_CHAR_VTAB is defined above */
/* LIT_CHAR_FF is defined above */
/* LIT_CHAR_CR is defined above */
#define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */
#define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */
#define LIT_CHAR_BACKSLASH ((ecma_char_t) '\\') /* reverse solidus (backslash) */
/*
* Comment characters (ECMA-262 v5, 7.4)
*/
#define LIT_CHAR_SLASH ((ecma_char_t) '/') /* solidus */
#define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */
/*
* Identifier name characters (ECMA-262 v5, 7.6)
*/
#define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$') /* dollar sign */
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
/* LIT_CHAR_BACKSLASH defined above */
extern bool lit_char_is_identifier_start (const uint8_t *);
extern bool lit_char_is_identifier_part (const uint8_t *);
extern bool lit_char_is_identifier_start_character (ecma_char_t);
extern bool lit_char_is_identifier_part_character (ecma_char_t);
/*
* Punctuator characters (ECMA-262 v5, 7.7)
*/
#define LIT_CHAR_LEFT_BRACE ((ecma_char_t) '{') /* left curly bracket */
#define LIT_CHAR_RIGHT_BRACE ((ecma_char_t) '}') /* right curly bracket */
#define LIT_CHAR_LEFT_PAREN ((ecma_char_t) '(') /* left parenthesis */
#define LIT_CHAR_RIGHT_PAREN ((ecma_char_t) ')') /* right parenthesis */
#define LIT_CHAR_LEFT_SQUARE ((ecma_char_t) '[') /* left square bracket */
#define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */
#define LIT_CHAR_DOT ((ecma_char_t) '.') /* dot */
#define LIT_CHAR_SEMICOLON ((ecma_char_t) ';') /* semicolon */
#define LIT_CHAR_COMMA ((ecma_char_t) ',') /* comma */
#define LIT_CHAR_LESS_THAN ((ecma_char_t) '<') /* less-than sign */
#define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */
#define LIT_CHAR_EQUALS ((ecma_char_t) '=') /* equals sign */
#define LIT_CHAR_PLUS ((ecma_char_t) '+') /* plus sign */
#define LIT_CHAR_MINUS ((ecma_char_t) '-') /* hyphen-minus */
/* LIT_CHAR_ASTERISK is defined above */
#define LIT_CHAR_PERCENT ((ecma_char_t) '%') /* percent sign */
#define LIT_CHAR_AMPERSAND ((ecma_char_t) '&') /* ampersand */
#define LIT_CHAR_VLINE ((ecma_char_t) '|') /* vertical line */
#define LIT_CHAR_CIRCUMFLEX ((ecma_char_t) '^') /* circumflex accent */
#define LIT_CHAR_EXCLAMATION ((ecma_char_t) '!') /* exclamation mark */
#define LIT_CHAR_TILDE ((ecma_char_t) '~') /* tilde */
#define LIT_CHAR_QUESTION ((ecma_char_t) '?') /* question mark */
#define LIT_CHAR_COLON ((ecma_char_t) ':') /* colon */
/*
* Special characters for String.prototype.replace.
*/
#define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */
/**
* Uppercase ASCII letters
*/
#define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A')
#define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B')
#define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C')
#define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D')
#define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E')
#define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F')
#define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G')
#define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H')
#define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I')
#define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J')
#define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K')
#define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L')
#define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M')
#define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N')
#define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O')
#define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P')
#define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q')
#define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R')
#define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S')
#define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T')
#define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U')
#define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V')
#define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W')
#define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X')
#define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y')
#define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z')
/**
* Lowercase ASCII letters
*/
#define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a')
#define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b')
#define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c')
#define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd')
#define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e')
#define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f')
#define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g')
#define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h')
#define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i')
#define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j')
#define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k')
#define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l')
#define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm')
#define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n')
#define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o')
#define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p')
#define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q')
#define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r')
#define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's')
#define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't')
#define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u')
#define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v')
#define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w')
#define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x')
#define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y')
#define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z')
/**
* ASCII decimal digits
*/
#define LIT_CHAR_0 ((ecma_char_t) '0')
#define LIT_CHAR_1 ((ecma_char_t) '1')
#define LIT_CHAR_2 ((ecma_char_t) '2')
#define LIT_CHAR_3 ((ecma_char_t) '3')
#define LIT_CHAR_4 ((ecma_char_t) '4')
#define LIT_CHAR_5 ((ecma_char_t) '5')
#define LIT_CHAR_6 ((ecma_char_t) '6')
#define LIT_CHAR_7 ((ecma_char_t) '7')
#define LIT_CHAR_8 ((ecma_char_t) '8')
#define LIT_CHAR_9 ((ecma_char_t) '9')
/**
* ASCII character ranges
*/
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END LIT_CHAR_UPPERCASE_Z
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END LIT_CHAR_LOWERCASE_Z
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters for
* hexadecimal digits range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END LIT_CHAR_UPPERCASE_F
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters for
* hexadecimal digits range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END LIT_CHAR_LOWERCASE_F
#define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN LIT_CHAR_0 /* octal digits range */
#define LIT_CHAR_ASCII_OCTAL_DIGITS_END LIT_CHAR_7
#define LIT_CHAR_ASCII_DIGITS_BEGIN LIT_CHAR_0 /* decimal digits range */
#define LIT_CHAR_ASCII_DIGITS_END LIT_CHAR_9
/**
 * Fold an ASCII letter to lowercase by setting the 0x20 bit (LIT_CHAR_SP).
 *
 * Note:
 *      Lowercase ASCII letters are left unchanged, but characters that are not
 *      letters may be modified as well, so the result is only suitable for
 *      range checks against lowercase letters, not as a general case conversion.
 */
#define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP)
extern bool lit_char_is_octal_digit (ecma_char_t);
extern bool lit_char_is_decimal_digit (ecma_char_t);
extern bool lit_char_is_hex_digit (ecma_char_t);
extern uint32_t lit_char_hex_to_int (ecma_char_t);
extern size_t lit_char_to_utf8_bytes (uint8_t *, ecma_char_t);
extern size_t lit_char_get_utf8_length (ecma_char_t);
/* read a hex encoded code unit from a zero terminated buffer */
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t);
/**
* Null character
*/
#define LIT_CHAR_NULL ((ecma_char_t) '\0')
/*
* Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
*/
extern bool lit_char_is_word_char (ecma_char_t);
/*
* Utility functions for uppercasing / lowercasing
*/
/**
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
*/
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
ecma_length_t lit_char_to_lower_case (ecma_char_t, ecma_char_t *, ecma_length_t);
ecma_length_t lit_char_to_upper_case (ecma_char_t, ecma_char_t *, ecma_length_t);
#endif /* !LIT_CHAR_HELPERS_H */

View file

@ -0,0 +1,147 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIT_GLOBALS_H
#define LIT_GLOBALS_H
#include "jrt.h"
/**
* ECMAScript standard defines terms "code unit" and "character" as 16-bit unsigned value
* used to represent 16-bit unit of text, this is the same as code unit in UTF-16 (See ECMA-262 5.1 Chapter 6).
*
* The term "code point" or "Unicode character" is used to refer to a single Unicode scalar value (may be longer
* than 16 bits: 0x0 - 0x10FFFF). One code point can be represented with one or two 16-bit code units.
*
* According to the standard all strings and source text are assumed to be a sequence of code units.
* Length of a string equals to number of code units in the string, which is not the same as number of Unicode
* characters in a string.
*
* Internally JerryScript engine uses UTF-8 representation of strings to reduce memory overhead. Unicode character
* occupies from one to four bytes in UTF-8 representation.
*
* Unicode scalar value | Bytes in UTF-8 | Bytes in UTF-16
* | (internal representation) |
* ----------------------------------------------------------------------
* 0x0 - 0x7F | 1 byte | 2 bytes
* 0x80 - 0x7FF | 2 bytes | 2 bytes
* 0x800 - 0xFFFF | 3 bytes | 2 bytes
* 0x10000 - 0x10FFFF | 4 bytes | 4 bytes
*
* Scalar values from 0xD800 to 0xDFFF are permanently reserved by Unicode standard to encode high and low
* surrogates in UTF-16 (Code points 0x10000 - 0x10FFFF are encoded via pair of surrogates in UTF-16).
* Despite that the official Unicode standard says that no UTF forms can encode these code points, we allow
* them to be encoded inside strings. The reason for that is compatibility with ECMA standard.
*
* For example, assume a string which consists of one Unicode character: 0x1D700 (Mathematical Italic Small Epsilon).
* It has the following representation in UTF-16: 0xD835 0xDF00.
*
* ECMA standard allows extracting a substring from this string:
* > var str = String.fromCharCode (0xD835, 0xDF00); // Create a string containing one character: 0x1D700
* > str.length; // 2
* > var str1 = str.substring (0, 1);
* > str1.length; // 1
* > str1.charCodeAt (0); // 55349 (this equals to 0xD835)
*
* Internally original string would be represented in UTF-8 as the following byte sequence: 0xF0 0x9D 0x9C 0x80.
* After substring extraction high surrogate 0xD835 should be encoded via UTF-8: 0xED 0xA0 0xB5.
*
* A pair of high and low surrogates encoded separately should never occur in the internal string representation;
* it should be encoded as a single code point and occupy 4 bytes. So, when constructing a string from two
* surrogates, it should be processed gracefully:
* > var str1 = String.fromCharCode (0xD835); // 0xED 0xA0 0xB5 - internal representation
* > var str2 = String.fromCharCode (0xDF00); // 0xED 0xBC 0x80 - internal representation
* > var str = str1 + str2; // 0xF0 0x9D 0x9C 0x80 - internal representation,
* // !!! not 0xED 0xA0 0xB5 0xED 0xBC 0x80
*/
/**
* Description of an ecma-character, which represents 16-bit code unit,
* which is equal to UTF-16 character (see Chapter 6 from ECMA-262 5.1)
*/
typedef uint16_t ecma_char_t;
/**
* Description of a collection's/string's length
*/
typedef uint32_t ecma_length_t;
/**
* Description of an ecma-character pointer
*/
typedef ecma_char_t *ecma_char_ptr_t;
/**
* Max bytes needed to represent a code unit (utf-16 char) via utf-8 encoding
*/
#define LIT_UTF8_MAX_BYTES_IN_CODE_UNIT (3)
/**
* Max bytes needed to represent a code point (Unicode character) via utf-8 encoding
*/
#define LIT_UTF8_MAX_BYTES_IN_CODE_POINT (4)
/**
* Max bytes needed to represent a code unit (utf-16 char) via cesu-8 encoding
*/
#define LIT_CESU8_MAX_BYTES_IN_CODE_UNIT (3)
/**
* Max bytes needed to represent a code point (Unicode character) via cesu-8 encoding
*/
#define LIT_CESU8_MAX_BYTES_IN_CODE_POINT (6)
/**
* A byte of utf-8 string
*/
typedef uint8_t lit_utf8_byte_t;
/**
* Size of a utf-8 string in bytes
*/
typedef uint32_t lit_utf8_size_t;
/**
* Size of a magic string in bytes
*/
typedef uint8_t lit_magic_size_t;
/**
* Unicode code point
*/
typedef uint32_t lit_code_point_t;
/**
* ECMA string hash
*/
typedef uint16_t lit_string_hash_t;
/**
* Maximum value of ECMA string hash + 1
*
* Note:
* On ARM, this constant can be encoded as an immediate value
* while 0xffffu cannot be. Hence using this constant reduces
* binary size and improves performance.
*/
#define LIT_STRING_HASH_LIMIT 0x10000u
/**
* Hash of the frequently used "length" string.
*/
#define LIT_STRING_LENGTH_HASH 0x3615u
#endif /* !LIT_GLOBALS_H */

View file

@ -0,0 +1,261 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "jcontext.h"
#include "lit-magic-strings.h"
#include "lit-strings.h"
/**
 * Get the number of external magic strings stored in the engine context.
 *
 * @return number of the strings, if any were registered,
 *         zero - otherwise.
 */
uint32_t
lit_get_magic_string_ex_count (void)
{
  const uint32_t registered_count = JERRY_CONTEXT (lit_magic_string_ex_count);

  return registered_count;
} /* lit_get_magic_string_ex_count */
/**
 * Get specified magic string as zero-terminated string
 *
 * The string is looked up in a function-local static table that is generated
 * from the LIT_MAGIC_STRING_DEF entries of lit-magic-strings.inc.h.
 *
 * @return pointer to zero-terminated magic string
 */
const lit_utf8_byte_t *
lit_get_magic_string_utf8 (lit_magic_string_id_t id) /**< magic string id */
{
/* Each LIT_MAGIC_STRING_DEF (id, utf8_string) entry of the included file expands to
 * one table element: its utf8_string argument cast to a byte pointer. The macro is
 * defined only for the duration of the include. */
static const lit_utf8_byte_t * const magic_strings[] JERRY_CONST_DATA =
{
#define LIT_MAGIC_STRING_DEF(id, utf8_string) \
(const lit_utf8_byte_t *) utf8_string,
#include "lit-magic-strings.inc.h"
#undef LIT_MAGIC_STRING_DEF
};
/* The table is indexed directly by id; presumably it has LIT_MAGIC_STRING__COUNT
 * elements in id order - confirm against lit-magic-strings.inc.h. */
JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
return magic_strings[id];
} /* lit_get_magic_string_utf8 */
/**
 * Get size of specified magic string
 *
 * The size is looked up in a function-local static table that is generated
 * from the LIT_MAGIC_STRING_DEF entries of lit-magic-strings.inc.h.
 *
 * @return size in bytes
 */
lit_utf8_size_t
lit_get_magic_string_size (lit_magic_string_id_t id) /**< magic string id */
{
/* Each LIT_MAGIC_STRING_DEF (id, utf8_string) entry of the included file expands to
 * one table element: sizeof (utf8_string) - 1. For a string literal argument this is
 * its length without the terminating zero. The elements are stored as lit_magic_size_t. */
static const lit_magic_size_t lit_magic_string_sizes[] JERRY_CONST_DATA =
{
#define LIT_MAGIC_STRING_DEF(id, utf8_string) \
sizeof(utf8_string) - 1,
#include "lit-magic-strings.inc.h"
#undef LIT_MAGIC_STRING_DEF
};
/* The table is indexed directly by id; presumably it has LIT_MAGIC_STRING__COUNT
 * elements in id order - confirm against lit-magic-strings.inc.h. */
JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
return lit_magic_string_sizes[id];
} /* lit_get_magic_string_size */
/**
 * Get specified magic string as zero-terminated string from external table
 *
 * Note:
 *      The id has to refer to a registered external magic string; any other
 *      id ends up in JERRY_UNREACHABLE.
 *
 * @return pointer to zero-terminated magic string
 */
const lit_utf8_byte_t *
lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t id) /**< extern magic string id */
{
  const bool is_table_registered = (JERRY_CONTEXT (lit_magic_string_ex_array) != NULL);

  if (is_table_registered && id < JERRY_CONTEXT (lit_magic_string_ex_count))
  {
    return JERRY_CONTEXT (lit_magic_string_ex_array)[id];
  }

  /* No table is registered or the id is out of its range. */
  JERRY_UNREACHABLE ();
} /* lit_get_magic_string_ex_utf8 */
/**
 * Get size of specified external magic string
 *
 * The size is read from the sizes array stored in the context. This function
 * itself does not check that the array is set or that id is in range.
 *
 * @return size in bytes
 */
lit_utf8_size_t
lit_get_magic_string_ex_size (lit_magic_string_ex_id_t id) /**< external magic string id */
{
return JERRY_CONTEXT (lit_magic_string_ex_sizes)[id];
} /* lit_get_magic_string_ex_size */
/**
 * Register external magic strings
 *
 * The passed arrays are stored by pointer in the context (nothing is copied).
 * The context fields are asserted to be unset on entry, so the table can be
 * registered only while no other table is set.
 */
void
lit_magic_strings_ex_set (const lit_utf8_byte_t **ex_str_items, /**< character arrays, representing
                                                                 *   external magic strings' contents */
                          uint32_t count, /**< number of the strings */
                          const lit_utf8_size_t *ex_str_sizes) /**< sizes of the strings */
{
  /* the arguments have to describe a non-empty table */
  JERRY_ASSERT (ex_str_items != NULL);
  JERRY_ASSERT (count > 0);
  JERRY_ASSERT (ex_str_sizes != NULL);

  /* no table may be registered yet */
  JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_array) == NULL);
  JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_count) == 0);
  JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes) == NULL);

  /* Set external magic strings information */
  JERRY_CONTEXT (lit_magic_string_ex_sizes) = ex_str_sizes;
  JERRY_CONTEXT (lit_magic_string_ex_count) = count;
  JERRY_CONTEXT (lit_magic_string_ex_array) = ex_str_items;

#ifndef JERRY_NDEBUG
  /* debug builds verify every declared size against the zero-terminated contents */
  lit_magic_string_ex_id_t id = (lit_magic_string_ex_id_t) 0;

  while (id < JERRY_CONTEXT (lit_magic_string_ex_count))
  {
    lit_utf8_size_t string_size = lit_zt_utf8_string_size (lit_get_magic_string_ex_utf8 (id));

    JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes)[id] == string_size);
    JERRY_ASSERT (JERRY_CONTEXT (lit_magic_string_ex_sizes)[id] <= LIT_MAGIC_STRING_LENGTH_LIMIT);

    id = (lit_magic_string_ex_id_t) (id + 1);
  }
#endif /* !JERRY_NDEBUG */
} /* lit_magic_strings_ex_set */
/**
 * Search the built-in magic strings for a string equal to the passed one.
 * If an equal magic string is found, its id is stored in 'out_id_p',
 * otherwise LIT_MAGIC_STRING__COUNT is stored there.
 *
 * @return true - if magic string equal to passed string was found,
 *         false - otherwise.
 */
bool
lit_is_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
                          lit_utf8_size_t string_size, /**< string size in bytes */
                          lit_magic_string_id_t *out_id_p) /**< [out] magic string's id */
{
  /* TODO: Improve performance of search */
  lit_magic_string_id_t candidate = (lit_magic_string_id_t) 0;

  while (candidate < LIT_MAGIC_STRING__COUNT)
  {
    if (lit_compare_utf8_string_and_magic_string (string_p, string_size, candidate))
    {
      *out_id_p = candidate;
      return true;
    }

    candidate = (lit_magic_string_id_t) (candidate + 1);
  }

  /* every magic string was compared without a match */
  *out_id_p = LIT_MAGIC_STRING__COUNT;
  return false;
} /* lit_is_utf8_string_magic */
/**
 * Search the external magic strings for a string equal to the passed one.
 * If an equal magic string is found, its id is stored in 'out_id_p',
 * otherwise the number of external magic strings is stored there.
 *
 * @return true - if external magic string equal to passed string was found,
 *         false - otherwise.
 */
bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *string_p, /**< utf-8 string */
                                  lit_utf8_size_t string_size, /**< string size in bytes */
                                  lit_magic_string_ex_id_t *out_id_p) /**< [out] magic string's id */
{
  /* TODO: Improve performance of search */
  lit_magic_string_ex_id_t candidate = (lit_magic_string_ex_id_t) 0;

  while (candidate < JERRY_CONTEXT (lit_magic_string_ex_count))
  {
    if (lit_compare_utf8_string_and_magic_string_ex (string_p, string_size, candidate))
    {
      *out_id_p = candidate;
      return true;
    }

    candidate = (lit_magic_string_ex_id_t) (candidate + 1);
  }

  /* every external magic string was compared without a match */
  *out_id_p = JERRY_CONTEXT (lit_magic_string_ex_count);
  return false;
} /* lit_is_ex_utf8_string_magic */
/**
 * Check whether a utf-8 string has the same contents as a built-in magic string
 *
 * @return true if strings are equal
 *         false otherwise
 */
bool
lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *string_p, /**< utf-8 string */
                                          lit_utf8_size_t string_size, /**< string size in bytes */
                                          lit_magic_string_id_t magic_string_id) /**< magic string's id */
{
  const lit_utf8_byte_t *magic_p = lit_get_magic_string_utf8 (magic_string_id);
  const lit_utf8_size_t magic_size = lit_get_magic_string_size (magic_string_id);

  return lit_compare_utf8_strings (string_p, string_size, magic_p, magic_size);
} /* lit_compare_utf8_string_and_magic_string */
/**
 * Check whether a utf-8 string has the same contents as an external magic string
 *
 * @return true if strings are equal
 *         false otherwise
 */
bool
lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *string_p, /**< utf-8 string */
                                             lit_utf8_size_t string_size, /**< string size in bytes */
                                             lit_magic_string_ex_id_t magic_string_ex_id) /**< external magic string's
                                                                                           *   id */
{
  const lit_utf8_byte_t *magic_ex_p = lit_get_magic_string_ex_utf8 (magic_string_ex_id);
  const lit_utf8_size_t magic_ex_size = lit_get_magic_string_ex_size (magic_string_ex_id);

  return lit_compare_utf8_strings (string_p, string_size, magic_ex_p, magic_ex_size);
} /* lit_compare_utf8_string_and_magic_string_ex */
/**
 * Copy the bytes of a built-in magic string into a buffer
 *
 * Warning:
 *         the routine requires that buffer size is enough; the size is
 *         checked by assertion only. No terminating zero is written.
 *
 * @return pointer to the byte next to the last copied in the buffer
 */
extern lit_utf8_byte_t *
lit_copy_magic_string_to_buffer (lit_magic_string_id_t id, /**< magic string id */
                                 lit_utf8_byte_t *buffer_p, /**< destination buffer */
                                 lit_utf8_size_t buffer_size) /**< size of buffer */
{
  const lit_utf8_byte_t *src_p = lit_get_magic_string_utf8 (id);
  const lit_utf8_byte_t *const src_end_p = src_p + lit_get_magic_string_size (id);

  lit_utf8_byte_t *dest_p = buffer_p;
  lit_utf8_size_t bytes_copied = 0;

  for (; src_p < src_end_p; src_p++)
  {
    /* count the byte first, so the check covers the write that follows */
    bytes_copied ++;
    JERRY_ASSERT (bytes_copied <= buffer_size);

    *dest_p++ = *src_p;
  }

  return dest_p;
} /* lit_copy_magic_string_to_buffer */

View file

@ -0,0 +1,66 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIT_MAGIC_STRINGS_H
#define LIT_MAGIC_STRINGS_H
#include "lit-globals.h"
/**
 * Limit for magic string length
 *
 * The value is compared against the byte sizes of external magic strings
 * when they are registered (debug-only assertion in lit_magic_strings_ex_set).
 */
#define LIT_MAGIC_STRING_LENGTH_LIMIT 32
/**
 * Identifiers of ECMA and implementation-defined magic string constants
 *
 * One enumerator is generated for every LIT_MAGIC_STRING_DEF entry of
 * lit-magic-strings.inc.h, in file order, so the first entry gets value 0
 * and LIT_MAGIC_STRING__COUNT equals the number of entries.
 */
typedef enum
{
/* keep only the identifier of each entry, the string itself is not needed here */
#define LIT_MAGIC_STRING_DEF(id, ascii_zt_string) \
id,
#include "lit-magic-strings.inc.h"
#undef LIT_MAGIC_STRING_DEF
LIT_MAGIC_STRING__COUNT, /**< number of magic strings */
/* NOTE(review): presumably present only to make the enum type wide enough to hold INT32_MAX - confirm */
LIT_MAGIC_STRING__FORCE_LARGE = INT32_MAX,
} lit_magic_string_id_t;
/**
 * Identifiers of implementation-defined external magic string constants
 *
 * An identifier is used as an index into the external string and size arrays.
 */
typedef uint32_t lit_magic_string_ex_id_t;
/* Accessors: number of external strings, contents and sizes of built-in and external strings */
extern uint32_t lit_get_magic_string_ex_count (void);
extern const lit_utf8_byte_t *lit_get_magic_string_utf8 (lit_magic_string_id_t);
extern lit_utf8_size_t lit_get_magic_string_size (lit_magic_string_id_t);
extern const lit_utf8_byte_t *lit_get_magic_string_ex_utf8 (lit_magic_string_ex_id_t);
extern lit_utf8_size_t lit_get_magic_string_ex_size (lit_magic_string_ex_id_t);
/* Registration of the external table: string pointers, number of strings, string sizes */
extern void lit_magic_strings_ex_set (const lit_utf8_byte_t **, uint32_t, const lit_utf8_size_t *);
/* Searches: string, its size in bytes, [out] id of the equal magic string */
extern bool lit_is_utf8_string_magic (const lit_utf8_byte_t *, lit_utf8_size_t, lit_magic_string_id_t *);
extern bool lit_is_ex_utf8_string_magic (const lit_utf8_byte_t *, lit_utf8_size_t, lit_magic_string_ex_id_t *);
/* Comparisons: string, its size in bytes, id of the magic string to compare with */
extern bool lit_compare_utf8_string_and_magic_string (const lit_utf8_byte_t *, lit_utf8_size_t,
lit_magic_string_id_t);
extern bool lit_compare_utf8_string_and_magic_string_ex (const lit_utf8_byte_t *, lit_utf8_size_t,
lit_magic_string_ex_id_t);
/* Copy: magic string id, destination buffer, size of the destination buffer */
extern lit_utf8_byte_t *lit_copy_magic_string_to_buffer (lit_magic_string_id_t, lit_utf8_byte_t *, lit_utf8_size_t);
#endif /* !LIT_MAGIC_STRINGS_H */

View file

@ -0,0 +1,260 @@
/* Copyright 2014-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * List of ECMA magic strings
 *
 * Every entry has the form LIT_MAGIC_STRING_DEF (identifier, "string").
 * The including file defines LIT_MAGIC_STRING_DEF before including this
 * list, and the order of the entries is the order of whatever it generates.
 *
 * These strings must be ascii strings. If non-ascii strings
 * are ever needed, a divider will be added to separate
 * the ascii and non-ascii groups.
 */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARGUMENTS, "arguments")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVAL, "eval")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PROTOTYPE, "prototype")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONSTRUCTOR, "constructor")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALLER, "caller")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALLEE, "callee")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNDEFINED, "undefined")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NULL, "null")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FALSE, "false")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TRUE, "true")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BOOLEAN, "boolean")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NUMBER, "number")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING, "string")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_OBJECT, "object")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FUNCTION, "function")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LENGTH, "length")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOURCE, "source")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GLOBAL, "global")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IGNORECASE_UL, "ignoreCase")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MULTILINE, "multiline")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX, "index")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INPUT, "input")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LASTINDEX_UL, "lastIndex")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NAN, "NaN")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INFINITY_UL, "Infinity")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEGATIVE_INFINITY_UL, "-Infinity")
/* Capitalized names ("Undefined" ... "JSON") */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNDEFINED_UL, "Undefined")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NULL_UL, "Null")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_OBJECT_UL, "Object")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FUNCTION_UL, "Function")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARRAY_UL, "Array")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ARGUMENTS_UL, "Arguments")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING_UL, "String")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BOOLEAN_UL, "Boolean")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NUMBER_UL, "Number")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DATE_UL, "Date")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REGEXP_UL, "RegExp")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REGEXP_SOURCE_UL, "Source")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ERROR_UL, "Error")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVAL_ERROR_UL, "EvalError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RANGE_ERROR_UL, "RangeError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REFERENCE_ERROR_UL, "ReferenceError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SYNTAX_ERROR_UL, "SyntaxError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TYPE_ERROR_UL, "TypeError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_URI_ERROR_UL, "URIError")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MATH_UL, "Math")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JSON_U, "JSON")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRINGIFY, "stringify")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE, "parse")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE_INT, "parseInt")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE_FLOAT, "parseFloat")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_NAN, "isNaN")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_FINITE, "isFinite")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI, "decodeURI")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI_COMPONENT, "decodeURIComponent")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI, "encodeURI")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI_COMPONENT, "encodeURIComponent")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNESCAPE, "unescape")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_PROTOTYPE_OF_UL, "getPrototypeOf")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_DESCRIPTOR_UL, "getOwnPropertyDescriptor")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_NAMES_UL, "getOwnPropertyNames")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CREATE, "create")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DEFINE_PROPERTY_UL, "defineProperty")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DEFINE_PROPERTIES_UL, "defineProperties")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SEAL, "seal")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FREEZE, "freeze")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PREVENT_EXTENSIONS_UL, "preventExtensions")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_SEALED_UL, "isSealed")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_FROZEN_UL, "isFrozen")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_EXTENSIBLE, "isExtensible")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_KEYS, "keys")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_WRITABLE, "writable")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENUMERABLE, "enumerable")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONFIGURABLE, "configurable")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_VALUE, "value")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET, "get")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET, "set")
/* Upper-case mathematical constant names, followed by mathematical function names */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_E_U, "E")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LN10_U, "LN10")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LN2_U, "LN2")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG2E_U, "LOG2E")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG10E_U, "LOG10E")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PI_U, "PI")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT1_2_U, "SQRT1_2")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT2_U, "SQRT2")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ABS, "abs")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ACOS, "acos")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ASIN, "asin")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ATAN, "atan")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ATAN2, "atan2")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CEIL, "ceil")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COS, "cos")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EXP, "exp")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLOOR, "floor")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOG, "log")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAX, "max")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MIN, "min")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POW, "pow")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RANDOM, "random")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ROUND, "round")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SIN, "sin")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SQRT, "sqrt")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TAN, "tan")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FROM_CHAR_CODE_UL, "fromCharCode")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_ARRAY_UL, "isArray")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_STRING_UL, "toString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_VALUE_OF_UL, "valueOf")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_STRING_UL, "toLocaleString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_HAS_OWN_PROPERTY_UL, "hasOwnProperty")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_PROTOTYPE_OF_UL, "isPrototypeOf")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PROPERTY_IS_ENUMERABLE_UL, "propertyIsEnumerable")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CONCAT, "concat")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POP, "pop")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JOIN, "join")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PUSH, "push")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REVERSE, "reverse")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SHIFT, "shift")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLICE, "slice")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SORT, "sort")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLICE, "splice")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNSHIFT, "unshift")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX_OF_UL, "indexOf")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LAST_INDEX_OF_UL, "lastIndexOf")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVERY, "every")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOME, "some")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FOR_EACH_UL, "forEach")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAP, "map")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FILTER, "filter")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REDUCE, "reduce")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REDUCE_RIGHT_UL, "reduceRight")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CHAR_AT_UL, "charAt")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CHAR_CODE_AT_UL, "charCodeAt")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, "localeCompare")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MATCH, "match")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REPLACE, "replace")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SEARCH, "search")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLIT, "split")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SUBSTR, "substr")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SUBSTRING, "substring")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, "toLowerCase")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_LOWER_CASE_UL, "toLocaleLowerCase")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_UPPER_CASE_UL, "toUpperCase")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_UPPER_CASE_UL, "toLocaleUpperCase")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TRIM, "trim")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_FIXED_UL, "toFixed")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_EXPONENTIAL_UL, "toExponential")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_PRECISION_UL, "toPrecision")
/* Date and time related names ("now" ... "toJSON") */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NOW, "now")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_DATE_STRING_UL, "toDateString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_TIME_STRING_UL, "toTimeString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_DATE_STRING_UL, "toLocaleDateString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_LOCALE_TIME_STRING_UL, "toLocaleTimeString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_TIME_UL, "getTime")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_FULL_YEAR_UL, "getFullYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UTC_U, "UTC")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_FULL_YEAR_UL, "getUTCFullYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_YEAR_UL, "getYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MONTH_UL, "getMonth")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MONTH_UL, "getUTCMonth")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DATE_UL, "getDate")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_DATE_UL, "getUTCDate")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DAY_UL, "getDay")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_DAY_UL, "getUTCDay")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_HOURS_UL, "getHours")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_HOURS_UL, "getUTCHours")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MINUTES_UL, "getMinutes")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MINUTES_UL, "getUTCMinutes")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_SECONDS_UL, "getSeconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_SECONDS_UL, "getUTCSeconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MILLISECONDS_UL, "getMilliseconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_MILLISECONDS_UL, "getUTCMilliseconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_TIMEZONE_OFFSET_UL, "getTimezoneOffset")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_TIME_UL, "setTime")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MILLISECONDS_UL, "setMilliseconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MILLISECONDS_UL, "setUTCMilliseconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_SECONDS_UL, "setSeconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_SECONDS_UL, "setUTCSeconds")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MINUTES_UL, "setMinutes")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MINUTES_UL, "setUTCMinutes")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_HOURS_UL, "setHours")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_HOURS_UL, "setUTCHours")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_DATE_UL, "setDate")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_DATE_UL, "setUTCDate")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_MONTH_UL, "setMonth")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_MONTH_UL, "setUTCMonth")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_FULL_YEAR_UL, "setFullYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_UTC_FULL_YEAR_UL, "setUTCFullYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SET_YEAR_UL, "setYear")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_UTC_STRING_UL, "toUTCString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_ISO_STRING_UL, "toISOString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_GMT_STRING_UL, "toGMTString")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_JSON_UL, "toJSON")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MAX_VALUE_U, "MAX_VALUE")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MIN_VALUE_U, "MIN_VALUE")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_POSITIVE_INFINITY_U, "POSITIVE_INFINITY")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEGATIVE_INFINITY_U, "NEGATIVE_INFINITY")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INVALID_DATE_UL, "Invalid Date")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_APPLY, "apply")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CALL, "call")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BIND, "bind")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMPILE, "compile")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EXEC, "exec")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TEST, "test")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NAME, "name")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MESSAGE, "message")
/* The "print" entry exists only when CONFIG_DISABLE_PRINT_BUILTIN is not defined,
 * so the entries after it are numbered differently depending on that setting */
#ifndef CONFIG_DISABLE_PRINT_BUILTIN
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PRINT, "print")
#endif
/* Single letters, punctuation and other short strings, ending with the empty string */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_G_CHAR, "g")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_I_CHAR, "i")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_M_CHAR, "m")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TIME_SEP_U, "T")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_Z_CHAR, "Z")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLASH_CHAR, "/")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_BACKSLASH_CHAR, "\\")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP, "(?:)")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_SQUARE_CHAR, "[")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_SQUARE_CHAR, "]")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_BRACE_CHAR, "{")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_BRACE_CHAR, "}")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LEFT_PARENTHESIS_CHAR, "(")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_PARENTHESIS_CHAR, ")")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MINUS_CHAR, "-")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COLON_CHAR, ":")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMMA_CHAR, ",")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOT_CHAR, ".")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR, "\"")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_NEW_LINE_CHAR, "\n")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPACE_CHAR, " ")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING__EMPTY, "")
/*
 * Implementation-defined magic strings
 *
 * These entries close the list, so they get the last identifiers
 * generated from it.
 */
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_JERRY_UL, "Jerry")
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING__FUNCTION_TO_STRING, "function(){/* ecmascript */}")

View file

@ -0,0 +1,966 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lit-strings.h"
#include "jrt-libc-includes.h"
/**
 * Check that a byte buffer contains a well-formed utf-8 string
 *
 * NOTE:
 *      A lone (isolated) surrogate is accepted.
 *      A high surrogate directly followed by a low surrogate is rejected:
 *      such a pair should be represented as one 4-byte utf-8 character.
 *
 * @return true if utf-8 string is well-formed
 *         false otherwise
 */
bool
lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
                          lit_utf8_size_t buf_size) /**< string size */
{
  bool prev_was_high_surrogate = false;
  lit_utf8_size_t pos = 0;

  while (pos < buf_size)
  {
    const lit_utf8_byte_t lead = utf8_buf_p[pos++];

    if ((lead & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
    {
      /* a single-byte character breaks any pending surrogate sequence */
      prev_was_high_surrogate = false;
      continue;
    }

    /* classify the lead byte: number of continuation bytes, smallest code
     * point allowed for that length, and the payload bits of the lead byte */
    lit_utf8_size_t tail_length;
    lit_code_point_t lowest_allowed = 0;
    lit_code_point_t decoded = 0;

    if ((lead & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
    {
      tail_length = 1;
      lowest_allowed = LIT_UTF8_2_BYTE_CODE_POINT_MIN;
      decoded = ((uint32_t) (lead & LIT_UTF8_LAST_5_BITS_MASK));
    }
    else if ((lead & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
    {
      tail_length = 2;
      lowest_allowed = LIT_UTF8_3_BYTE_CODE_POINT_MIN;
      decoded = ((uint32_t) (lead & LIT_UTF8_LAST_4_BITS_MASK));
    }
    else if ((lead & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
    {
      tail_length = 3;
      lowest_allowed = LIT_UTF8_4_BYTE_CODE_POINT_MIN;
      decoded = ((uint32_t) (lead & LIT_UTF8_LAST_3_BITS_MASK));
    }
    else
    {
      /* utf-8 string could not contain 5- and 6-byte sequences. */
      return false;
    }

    if (pos + tail_length > buf_size)
    {
      /* utf-8 string breaks in the middle */
      return false;
    }

    const lit_utf8_size_t next_pos = pos + tail_length;

    for (; pos < next_pos; pos++)
    {
      const lit_utf8_byte_t tail = utf8_buf_p[pos];

      if ((tail & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
      {
        /* invalid continuation byte */
        return false;
      }

      decoded <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
      decoded |= (tail & LIT_UTF8_LAST_6_BITS_MASK);
    }

    if (decoded < lowest_allowed
        || decoded > LIT_UNICODE_CODE_POINT_MAX)
    {
      /* utf-8 string doesn't encode valid unicode code point */
      return false;
    }

    const bool is_high_surrogate = (decoded >= LIT_UTF16_HIGH_SURROGATE_MIN
                                    && decoded <= LIT_UTF16_HIGH_SURROGATE_MAX);

    if (!is_high_surrogate
        && prev_was_high_surrogate
        && decoded >= LIT_UTF16_LOW_SURROGATE_MIN
        && decoded <= LIT_UTF16_LOW_SURROGATE_MAX)
    {
      /* sequence of high and low surrogate is not allowed */
      return false;
    }

    prev_was_high_surrogate = is_high_surrogate;
  }

  return true;
} /* lit_is_utf8_string_valid */
/**
 * Check that a byte buffer contains a well-formed cesu-8 string
 *
 * Only 1-, 2- and 3-byte sequences are accepted; any other lead byte
 * makes the string invalid.
 *
 * @return true if cesu-8 string is well-formed
 *         false otherwise
 */
bool
lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< cesu-8 string */
                           lit_utf8_size_t buf_size) /**< string size */
{
  lit_utf8_size_t pos = 0;

  while (pos < buf_size)
  {
    const lit_utf8_byte_t lead = utf8_buf_p[pos++];

    if ((lead & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
    {
      /* single-byte character, nothing more to check */
      continue;
    }

    /* classify the lead byte: number of continuation bytes, smallest code
     * point allowed for that length, and the payload bits of the lead byte */
    lit_utf8_size_t tail_length;
    lit_code_point_t lowest_allowed = 0;
    lit_code_point_t decoded = 0;

    if ((lead & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
    {
      tail_length = 1;
      lowest_allowed = LIT_UTF8_2_BYTE_CODE_POINT_MIN;
      decoded = ((uint32_t) (lead & LIT_UTF8_LAST_5_BITS_MASK));
    }
    else if ((lead & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
    {
      tail_length = 2;
      lowest_allowed = LIT_UTF8_3_BYTE_CODE_POINT_MIN;
      decoded = ((uint32_t) (lead & LIT_UTF8_LAST_4_BITS_MASK));
    }
    else
    {
      /* neither a 2-byte nor a 3-byte lead byte */
      return false;
    }

    if (pos + tail_length > buf_size)
    {
      /* cesu-8 string breaks in the middle */
      return false;
    }

    const lit_utf8_size_t next_pos = pos + tail_length;

    for (; pos < next_pos; pos++)
    {
      const lit_utf8_byte_t tail = utf8_buf_p[pos];

      if ((tail & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
      {
        /* invalid continuation byte */
        return false;
      }

      decoded <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
      decoded |= (tail & LIT_UTF8_LAST_6_BITS_MASK);
    }

    if (decoded < lowest_allowed)
    {
      /* cesu-8 string doesn't encode valid unicode code point */
      return false;
    }
  }

  return true;
} /* lit_is_cesu8_string_valid */
/**
 * Check if the code point lies in the UTF-16 low surrogate range
 * (both range limits are included)
 *
 * @return true / false
 */
bool
lit_is_code_point_utf16_low_surrogate (lit_code_point_t code_point) /**< code point */
{
  if (code_point < LIT_UTF16_LOW_SURROGATE_MIN)
  {
    return false;
  }

  return code_point <= LIT_UTF16_LOW_SURROGATE_MAX;
} /* lit_is_code_point_utf16_low_surrogate */
/**
 * Check if the code point lies in the UTF-16 high surrogate range
 * (both range limits are included)
 *
 * @return true / false
 */
bool
lit_is_code_point_utf16_high_surrogate (lit_code_point_t code_point) /**< code point */
{
  if (code_point < LIT_UTF16_HIGH_SURROGATE_MIN)
  {
    return false;
  }

  return code_point <= LIT_UTF16_HIGH_SURROGATE_MAX;
} /* lit_is_code_point_utf16_high_surrogate */
/**
 * Compute the low (second) code unit of the surrogate pair that
 * represents a code point above 0xFFFF
 *
 * @return lower code_unit of the surrogate pair
 */
static ecma_char_t
convert_code_point_to_low_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
{
  JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);

  /* the low surrogate carries the last 10 bits of the code point */
  const ecma_char_t payload = (ecma_char_t) (code_point & LIT_UTF16_LAST_10_BITS_MASK);

  return (ecma_char_t) (LIT_UTF16_LOW_SURROGATE_MARKER | payload);
} /* convert_code_point_to_low_surrogate */
/**
 * Compute the high (first) code unit of the surrogate pair that
 * represents a code point above 0xFFFF
 *
 * @return higher code_unit of the surrogate pair
 */
static ecma_char_t
convert_code_point_to_high_surrogate (lit_code_point_t code_point) /**< code point, should be > 0xFFFF */
{
  JERRY_ASSERT (code_point > LIT_UTF16_CODE_UNIT_MAX);
  JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);

  /* rebase the code point to the first surrogate-encoded value, then keep
   * the bits that are left after the low surrogate's share is shifted out */
  const lit_code_point_t rebased = code_point - LIT_UTF16_FIRST_SURROGATE_CODE_POINT;
  const ecma_char_t payload = (ecma_char_t) (rebased >> LIT_UTF16_BITS_IN_SURROGATE);

  return (LIT_UTF16_HIGH_SURROGATE_MARKER | payload);
} /* convert_code_point_to_high_surrogate */
/**
 * Get the length of the utf-8 encoding of a code point
 *
 * Code points above the 3-byte maximum are reported as 4 bytes long;
 * the 4-byte maximum is checked by assertion only.
 *
 * @return byte count required to represent the code point
 */
lit_utf8_size_t
lit_get_codepoint_utf8_size (lit_code_point_t code_point) /**< code point */
{
  if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
  {
    return 1;
  }

  if (code_point <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
  {
    return 2;
  }

  if (code_point <= LIT_UTF8_3_BYTE_CODE_POINT_MAX)
  {
    return 3;
  }

  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MAX);
  return 4;
} /* lit_get_codepoint_utf8_size */
/**
 * Read one code unit from a CESU-8 string, advance the string pointer past it
 * and dispatch on the kind of the code unit:
 *  - high surrogate: it is remembered in *high_surrogate_ch, no callback is called;
 *  - low surrogate: if a high surrogate is remembered, the pair is converted to a
 *    code point and surrogate_pair_cb is called with it;
 *  - anything else: non_surrogate_ch_cb is called with the code unit, if the
 *    callback is not NULL.
 * Except for the high surrogate case, *high_surrogate_ch is reset to 0 before returning.
 *
 * NOTE:
 * Half surrogate pairs will be ignored and "dropped" silently.
 * surrogate_pair_cb is called without a NULL check, only non_surrogate_ch_cb is optional.
 *
 * @return number of CESU-8 bytes consumed, if the code unit read was a (high or low) surrogate,
 *         0 - otherwise
 */
static lit_utf8_size_t
lit_cesu8_inc_and_handle_surrogate(const lit_utf8_byte_t **cesu8_str_p, /**< [in,out] position in the cesu-8 string */
ecma_char_t *high_surrogate_ch, /**< [in,out] pending high surrogate, 0 if there is none */
void(*surrogate_pair_cb)(const lit_code_point_t cp, void *ctx), /**< called for complete pairs */
void(*non_surrogate_ch_cb)(const lit_code_point_t cp, void *ctx), /**< called for other code units,
                                                                   *   can be NULL */
void *ctx) { /**< passed to the callbacks unchanged */
ecma_char_t ch = 0;
lit_utf8_size_t surrogate_size = 0;
const lit_utf8_size_t codepoint_sz = lit_read_code_unit_from_utf8(*cesu8_str_p, &ch);
/* the caller's pointer is advanced in every case */
*cesu8_str_p += codepoint_sz;
if (lit_is_code_point_utf16_high_surrogate(ch)) {
/* remember it (overwriting any earlier one) and wait for the low surrogate */
*high_surrogate_ch = ch;
return codepoint_sz;
}
else if (lit_is_code_point_utf16_low_surrogate(ch)) {
surrogate_size = codepoint_sz;
/* a low surrogate without a pending high surrogate triggers no callback */
if (0 != *high_surrogate_ch) {
const lit_code_point_t cp = lit_convert_surrogate_pair_to_code_point(*high_surrogate_ch, ch);
surrogate_pair_cb(cp, ctx);
}
} else if (non_surrogate_ch_cb) {
non_surrogate_ch_cb((lit_code_point_t)ch, ctx);
}
/* whatever was pending is either consumed or dropped now */
*high_surrogate_ch = 0;
return surrogate_size;
}
/**
* Helper/callback for lit_utf8_string_size_from_cesu8_string() implementation.
*/
static void lit_cesu8_handle_surrogates_utf8_size_callback(const lit_code_point_t cp,
void *ctx) {
lit_utf8_size_t *utf8_buf_size = (lit_utf8_size_t *)ctx;
*utf8_buf_size += lit_get_codepoint_utf8_size(cp);
}
/**
 * Calculate the required size for a buffer to contain the UTF-8 encoded data, given a CESU-8
 * encoded string.
 *
 * The count starts at the CESU-8 size. The bytes of every surrogate code unit are subtracted,
 * and the UTF-8 size of each complete surrogate pair is added back by the size callback.
 *
 * NOTE:
 *      Half surrogate pairs will be ignored and "dropped" silently.
 *
 * @return required size for the UTF-8 buffer
 */
lit_utf8_size_t
lit_utf8_string_size_from_cesu8_string (const lit_utf8_byte_t *cesu8_str_p, /**< cesu-8 string */
                                        lit_utf8_size_t cesu8_buf_size) /**< cesu-8 string size */
{
  lit_utf8_size_t utf8_buf_size = cesu8_buf_size;
  ecma_char_t high_surrogate_ch = LIT_UNICODE_CODE_POINT_NULL;
  const lit_utf8_byte_t *const end = cesu8_str_p + cesu8_buf_size;

  while (cesu8_str_p < end)
  {
    if ((*cesu8_str_p & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
    {
      /* Single byte characters have the same size in both encodings. */
      ++cesu8_str_p;
      high_surrogate_ch = 0;
      continue;
    }

    /*
     * The size callback modifies utf8_buf_size through the context pointer. Complete the call
     * before utf8_buf_size is read here: written as `utf8_buf_size -= call (..., &utf8_buf_size)`
     * the old value may be loaded before the call runs, which would discard the callback's addition.
     */
    const lit_utf8_size_t surrogate_bytes =
      lit_cesu8_inc_and_handle_surrogate (&cesu8_str_p, &high_surrogate_ch,
                                          lit_cesu8_handle_surrogates_utf8_size_callback, NULL,
                                          &utf8_buf_size);
    utf8_buf_size -= surrogate_bytes;
  }

  return utf8_buf_size;
} /* lit_utf8_string_size_from_cesu8_string */
/**
 * Context handed to lit_cesu8_handle_surrogates_utf8_convert_callback()
 * by lit_utf8_string_convert_from_cesu8().
 */
typedef struct {
lit_utf8_byte_t **utf8_str_ptr_ptr; /**< [in,out] address of the current write position in the output buffer */
const lit_utf8_byte_t *utf8_end; /**< end of the output buffer (one past the last writable byte) */
bool is_buffer_too_small; /**< set by the callback when a code point does not fit */
} Cesu8ToUtf8CbData;
/**
* Helper/callback for lit_utf8_string_convert_from_cesu8() implementation.
*/
static void lit_cesu8_handle_surrogates_utf8_convert_callback(const lit_code_point_t cp,
void *ctx) {
Cesu8ToUtf8CbData *cb_data = (Cesu8ToUtf8CbData *)ctx;
lit_utf8_byte_t **utf8_str_p = cb_data->utf8_str_ptr_ptr;
if (*utf8_str_p + lit_get_codepoint_utf8_size(cp) > cb_data->utf8_end) {
cb_data->is_buffer_too_small = true;
return;
}
*utf8_str_p += lit_code_point_to_utf8(cp, *utf8_str_p);
}
/**
 * Copy & convert a CESU-8 encoded string into a UTF-8 encoded string.
 *
 * Conversion stops at the end of the CESU-8 buffer or at the first zero byte in it,
 * whichever comes first.
 *
 * NOTE:
 *      Half surrogate pairs will be ignored and "dropped" silently.
 *
 * @return number of bytes of UTF-8 data written,
 *         0 if the UTF-8 buffer is too small
 */
lit_utf8_size_t
lit_utf8_string_convert_from_cesu8 (const lit_utf8_byte_t *cesu8_str_p, /**< cesu-8 string */
                                    lit_utf8_size_t cesu8_buf_size, /**< cesu-8 buffer size */
                                    lit_utf8_byte_t *utf8_str_p, /**< [out] utf-8 buffer */
                                    lit_utf8_size_t utf8_buf_size) /**< utf-8 buffer size */
{
  lit_utf8_byte_t *const utf8_begin = utf8_str_p;
  const lit_utf8_byte_t *const utf8_end = utf8_str_p + utf8_buf_size;
  const lit_utf8_byte_t *const cesu8_end = cesu8_str_p + cesu8_buf_size;
  ecma_char_t pending_high_surrogate = LIT_UNICODE_CODE_POINT_NULL;

  /* The callback advances utf8_str_p through this context. */
  Cesu8ToUtf8CbData cb_data;
  cb_data.utf8_str_ptr_ptr = &utf8_str_p;
  cb_data.utf8_end = utf8_end;
  cb_data.is_buffer_too_small = false;

  while (cesu8_str_p < cesu8_end)
  {
    const lit_utf8_byte_t byte = *cesu8_str_p;

    if (!byte)
    {
      break;
    }

    if ((byte & LIT_UTF8_1_BYTE_MASK) != LIT_UTF8_1_BYTE_MARKER)
    {
      /* Multi-byte code unit: the same callback writes both non-surrogates and combined pairs. */
      lit_cesu8_inc_and_handle_surrogate (&cesu8_str_p, &pending_high_surrogate,
                                          lit_cesu8_handle_surrogates_utf8_convert_callback,
                                          lit_cesu8_handle_surrogates_utf8_convert_callback,
                                          &cb_data);

      if (cb_data.is_buffer_too_small)
      {
        return 0;
      }

      continue;
    }

    /* Single byte character: copied as is. */
    if (utf8_str_p >= utf8_end)
    {
      return 0;
    }

    *utf8_str_p = byte;
    utf8_str_p++;
    cesu8_str_p++;
    pending_high_surrogate = 0;
  }

  return (lit_utf8_size_t) (utf8_str_p - utf8_begin);
} /* lit_utf8_string_convert_from_cesu8 */
/**
 * Calculate size of a zero-terminated utf-8 string
 *
 * NOTE:
 *      the string must not contain zero bytes in the middle, because the size is
 *      measured with strlen
 *
 * @return size of the string in bytes, not counting the terminating zero
 */
lit_utf8_size_t
lit_zt_utf8_string_size (const lit_utf8_byte_t *utf8_str_p) /**< zero-terminated utf-8 string */
{
  const char *const zt_str_p = (const char *) utf8_str_p;

  return (lit_utf8_size_t) strlen (zt_str_p);
} /* lit_zt_utf8_string_size */
/**
 * Calculate length of a cesu-8 encoded string
 *
 * Only the first byte of each code unit is inspected; its size is used to
 * skip to the next one.
 *
 * @return UTF-16 code units count
 */
ecma_length_t
lit_utf8_string_length (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
                        lit_utf8_size_t utf8_buf_size) /**< string size */
{
  ecma_length_t code_unit_count = 0;
  lit_utf8_size_t offset = 0;

  for (; offset < utf8_buf_size; code_unit_count++)
  {
    offset += lit_get_unicode_char_size_by_utf8_first_byte (utf8_buf_p[offset]);
  }

  /* The last code unit must end exactly at the end of the buffer. */
  JERRY_ASSERT (offset == utf8_buf_size);

  return code_unit_count;
} /* lit_utf8_string_length */
/**
 * Decodes a unicode code point from non-empty utf-8-encoded buffer
 *
 * The first byte selects a 1, 2, 3 or 4 byte long sequence; any other first byte
 * triggers an assertion.
 *
 * @return number of bytes occupied by code point in the string
 */
lit_utf8_size_t
lit_read_code_point_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
                               lit_utf8_size_t buf_size, /**< size of the buffer in bytes */
                               lit_code_point_t *code_point) /**< [out] code point */
{
  JERRY_ASSERT (buf_p && buf_size);

  const lit_utf8_byte_t lead_byte = buf_p[0];

  if ((lead_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
  {
    *code_point = (lit_code_point_t) (lead_byte & LIT_UTF8_LAST_7_BITS_MASK);
    return 1;
  }

  ecma_length_t sequence_size = 0;
  lit_code_point_t decoded = LIT_UNICODE_CODE_POINT_NULL;

  /* The first byte carries the sequence size and the most significant bits. */
  if ((lead_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
  {
    sequence_size = 2;
    decoded = ((lit_code_point_t) (lead_byte & LIT_UTF8_LAST_5_BITS_MASK));
  }
  else if ((lead_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
  {
    sequence_size = 3;
    decoded = ((lit_code_point_t) (lead_byte & LIT_UTF8_LAST_4_BITS_MASK));
  }
  else if ((lead_byte & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
  {
    sequence_size = 4;
    decoded = ((lit_code_point_t) (lead_byte & LIT_UTF8_LAST_3_BITS_MASK));
  }
  else
  {
    JERRY_ASSERT (false);
  }

  JERRY_ASSERT (buf_size >= sequence_size);

  /* Each following byte contributes its low 6 bits. */
  for (uint32_t idx = 1; idx < sequence_size; ++idx)
  {
    decoded <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
    decoded |= (buf_p[idx] & LIT_UTF8_LAST_6_BITS_MASK);
  }

  *code_point = decoded;
  return sequence_size;
} /* lit_read_code_point_from_utf8 */
/**
 * Decodes a unicode code unit from non-empty cesu-8-encoded buffer
 *
 * A code unit occupies 1, 2 or 3 bytes; a first byte that is neither a 1 nor a 2 byte
 * marker is asserted to be a 3 byte marker.
 *
 * @return number of bytes occupied by the code unit in the string
 */
lit_utf8_size_t
lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
                              ecma_char_t *code_point) /**< [out] code unit */
{
  JERRY_ASSERT (buf_p);

  const lit_utf8_byte_t lead_byte = buf_p[0];

  if ((lead_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
  {
    *code_point = (ecma_char_t) (lead_byte & LIT_UTF8_LAST_7_BITS_MASK);
    return 1;
  }

  ecma_length_t sequence_size;
  lit_code_point_t decoded = LIT_UNICODE_CODE_POINT_NULL;

  if ((lead_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
  {
    sequence_size = 2;
    decoded = ((lit_code_point_t) (lead_byte & LIT_UTF8_LAST_5_BITS_MASK));
  }
  else
  {
    JERRY_ASSERT ((lead_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
    sequence_size = 3;
    decoded = ((lit_code_point_t) (lead_byte & LIT_UTF8_LAST_4_BITS_MASK));
  }

  /* Each following byte contributes its low 6 bits. */
  for (uint32_t idx = 1; idx < sequence_size; ++idx)
  {
    decoded <<= LIT_UTF8_BITS_IN_EXTRA_BYTES;
    decoded |= (buf_p[idx] & LIT_UTF8_LAST_6_BITS_MASK);
  }

  JERRY_ASSERT (decoded <= LIT_UTF16_CODE_UNIT_MAX);
  *code_point = (ecma_char_t) decoded;
  return sequence_size;
} /* lit_read_code_unit_from_utf8 */
/**
 * Decodes the unicode code unit that ends right before the given position
 * of a cesu-8-encoded buffer
 *
 * @return number of bytes occupied by the code unit in the string
 */
lit_utf8_size_t
lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
                                   ecma_char_t *code_point) /**< [out] code unit */
{
  JERRY_ASSERT (buf_p);

  /* Step back to the first byte of the previous code unit, then decode forward. */
  const lit_utf8_byte_t *prev_p = buf_p;
  lit_utf8_decr (&prev_p);

  return lit_read_code_unit_from_utf8 (prev_p, code_point);
} /* lit_read_prev_code_unit_from_utf8 */
/**
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
*
* @return next code unit
*/
ecma_char_t
lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
JERRY_ASSERT (*buf_p);
ecma_char_t ch;
*buf_p += lit_read_code_unit_from_utf8 (*buf_p, &ch);
return ch;
} /* lit_utf8_read_next */
/**
 * Moves the position of a cesu-8-encoded buffer back by one code unit
 * and decodes that code unit
 *
 * @return previous code unit
 */
ecma_char_t
lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
  JERRY_ASSERT (*buf_p);

  lit_utf8_decr (buf_p);

  /* The position stays at the start of the decoded code unit. */
  ecma_char_t code_unit;
  lit_read_code_unit_from_utf8 (*buf_p, &code_unit);

  return code_unit;
} /* lit_utf8_read_prev */
/**
 * Decodes the code unit at the given position of a cesu-8-encoded buffer
 * without changing the caller's position
 *
 * @return next code unit
 */
ecma_char_t
lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in] buffer with characters */
{
  JERRY_ASSERT (buf_p);

  ecma_char_t code_unit;
  (void) lit_read_code_unit_from_utf8 (buf_p, &code_unit);

  return code_unit;
} /* lit_utf8_peek_next */
/**
 * Decodes the code unit that ends right before the given position of a
 * cesu-8-encoded buffer without changing the caller's position
 *
 * @return previous code unit
 */
ecma_char_t
lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in] buffer with characters */
{
  JERRY_ASSERT (buf_p);

  /* buf_p is a local copy, so stepping it back does not affect the caller. */
  lit_utf8_decr (&buf_p);

  ecma_char_t code_unit;
  (void) lit_read_code_unit_from_utf8 (buf_p, &code_unit);

  return code_unit;
} /* lit_utf8_peek_prev */
/**
 * Increase cesu-8 encoded string pointer by one code unit.
 *
 * The size of the code unit is taken from its first byte.
 */
void
lit_utf8_incr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
  const lit_utf8_byte_t *const current_p = *buf_p;
  JERRY_ASSERT (current_p);

  *buf_p = current_p + lit_get_unicode_char_size_by_utf8_first_byte (*current_p);
} /* lit_utf8_incr */
/**
 * Decrease cesu-8 encoded string pointer by one code unit.
 *
 * Steps back at least one byte and keeps going while the byte under the
 * pointer is an extra (continuation) byte.
 */
void
lit_utf8_decr (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
  JERRY_ASSERT (*buf_p);

  const lit_utf8_byte_t *cursor_p = *buf_p - 1;

  while ((*cursor_p & LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_EXTRA_BYTE_MARKER)
  {
    cursor_p--;
  }

  *buf_p = cursor_p;
} /* lit_utf8_decr */
/**
 * Continue a hash calculation from hash_basis over the bytes of the buffer.
 *
 * NOTE:
 *      This is implementation of FNV-1a hash function, which is released into public domain.
 *      Constants used, are carefully picked primes by the authors.
 *      More info: http://www.isthe.com/chongo/tech/comp/fnv/
 *
 * @return ecma-string's hash
 */
inline lit_string_hash_t __attr_always_inline___
lit_utf8_string_hash_combine (lit_string_hash_t hash_basis, /**< hash to be combined with */
                              const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */
                              lit_utf8_size_t utf8_buf_size) /**< size of the buffer in bytes */
{
  JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0);

  uint32_t fnv_hash = hash_basis;
  uint32_t idx = 0;

  while (idx < utf8_buf_size)
  {
    /* FNV-1a step: xor the byte in first, then multiply by the 32 bit FNV_prime */
    fnv_hash ^= utf8_buf_p[idx];
    // 16777619 is 32 bit FNV_prime = 2^24 + 2^8 + 0x93 = 16777619
    fnv_hash *= 16777619;
    idx++;
  }

  return (lit_string_hash_t) fnv_hash;
} /* lit_utf8_string_hash_combine */
/**
 * Calculate hash from the buffer, starting from the FNV offset basis.
 *
 * @return ecma-string's hash
 */
inline lit_string_hash_t __attr_always_inline___
lit_utf8_string_calc_hash (const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */
                           lit_utf8_size_t utf8_buf_size) /**< size of the buffer in bytes */
{
  JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0);

  // 32 bit offset_basis for FNV = 2166136261
  const lit_string_hash_t offset_basis = (lit_string_hash_t) 2166136261;

  return lit_utf8_string_hash_combine (offset_basis, utf8_buf_p, utf8_buf_size);
} /* lit_utf8_string_calc_hash */
/**
 * Return code unit at the specified position in string
 *
 * The string is decoded from its beginning, one code unit at a time, until the
 * requested code unit is reached.
 *
 * NOTE:
 *      code_unit_offset should be less than string's length
 *
 * @return code unit value
 */
ecma_char_t
lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
                              lit_utf8_size_t utf8_buf_size, /**< string size in bytes */
                              ecma_length_t code_unit_offset) /**< offset of a code_unit */
{
  /* The buffer is only read, so the cursor keeps the const qualifier of the input. */
  const lit_utf8_byte_t *current_p = utf8_buf_p;
  ecma_char_t code_unit;

  do
  {
    JERRY_ASSERT (current_p < utf8_buf_p + utf8_buf_size);
    current_p += lit_read_code_unit_from_utf8 (current_p, &code_unit);
  }
  while (code_unit_offset--);

  return code_unit;
} /* lit_utf8_string_code_unit_at */
/**
 * Get CESU-8 encoded size of a code unit from its first byte
 *
 * A first byte that is neither a 1 nor a 2 byte marker is asserted to be
 * a 3 byte marker.
 *
 * @return number of bytes occupied in CESU-8
 */
inline lit_utf8_size_t __attr_always_inline___
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< first byte of the code unit */
{
  if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
  {
    return 1;
  }

  if ((first_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
  {
    return 2;
  }

  JERRY_ASSERT ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
  return 3;
} /* lit_get_unicode_char_size_by_utf8_first_byte */
/**
 * Convert code unit to cesu-8 representation
 *
 * @return byte count required to represent the code unit (1 to 3)
 */
lit_utf8_size_t
lit_code_unit_to_utf8 (ecma_char_t code_unit, /**< code unit */
                       lit_utf8_byte_t *buf_p) /**< buffer where to store the result,
                                                *   its size should be at least MAX_BYTES_IN_CODE_UNIT */
{
  if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
  {
    buf_p[0] = (lit_utf8_byte_t) code_unit;
    return 1;
  }

  /* Pick the layout of the multi-byte sequence: its size, first byte marker and payload mask. */
  lit_utf8_size_t byte_count;
  lit_utf8_byte_t lead_marker;
  uint32_t lead_mask;

  if (code_unit <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
  {
    byte_count = 2;
    lead_marker = LIT_UTF8_2_BYTE_MARKER;
    lead_mask = LIT_UTF8_LAST_5_BITS_MASK;
  }
  else
  {
    byte_count = 3;
    lead_marker = LIT_UTF8_3_BYTE_MARKER;
    lead_mask = LIT_UTF8_LAST_4_BITS_MASK;
  }

  /* The bits left above the extra bytes must fit into the first byte's payload. */
  const uint32_t lead_bits = ((uint32_t) code_unit) >> (LIT_UTF8_BITS_IN_EXTRA_BYTES * (byte_count - 1));
  JERRY_ASSERT ((lead_bits & lead_mask) == lead_bits);
  buf_p[0] = (lit_utf8_byte_t) (lead_marker | (lead_bits & lead_mask));

  /* Extra bytes are filled from the last one backwards, 6 bits at a time. */
  uint32_t code_unit_bits = code_unit;

  for (lit_utf8_size_t idx = byte_count - 1; idx > 0; idx--)
  {
    buf_p[idx] = (lit_utf8_byte_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_unit_bits & LIT_UTF8_LAST_6_BITS_MASK));
    code_unit_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
  }

  return byte_count;
} /* lit_code_unit_to_utf8 */
/**
 * Convert code point to cesu-8 representation
 *
 * Code points that fit into one UTF-16 code unit are encoded directly; larger ones are
 * written as a high surrogate followed by a low surrogate.
 *
 * @return byte count required to represent the code point
 */
lit_utf8_size_t
lit_code_point_to_cesu8 (lit_code_point_t code_point, /**< code point */
                         lit_utf8_byte_t *buf) /**< buffer where to store the result,
                                                *   its size should be at least 6 bytes */
{
  if (code_point <= LIT_UTF16_CODE_UNIT_MAX)
  {
    return lit_code_unit_to_utf8 ((ecma_char_t) code_point, buf);
  }

  const lit_utf8_size_t high_size = lit_code_unit_to_utf8 (convert_code_point_to_high_surrogate (code_point), buf);
  const lit_utf8_size_t low_size = lit_code_unit_to_utf8 (convert_code_point_to_low_surrogate (code_point),
                                                          buf + high_size);

  return high_size + low_size;
} /* lit_code_point_to_cesu8 */
/**
 * Convert code point to utf-8 representation
 *
 * @return byte count required to represent the code point (1 to 4)
 */
lit_utf8_size_t
lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
                        lit_utf8_byte_t *buf) /**< buffer where to store the result,
                                               *   its size should be at least 4 bytes */
{
  if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
  {
    buf[0] = (lit_utf8_byte_t) code_point;
    return 1;
  }

  /* Pick the layout of the multi-byte sequence: its size, first byte marker and payload mask. */
  lit_utf8_size_t byte_count;
  lit_utf8_byte_t lead_marker;
  uint32_t lead_mask;

  if (code_point <= LIT_UTF8_2_BYTE_CODE_POINT_MAX)
  {
    byte_count = 2;
    lead_marker = LIT_UTF8_2_BYTE_MARKER;
    lead_mask = LIT_UTF8_LAST_5_BITS_MASK;
  }
  else if (code_point <= LIT_UTF8_3_BYTE_CODE_POINT_MAX)
  {
    byte_count = 3;
    lead_marker = LIT_UTF8_3_BYTE_MARKER;
    lead_mask = LIT_UTF8_LAST_4_BITS_MASK;
  }
  else
  {
    JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MAX);
    byte_count = 4;
    lead_marker = LIT_UTF8_4_BYTE_MARKER;
    lead_mask = LIT_UTF8_LAST_3_BITS_MASK;
  }

  /* The bits left above the extra bytes must fit into the first byte's payload. */
  const uint32_t lead_bits = ((uint32_t) code_point) >> (LIT_UTF8_BITS_IN_EXTRA_BYTES * (byte_count - 1));
  JERRY_ASSERT ((lead_bits & lead_mask) == lead_bits);
  buf[0] = (lit_utf8_byte_t) (lead_marker | (lead_bits & lead_mask));

  /* Extra bytes are filled from the last one backwards, 6 bits at a time. */
  uint32_t code_point_bits = code_point;

  for (lit_utf8_size_t idx = byte_count - 1; idx > 0; idx--)
  {
    buf[idx] = (lit_utf8_byte_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point_bits & LIT_UTF8_LAST_6_BITS_MASK));
    code_point_bits >>= LIT_UTF8_BITS_IN_EXTRA_BYTES;
  }

  return byte_count;
} /* lit_code_point_to_utf8 */
/**
 * Convert surrogate pair to code point
 *
 * The offset of the high surrogate supplies the upper bits, the offset of the low
 * surrogate the lower LIT_UTF16_BITS_IN_SURROGATE bits, and the result is based at
 * LIT_UTF16_FIRST_SURROGATE_CODE_POINT.
 *
 * @return code point
 */
lit_code_point_t
lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, /**< high surrogate code point */
                                          ecma_char_t low_surrogate) /**< low surrogate code point */
{
  JERRY_ASSERT (lit_is_code_point_utf16_high_surrogate (high_surrogate));
  JERRY_ASSERT (lit_is_code_point_utf16_low_surrogate (low_surrogate));

  const uint16_t high_offset = (uint16_t) (high_surrogate - LIT_UTF16_HIGH_SURROGATE_MIN);
  const uint16_t low_offset = (uint16_t) (low_surrogate - LIT_UTF16_LOW_SURROGATE_MIN);

  lit_code_point_t code_point = high_offset;
  code_point <<= LIT_UTF16_BITS_IN_SURROGATE;
  code_point += LIT_UTF16_FIRST_SURROGATE_CODE_POINT;
  code_point |= low_offset;

  return code_point;
} /* lit_convert_surrogate_pair_to_code_point */
/**
 * Compare cesu-8 string to cesu-8 string
 *
 * Strings are equal when they have the same size and the same bytes.
 *
 * @return true  - if strings are equal;
 *         false - otherwise.
 */
bool
lit_compare_utf8_strings (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
                          lit_utf8_size_t string1_size, /**< string size */
                          const lit_utf8_byte_t *string2_p, /**< utf-8 string */
                          lit_utf8_size_t string2_size) /**< string size */
{
  /* The bytes are only compared when the sizes match. */
  return (string1_size == string2_size
          && memcmp (string1_p, string2_p, string1_size) == 0);
} /* lit_compare_utf8_strings */
/**
 * Relational compare of cesu-8 strings
 *
 * The strings are compared code unit by code unit.
 *
 * First string is less than second string if:
 *  - strings are not equal;
 *  - first string is prefix of second or is lexicographically less than second.
 *
 * @return true  - if first string is less than second string,
 *         false - otherwise.
 */
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, /**< utf-8 string */
                                          lit_utf8_size_t string1_size, /**< string size */
                                          const lit_utf8_byte_t *string2_p, /**< utf-8 string */
                                          lit_utf8_size_t string2_size) /**< string size */
{
  /* Both strings are only read, so the cursors keep the const qualifier of the inputs. */
  const lit_utf8_byte_t *string1_pos = string1_p;
  const lit_utf8_byte_t *string2_pos = string2_p;
  const lit_utf8_byte_t *const string1_end_p = string1_p + string1_size;
  const lit_utf8_byte_t *const string2_end_p = string2_p + string2_size;

  while (string1_pos < string1_end_p && string2_pos < string2_end_p)
  {
    ecma_char_t ch1, ch2;
    string1_pos += lit_read_code_unit_from_utf8 (string1_pos, &ch1);
    string2_pos += lit_read_code_unit_from_utf8 (string2_pos, &ch2);

    /* The first differing code unit decides the result. */
    if (ch1 < ch2)
    {
      return true;
    }
    else if (ch1 > ch2)
    {
      return false;
    }
  }

  /* No difference found: the first string is less only if it is a proper prefix of the second. */
  return (string1_pos >= string1_end_p && string2_pos < string2_end_p);
} /* lit_compare_utf8_strings_relational */

View file

@ -0,0 +1,143 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIT_STRINGS_H
#define LIT_STRINGS_H
#include "jrt.h"
#include "lit-globals.h"
/**
 * Null character (used in few cases as utf-8 string end marker)
 */
#define LIT_BYTE_NULL (0)
/**
 * For the formal definition of Unicode transformation formats (UTF) see Section 3.9, Unicode Encoding Forms in The
 * Unicode Standard (http://www.unicode.org/versions/Unicode3.0.0/ch03.pdf#G7404).
 */
/* Code point and UTF-16 code unit limits */
#define LIT_UNICODE_CODE_POINT_NULL (0x0)
#define LIT_UNICODE_CODE_POINT_MAX (0x10FFFF)
#define LIT_UTF16_CODE_UNIT_MAX (0xFFFF)
/* UTF-16 surrogates: first code point that needs a surrogate pair, markers, value ranges and payload width */
#define LIT_UTF16_FIRST_SURROGATE_CODE_POINT (0x10000)
#define LIT_UTF16_LOW_SURROGATE_MARKER (0xDC00)
#define LIT_UTF16_HIGH_SURROGATE_MARKER (0xD800)
#define LIT_UTF16_HIGH_SURROGATE_MIN (0xD800)
#define LIT_UTF16_HIGH_SURROGATE_MAX (0xDBFF)
#define LIT_UTF16_LOW_SURROGATE_MIN (0xDC00)
#define LIT_UTF16_LOW_SURROGATE_MAX (0xDFFF)
#define LIT_UTF16_BITS_IN_SURROGATE (10)
#define LIT_UTF16_LAST_10_BITS_MASK (0x3FF)
/* Marker bits of the first byte of an N byte long sequence and of extra (continuation) bytes */
#define LIT_UTF8_1_BYTE_MARKER (0x00)
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
#define LIT_UTF8_5_BYTE_MARKER (0xF8)
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
/* Masks selecting the marker bits: a byte matches when (byte & MASK) == MARKER */
#define LIT_UTF8_1_BYTE_MASK (0x80)
#define LIT_UTF8_2_BYTE_MASK (0xE0)
#define LIT_UTF8_3_BYTE_MASK (0xF0)
#define LIT_UTF8_4_BYTE_MASK (0xF8)
#define LIT_UTF8_EXTRA_BYTE_MASK (0xC0)
/* Masks selecting the payload bits of a byte */
#define LIT_UTF8_LAST_7_BITS_MASK (0x7F)
#define LIT_UTF8_LAST_6_BITS_MASK (0x3F)
#define LIT_UTF8_LAST_5_BITS_MASK (0x1F)
#define LIT_UTF8_LAST_4_BITS_MASK (0x0F)
#define LIT_UTF8_LAST_3_BITS_MASK (0x07)
#define LIT_UTF8_LAST_2_BITS_MASK (0x03)
#define LIT_UTF8_LAST_1_BIT_MASK (0x01)
/* Number of payload bits carried by each extra byte */
#define LIT_UTF8_BITS_IN_EXTRA_BYTES (6)
/* Code point ranges that are encoded in 1, 2, 3 and 4 bytes */
#define LIT_UTF8_1_BYTE_CODE_POINT_MAX (0x7F)
#define LIT_UTF8_2_BYTE_CODE_POINT_MIN (0x80)
#define LIT_UTF8_2_BYTE_CODE_POINT_MAX (0x7FF)
#define LIT_UTF8_3_BYTE_CODE_POINT_MIN (0x800)
#define LIT_UTF8_3_BYTE_CODE_POINT_MAX (LIT_UTF16_CODE_UNIT_MAX)
#define LIT_UTF8_4_BYTE_CODE_POINT_MIN (0x10000)
#define LIT_UTF8_4_BYTE_CODE_POINT_MAX (LIT_UNICODE_CODE_POINT_MAX)
/**
 * Difference between byte count needed to represent code point greater than 0xFFFF
 * in common UTF-8 (4 bytes required) and CESU-8 (6 bytes required)
 *
 * NOTE(review): LIT_UTF8_MAX_BYTES_IN_CODE_UNIT and LIT_UTF8_MAX_BYTES_IN_CODE_POINT are not
 * defined in this header; presumably they come from lit-globals.h - confirm.
 */
#define LIT_UTF8_CESU8_SURROGATE_SIZE_DIF (2 * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT - LIT_UTF8_MAX_BYTES_IN_CODE_POINT)
/**
 * Byte values >= LIT_UTF8_FIRST_BYTE_MAX are not allowed in internal strings
 */
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
/* validation */
bool lit_is_utf8_string_valid (const lit_utf8_byte_t *, lit_utf8_size_t);
bool lit_is_cesu8_string_valid (const lit_utf8_byte_t *, lit_utf8_size_t);
/* checks */
bool lit_is_code_point_utf16_low_surrogate (lit_code_point_t);
bool lit_is_code_point_utf16_high_surrogate (lit_code_point_t);
/* size (in bytes) */
lit_utf8_size_t lit_zt_utf8_string_size (const lit_utf8_byte_t *);
lit_utf8_size_t lit_get_codepoint_utf8_size(lit_code_point_t code_point);
/* length (in UTF-16 code units) */
ecma_length_t lit_utf8_string_length (const lit_utf8_byte_t *, lit_utf8_size_t);
/* hash */
lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_size_t);
lit_string_hash_t lit_utf8_string_hash_combine (lit_string_hash_t, const lit_utf8_byte_t *, lit_utf8_size_t);
/* code unit access */
ecma_char_t lit_utf8_string_code_unit_at (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t);
lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t);
/* conversion of a single code unit / code point; the return value is the number of bytes written */
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t, lit_utf8_byte_t *);
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t, lit_utf8_byte_t *);
lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t, lit_utf8_byte_t *);
lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t, ecma_char_t);
/* cesu-8 => utf-8 conversion of whole strings */
lit_utf8_size_t lit_utf8_string_size_from_cesu8_string (const lit_utf8_byte_t *, lit_utf8_size_t);
lit_utf8_size_t lit_utf8_string_convert_from_cesu8 (const lit_utf8_byte_t *, lit_utf8_size_t, lit_utf8_byte_t *, lit_utf8_size_t);
/* comparison: equality and "less than" */
bool lit_compare_utf8_strings (const lit_utf8_byte_t *, lit_utf8_size_t,
const lit_utf8_byte_t *, lit_utf8_size_t);
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, lit_utf8_size_t,
const lit_utf8_byte_t *string2_p, lit_utf8_size_t);
/* read code point / code unit from buffer; the return value is the number of bytes it occupies */
lit_utf8_size_t lit_read_code_point_from_utf8 (const lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *);
lit_utf8_size_t lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *,
ecma_char_t *);
lit_utf8_size_t lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *,
ecma_char_t *);
/* read_* functions move the buffer pointer, peek_* functions leave it unchanged */
ecma_char_t lit_utf8_read_next (const lit_utf8_byte_t **);
ecma_char_t lit_utf8_read_prev (const lit_utf8_byte_t **);
ecma_char_t lit_utf8_peek_next (const lit_utf8_byte_t *);
ecma_char_t lit_utf8_peek_prev (const lit_utf8_byte_t *);
/* move the buffer pointer forward / backward by one code unit */
void lit_utf8_incr (const lit_utf8_byte_t **);
void lit_utf8_decr (const lit_utf8_byte_t **);
#endif /* !LIT_STRINGS_H */

View file

@ -0,0 +1,211 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2015-2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Unicode characters and ranges generated by tools/print-unicode-ranges.sh
* from UnicodeData-3.0.0.txt.
* See also:
* http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
* http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
*/
#ifndef LIT_UNICODE_RANGES_INC_H_
#define LIT_UNICODE_RANGES_INC_H_
/**
 * Character interval starting points for the unicode letters.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
 *
 * The entry at index i pairs with unicode_letter_interv_lens[i]. Lengths are stored in
 * uint8_t, so longer ranges appear as several consecutive entries
 * (e.g. 0X1401, 0X1501, 0X1601).
 */
static const uint16_t unicode_letter_interv_sps[] JERRY_CONST_DATA =
{
/*
 * these are handled separately
 * 0x0041, len 25
 * 0x0061, len 25
 */
0x00C0, 0x00D8, 0XF8, 0X1F8, 0x0222, 0x0250, 0x02B0, 0x02BB,
0x02D0, 0x02E0, 0x0388, 0x038E, 0x03A3, 0x03D0, 0x03DA, 0x0400, 0x048C, 0x04C7,
0x04CB, 0x04D0, 0x04F8, 0x0531, 0x0561, 0x05D0, 0x05F0, 0x0621, 0x0640, 0x0671,
0x06E5, 0x06FA, 0x0712, 0x0780, 0x0905, 0x0958, 0x0985, 0x098F, 0x0993, 0x09AA,
0x09B6, 0x09DC, 0x09DF, 0x09F0, 0x0A05, 0x0A0F, 0x0A13, 0x0A2A, 0x0A32, 0x0A35,
0x0A38, 0x0A59, 0x0A72, 0x0A85, 0x0A8F, 0x0A93, 0x0AAA, 0x0AB2, 0x0AB5, 0x0B05,
0x0B0F, 0x0B13, 0x0B2A, 0x0B32, 0x0B36, 0x0B5C, 0x0B5F, 0x0B85, 0x0B8E, 0x0B92,
0x0B99, 0x0B9E, 0x0BA3, 0x0BA8, 0x0BAE, 0x0BB7, 0x0C05, 0x0C0E, 0x0C12, 0x0C2A,
0x0C35, 0x0C60, 0x0C85, 0x0C8E, 0x0C92, 0x0CAA, 0x0CB5, 0x0CE0, 0x0D05, 0x0D0E,
0x0D12, 0x0D2A, 0x0D60, 0x0D85, 0x0D9A, 0x0DB3, 0x0DC0, 0x0E01, 0x0E32, 0x0E40,
0x0E81, 0x0E87, 0x0E94, 0x0E99, 0x0EA1, 0x0EAA, 0x0EAD, 0x0EB2, 0x0EC0, 0x0EDC,
0x0F40, 0x0F49, 0x0F88, 0x1000, 0x1023, 0x1029, 0x1050, 0x10A0, 0x10D0, 0x1100,
0x115F, 0x11A8, 0x1200, 0x1208, 0x124A, 0x1250, 0x125A, 0x1260, 0x128A, 0x1290,
0x12B2, 0x12B8, 0x12C2, 0x12C8, 0x12D0, 0x12D8, 0x12F0, 0x1312, 0x1318, 0x1320,
0x1348, 0x13A0, 0X1401, 0X1501, 0X1601, 0x166F, 0x1681, 0x16A0, 0x1780, 0x1820,
0x1880, 0x1E00, 0x1EA0, 0x1F00, 0x1F18, 0x1F20, 0x1F48, 0x1F50, 0x1F5F, 0x1F80,
0x1FB6, 0x1FC2, 0x1FC6, 0x1FD0, 0x1FD6, 0x1FE0, 0x1FF2, 0x1FF6, 0x210A, 0x2119,
0x212A, 0x212F, 0x2133, 0x2160, 0x3005, 0x3021, 0x3031, 0x3038, 0x3041, 0x309D,
0x30A1, 0x30FC, 0x3105, 0x3131, 0x31A0, 0XA000, 0XA100, 0XA200, 0XA300, 0XA400,
0XF900, 0XFA00, 0xFB00, 0xFB13, 0xFB1F, 0xFB2A, 0xFB38, 0xFB40, 0xFB43, 0xFB46,
0XFBD3, 0XFCD3, 0xFD50, 0xFD92, 0xFDF0, 0xFE70, 0xFE76, 0xFF21, 0xFF41, 0xFF66,
0xFFC2, 0xFFCA, 0xFFD2, 0xFFDA
};
/**
 * Character interval lengths for the unicode letters.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Lu, Ll, Lt, Lm, Lo, Nl
 *
 * The entry at index i pairs with unicode_letter_interv_sps[i].
 * NOTE(review): judging by the "handled separately" examples (0x0041, len 25 for 'A'..'Z'),
 * the length does not count the starting point itself, i.e. the interval is
 * [start, start + len] - confirm against the interval search function.
 */
static const uint8_t unicode_letter_interv_lens[] JERRY_CONST_DATA =
{
22, 30, 255, 39, 17, 93, 8, 6,
1, 4, 2, 19, 43, 7, 25, 129, 56, 1,
1, 37, 1, 37, 38, 26, 2, 25, 10, 98,
1, 2, 26, 37, 52, 9, 7, 1, 21, 6,
3, 1, 2, 1, 5, 1, 21, 6, 1, 1,
1, 3, 2, 6, 2, 21, 6, 1, 4, 7,
1, 21, 6, 1, 3, 1, 2, 5, 2, 3,
1, 1, 1, 2, 7, 2, 7, 2, 22, 9,
4, 1, 7, 2, 22, 9, 4, 1, 7, 2,
22, 15, 1, 17, 23, 8, 6, 47, 1, 6,
1, 1, 3, 6, 2, 1, 3, 1, 4, 1,
7, 33, 3, 33, 4, 1, 5, 37, 38, 89,
67, 81, 6, 62, 3, 6, 3, 38, 3, 30,
3, 6, 3, 6, 6, 22, 30, 3, 6, 38,
18, 84, 255, 255, 107, 7, 25, 74, 51, 87,
40, 155, 89, 21, 5, 37, 5, 7, 30, 52,
6, 2, 6, 3, 5, 12, 2, 6, 9, 4,
3, 2, 6, 35, 2, 8, 4, 2, 83, 1,
89, 2, 39, 93, 23, 255, 255, 255, 255, 140,
255, 45, 6, 4, 9, 12, 4, 1, 1, 107,
255, 106, 63, 53, 11, 2, 134, 25, 25, 88,
5, 5, 5, 2
};
/**
 * Those unicode letter characters that are not inside any of
 * the intervals specified by the unicode_letter_interv_sps and
 * unicode_letter_interv_lens arrays.
 *
 * The characters are from the following Unicode categories:
 * Lu, Ll, Lt, Lm, Lo, Nl
 *
 * The values are in ascending order.
 *
 * NOTE(review): 0x3400/0x4DB5, 0x4E00/0x9FA5 and 0xAC00/0xD7A3 look like the First/Last
 * markers of large ranges in UnicodeData.txt; the characters between them are not listed
 * in this file - confirm whether they are handled elsewhere.
 */
static const uint16_t unicode_letter_chars[] JERRY_CONST_DATA =
{
0x00AA, 0x00B5, 0x00BA, 0x02EE, 0x037A, 0x0386, 0x038C, 0x0559, 0x06D5, 0x0710,
0x093D, 0x0950, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C,
0x0CDE, 0x0DBD, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EBD, 0x0EC6, 0x0F00,
0x1248, 0x1258, 0x1288, 0x12B0, 0x12C0, 0x1310, 0x1F59, 0x1F5B, 0x1F5D, 0x1FBE,
0x207F, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3400, 0x4DB5, 0x4E00,
0x9FA5, 0xAC00, 0xD7A3, 0xFB1D, 0xFB3E, 0xFE74
};
/**
 * Character interval starting points for non-letter characters
 * that can be used as a non-first character of an identifier.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Nd, Mn, Mc, Pc
 *
 * The entry at index i pairs with unicode_non_letter_ident_part_interv_lens[i].
 */
static const uint16_t unicode_non_letter_ident_part_interv_sps[] JERRY_CONST_DATA =
{
/*
 * decimal digits: handled separately
 * 0x0030, len: 9
 */
0x0300, 0x0360, 0x0483, 0x0591, 0x05A3, 0x05BB, 0x05C1, 0x064B, 0x0660,
0x06D6, 0x06DF, 0x06E7, 0x06EA, 0x06F0, 0x0730, 0x07A6, 0x0901, 0x093E, 0x0951,
0x0962, 0x0966, 0x0981, 0x09BE, 0x09C7, 0x09CB, 0x09E2, 0x09E6, 0x0A3E, 0x0A47,
0x0A4B, 0x0A66, 0x0A81, 0x0ABE, 0x0AC7, 0x0ACB, 0x0AE6, 0x0B01, 0x0B3E, 0x0B47,
0x0B4B, 0x0B56, 0x0B66, 0x0B82, 0x0BBE, 0x0BC6, 0x0BCA, 0x0BE7, 0x0C01, 0x0C3E,
0x0C46, 0x0C4A, 0x0C55, 0x0C66, 0x0C82, 0x0CBE, 0x0CC6, 0x0CCA, 0x0CD5, 0x0CE6,
0x0D02, 0x0D3E, 0x0D46, 0x0D4A, 0x0D66, 0x0D82, 0x0DCF, 0x0DD8, 0x0DF2, 0x0E34,
0x0E47, 0x0E50, 0x0EB4, 0x0EBB, 0x0EC8, 0x0ED0, 0x0F18, 0x0F20, 0x0F3E, 0x0F71,
0x0F86, 0x0F90, 0x0F99, 0x102C, 0x1036, 0x1040, 0x1056, 0x1369, 0x17B4, 0x17E0,
0x1810, 0x203F, 0x20D0, 0x302A, 0x3099, 0xFE20, 0xFE33, 0xFE4D, 0xFF10
};
/**
 * Character interval lengths for non-letter characters
 * that can be used as a non-first character of an identifier.
 *
 * The characters covered by these intervals are from
 * the following Unicode categories: Nd, Mn, Mc, Pc
 *
 * The entry at index i pairs with unicode_non_letter_ident_part_interv_sps[i].
 *
 * NOTE(review): unlike the arrays above, this one is declared without JERRY_CONST_DATA -
 * confirm whether that is intentional.
 */
static const uint8_t unicode_non_letter_ident_part_interv_lens[] =
{
78, 2, 3, 16, 22, 2, 1, 10, 9,
6, 5, 1, 3, 9, 26, 10, 2, 15, 3,
1, 9, 2, 6, 1, 2, 1, 9, 4, 1,
2, 11, 2, 7, 2, 2, 9, 2, 5, 1,
2, 1, 9, 1, 4, 2, 3, 8, 2, 6,
2, 3, 1, 9, 1, 6, 2, 3, 1, 9,
1, 5, 2, 3, 9, 1, 5, 7, 1, 6,
7, 9, 5, 1, 5, 9, 1, 9, 1, 19,
1, 7, 35, 6, 3, 9, 3, 8, 31, 9,
9, 1, 12, 5, 1, 3, 1, 2, 9
};
/**
 * Those non-letter characters that can be used as a non-first
 * character of an identifier and not included in any of the intervals
 * specified by the unicode_non_letter_ident_part_interv_sps and
 * unicode_non_letter_ident_part_interv_lens arrays.
 *
 * The characters are from the following Unicode categories:
 * Nd, Mn, Mc, Pc
 *
 * The values are in ascending order.
 */
static const uint16_t unicode_non_letter_ident_part_chars[] =
{
0x005F, 0x05BF, 0x05C4, 0x0670, 0x0711, 0x093C, 0x09BC, 0x09D7, 0x0A02, 0x0A3C,
0x0ABC, 0x0B3C, 0x0BD7, 0x0D57, 0x0DCA, 0x0DD6, 0x0E31, 0x0EB1, 0x0F35, 0x0F37,
0x0F39, 0x0FC6, 0x18A9, 0x20E1, 0x30FB, 0xFB1E, 0xFF3F, 0xFF65
};
/**
 * Unicode separator character interval starting points from Unicode category: Zs
 *
 * The entry at index i pairs with unicode_separator_char_interv_lens[i].
 */
static const uint16_t unicode_separator_char_interv_sps[] =
{
0x2000
};
/**
 * Unicode separator character interval lengths from Unicode category: Zs
 *
 * The entry at index i pairs with unicode_separator_char_interv_sps[i].
 */
static const uint8_t unicode_separator_char_interv_lens[] =
{
11
};
/**
 * Unicode separator characters that are not in the intervals specified by the
 * unicode_separator_char_interv_sps and unicode_separator_char_interv_lens arrays.
 *
 * Unicode category: Zs
 *
 * The values are in ascending order.
 */
static const uint16_t unicode_separator_chars[] =
{
/*
 * these two chars are handled separately @see lit_char_is_space_separator
 * 0x0020, space
 * 0x00A0, non-breaking space
 */
0x1680, \
0x180E, /* manually added */ \
0x202F, /* manually added */ \
0x205F, \
0x3000
};
#endif