Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions LuaCATS/jsregexp/core.lua
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,21 @@ function jsregexp.compile(re, flags) end
---@return string? Error
function jsregexp.compile_safe(re, flags) end

---
---Escape a string so that it can be safely used as a pattern in `jsregexp.compile`.
---Characters that have a special meaning in a regular expression (such as `.`)
---are escaped so that they are treated literally.
---
---Example:
---```lua
--- local pat = jsregexp.escape("example.com") -- "\\x65xample\\.com"
--- local re = jsregexp.compile(pat)
--- re:test("exampleZcom") -- false
--- re:test("example.com") -- true
---```
---
---@param str string String to escape
---@return string pattern Escaped pattern source
function jsregexp.escape(str) end

---
---Convert a UTF-8 Lua string to a UTF-16 JS string. For internal use.
---
Expand All @@ -58,6 +73,7 @@ function jsregexp.to_jsstring(str) end
---@field multiline boolean is the multiline flag set?
---@field sticky boolean is the sticky flag set?
---@field unicode boolean is the unicode flag set?
---@field unicode_sets boolean is the unicode_sets flag set?
local re = {}

---
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ re.ignore_case -- is the ignore_case flag set?
re.multiline -- is the multiline flag set?
re.sticky -- is the sticky flag set?
re.unicode -- is the unicode flag set?
re.unicode_sets -- is the unicode_sets flag set?
```
Calling `tostring` on a RegExp object returns a representation of the form `"/<source>/<flags>"`.

Expand Down
65 changes: 61 additions & 4 deletions jsregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,15 @@
/*
 * Allocation hook: resize `ptr` to `size` bytes using the C heap.
 *
 * opaque: user pointer, unused here.
 * ptr:    block to resize, or NULL to allocate a new block.
 * size:   requested size in bytes; 0 releases `ptr`.
 *
 * Returns the (possibly moved) block, or NULL on allocation failure and
 * after a release.
 *
 * realloc(ptr, 0) is implementation-defined: it may free the block and
 * return NULL, or return a minimal live allocation. Handling size == 0
 * explicitly makes a "free" request behave the same on every platform.
 * NOTE(review): libregexp appears to use size == 0 as its way of freeing
 * buffers -- confirm against libregexp.c / cutils.c.
 */
void *lre_realloc(void *opaque, void *ptr, size_t size) {
  (void)opaque;
  if (size == 0) {
    free(ptr);
    return NULL;
  }
  return realloc(ptr, size);
}

/*
 * Stack-overflow check hook. This implementation performs no check: both
 * arguments are ignored and FALSE (no overflow) is always returned.
 */
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size) {
  (void)opaque;
  (void)alloca_size;
  return FALSE;
}

/*
 * Timeout check hook. No timeout is implemented: the argument is ignored
 * and 0 is always returned.
 */
int lre_check_timeout(void *opaque) {
  (void)opaque;
  return 0;
}

struct regexp {
char *expr;
uint8_t *bc;
Expand Down Expand Up @@ -103,8 +108,9 @@ static inline uint16_t *utf8_to_utf16(const uint8_t *input, uint32_t n,
return NULL;
}
if ((unsigned)c > 0xffff) {
*q++ = (((c - 0x10000) >> 10) | (0xd8 << 8));
*q++ = (c & 0xfffff) | (0xdc << 8);
c -= 0x10000;
*q++ = 0xd800 | (c >> 10);
*q++ = 0xdc00 | (c & 0x3ff);
} else {
*q++ = c & 0xffff;
}
Expand Down Expand Up @@ -161,6 +167,50 @@ static int jsstring_new(lua_State *lstate) {
return 1;
}

/*
 * jsregexp.escape(str): push a copy of `str` in which regex-significant
 * characters are escaped, so the result can be used as a pattern source.
 *
 * Lua arguments: (1) str: string. Lua results: (1) escaped string.
 * Raises a Lua argument error if argument 1 is not a string.
 *
 * Per-byte rules:
 *   - bytes >= 0x80 are copied verbatim (see below);
 *   - \t \n \v \f \r become their backslash escapes;
 *   - other bytes below 0x21 (controls and space) and DEL become \xNN;
 *   - an ASCII letter or digit is copied, except as the very first
 *     character, where it becomes \xNN;
 *   - the punctuators , - = < > # & ! % : ; @ ~ ' ` " become \xNN;
 *   - '_' is copied;
 *   - every other printable ASCII character is prefixed with a backslash.
 */
static int jsregexp_escape(lua_State *L) {
  size_t len;
  const char *str = luaL_checklstring(L, 1, &len);

  luaL_Buffer B;
  luaL_buffinit(L, &B);

  /* size_t index: `len` is a size_t, so an int counter would be converted
   * for the comparison and could overflow on very long strings. */
  for (size_t i = 0; i < len; i++) {
    const uint8_t c = (uint8_t)str[i];
    int hex = 0;

    if (c >= 128) {
      /* Byte of a multi-byte UTF-8 sequence: keep it as is. Escaping each
       * byte as \xNN would produce a pattern that matches the byte values
       * as separate code points (e.g. U+00C3 U+00A9) instead of the
       * character that the sequence encodes. */
      luaL_addchar(&B, c);
    } else if (c >= 9 && c <= 13) {
      /* \t \n \v \f \r, in code order starting at 9 */
      luaL_addchar(&B, '\\');
      luaL_addchar(&B, "tnvfr"[c - 9]);
    } else if (c < 33 || c == 127) {
      /* Remaining control characters and space. DEL is hex-escaped as
       * well: a backslash in front of it would be an identity escape of a
       * non-syntax character. */
      hex = 1;
    } else if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
               (c >= 'a' && c <= 'z')) {
      /* A leading alphanumeric is hex-escaped; presumably (as in
       * JavaScript's RegExp.escape) so the result cannot fuse with an
       * escape sequence that ends right before it when concatenated. */
      if (i == 0) {
        hex = 1;
      } else {
        luaL_addchar(&B, c);
      }
    } else if (strchr(",-=<>#&!%:;@~'`\"", c)) {
      /* Punctuators that are hex-escaped rather than backslash-escaped. */
      hex = 1;
    } else {
      if (c != '_') {
        luaL_addchar(&B, '\\');
      }
      luaL_addchar(&B, c);
    }

    if (hex) {
      char s[16];
      int l = snprintf(s, sizeof s, "\\x%02x", (unsigned)c);
      luaL_addlstring(&B, s, (size_t)l);
    }
    // TODO: javascript's escape also deals with non-ascii whitespace and lone
    // (utf16) surrogates. I don't think we have to deal with surrogates since
    // we only pass utf8. We probably have to revisit (unicode) whitespace for
    // a future x flag.
  }
  luaL_pushresult(&B);
  return 1;
}

static int jsstring_gc(lua_State *lstate) {
struct jsstring *s = lua_touserdata(lstate, 1);
free(s->u.str8);
Expand Down Expand Up @@ -204,9 +254,10 @@ static void regexp_pushflags(lua_State *lstate, const struct regexp *r) {
const char *named_groups = (flags & LRE_FLAG_NAMED_GROUPS) ? "n" : "";
const char *dotall = (flags & LRE_FLAG_DOTALL) ? "s" : "";
const char *utf16 = (flags & LRE_FLAG_UNICODE) ? "u" : "";
const char *unicode_sets = (flags & LRE_FLAG_UNICODE_SETS) ? "v" : "";
const char *sticky = (flags & LRE_FLAG_STICKY) ? "y" : "";
lua_pushfstring(lstate, "%s%s%s%s%s%s%s%s", indices, ignorecase, global,
multiline, named_groups, dotall, utf16, sticky);
lua_pushfstring(lstate, "%s%s%s%s%s%s%s%s%s", indices, ignorecase, global,
multiline, named_groups, dotall, utf16, unicode_sets, sticky);
}

static int regexp_tostring(lua_State *lstate) {
Expand Down Expand Up @@ -437,6 +488,8 @@ static int regexp_index(lua_State *lstate) {
lua_pushboolean(lstate, lre_get_flags(r->bc) & LRE_FLAG_STICKY);
} else if (streq(key, "unicode")) {
lua_pushboolean(lstate, lre_get_flags(r->bc) & LRE_FLAG_UNICODE);
} else if (streq(key, "unicode_sets")) {
lua_pushboolean(lstate, lre_get_flags(r->bc) & LRE_FLAG_UNICODE_SETS);
} else if (streq(key, "has_indices")) {
lua_pushboolean(lstate, lre_get_flags(r->bc) & LRE_FLAG_INDICES);
} else if (streq(key, "source")) {
Expand Down Expand Up @@ -516,6 +569,9 @@ static int jsregexp_compile(lua_State *lstate) {
case 'u':
re_flags |= LRE_FLAG_UNICODE;
break;
case 'v':
re_flags |= LRE_FLAG_UNICODE_SETS;
break;
case 'y':
re_flags |= LRE_FLAG_STICKY;
break;
Expand Down Expand Up @@ -561,6 +617,7 @@ static int jsregexp_compile_safe(lua_State *lstate) {
/*
 * Name -> C function table for the module-level jsregexp functions.
 * The {NULL, NULL} entry is the sentinel that terminates the list.
 * NOTE(review): presumably registered as the module table in the luaopen
 * function, which is not visible here -- confirm.
 */
static const struct luaL_Reg jsregexp_lib[] = {
{"compile", jsregexp_compile},
{"compile_safe", jsregexp_compile_safe},
{"escape", jsregexp_escape},
{"to_jsstring", jsstring_new},
{NULL, NULL}};

Expand Down
2 changes: 2 additions & 0 deletions libregexp/cutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ int __attribute__((format(printf, 2, 3))) dbuf_printf(DynBuf *s,
va_start(ap, fmt);
len = vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
if (len < 0)
return -1;
if (len < sizeof(buf)) {
/* fast case */
return dbuf_put(s, (uint8_t *)buf, len);
Expand Down
76 changes: 76 additions & 0 deletions libregexp/cutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,80 @@ void rqsort(void *base, size_t nmemb, size_t size,
int (*cmp)(const void *, const void *, void *),
void *arg);

/* Return the raw 64-bit representation of the double `d` (no numeric
   conversion; the bits are reinterpreted through a union). */
static inline uint64_t float64_as_uint64(double d)
{
    union {
        uint64_t bits;
        double value;
    } pun;

    pun.value = d;
    return pun.bits;
}

/* Return the double whose raw 64-bit representation is `u64` (no numeric
   conversion; the bits are reinterpreted through a union). */
static inline double uint64_as_float64(uint64_t u64)
{
    union {
        uint64_t bits;
        double value;
    } pun;

    pun.bits = u64;
    return pun.value;
}

/* Convert the 16-bit float bit pattern `v` (1 sign bit, 5 exponent bits,
   10 fraction bits) to a double. */
static inline double fromfp16(uint16_t v)
{
    uint64_t sign_bit = (uint64_t)(v >> 15) << 63;
    uint32_t magnitude = v & 0x7fff; /* exponent and fraction, sign removed */

    /* An all-ones fp16 exponent means NaN or infinity: also set the upper
       six bits of the 11-bit double exponent field so that it is all ones
       once shifted into place. */
    if (unlikely(magnitude >= 0x7c00))
        magnitude += 0x1f8000;

    /* Align the 10 fraction bits with the top of the 52-bit double
       fraction; the fp16 exponent then sits in the low bits of the double
       exponent field. Multiplying by 2^1008 (= 2^(1023 - 15)) corrects
       for the difference between the two exponent biases. */
    return uint64_as_float64(sign_bit | ((uint64_t)magnitude << (52 - 10))) * 0x1p1008;
}

/* Convert the double `d` to a 16-bit float bit pattern (1 sign bit,
   5 exponent bits, 10 fraction bits). Ties are rounded to even; values
   too large for fp16 become infinity, values too small become zero. The
   sign of `d` is always carried over into bit 15 of the result. */
static inline uint16_t tofp16(double d)
{
uint64_t a, addend;
uint32_t v, sgn;
int shift;

a = float64_as_uint64(d);
sgn = a >> 63;
/* work on the magnitude only; the sign is merged back in at the end */
a = a & 0x7fffffffffffffff;
if (unlikely(a > 0x7ff0000000000000)) {
/* nan */
v = 0x7c01;
} else if (a < 0x3f10000000000000) { /* 0x1p-14 */
/* subnormal f16 number or zero */
if (a <= 0x3e60000000000000) { /* 0x1p-25 */
v = 0x0000; /* zero */
} else {
/* number of low bits to drop: grows as the exponent shrinks */
shift = 1051 - (a >> 52);
/* fraction with the implicit leading 1 made explicit */
a = ((uint64_t)1 << 52) | (a & (((uint64_t)1 << 52) - 1));
/* round to nearest, ties to even: add just under half a unit, plus
   one more when the lowest kept bit is odd */
addend = ((a >> shift) & 1) + (((uint64_t)1 << (shift - 1)) - 1);
v = (a + addend) >> shift;
}
} else {
/* normal number or infinity */
a -= 0x3f00000000000000; /* adjust the exponent */
/* round */
addend = ((a >> (52 - 10)) & 1) + (((uint64_t)1 << (52 - 11)) - 1);
v = (a + addend) >> (52 - 10);
/* overflow ? */
if (unlikely(v > 0x7c00))
v = 0x7c00;
}
return v | (sgn << 15);
}

/* Return 1 if the 16-bit float bit pattern `v` is a NaN (all exponent bits
   set and a non-zero fraction), 0 otherwise. The sign bit is ignored. */
static inline int isfp16nan(uint16_t v)
{
    const uint16_t magnitude = v & 0x7FFF;

    return magnitude > 0x7C00;
}

/* Return 1 if the 16-bit float bit pattern `v` is positive or negative
   zero, 0 otherwise. The sign bit is ignored. */
static inline int isfp16zero(uint16_t v)
{
    const uint16_t magnitude = v & 0x7FFF;

    return !magnitude;
}

#endif /* CUTILS_H */
12 changes: 11 additions & 1 deletion libregexp/libregexp-opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@

DEF(invalid, 1) /* never used */
DEF(char, 3)
DEF(char_i, 3)
DEF(char32, 5)
DEF(char32_i, 5)
DEF(dot, 1)
DEF(any, 1) /* same as dot but match any character including line terminator */
DEF(line_start, 1)
DEF(line_start_m, 1)
DEF(line_end, 1)
DEF(line_end_m, 1)
DEF(goto, 5)
DEF(split_goto_first, 5)
DEF(split_next_first, 5)
Expand All @@ -42,11 +46,17 @@ DEF(loop, 5) /* decrement the top the stack and goto if != 0 */
DEF(push_i32, 5) /* push integer on the stack */
DEF(drop, 1)
DEF(word_boundary, 1)
DEF(word_boundary_i, 1)
DEF(not_word_boundary, 1)
DEF(not_word_boundary_i, 1)
DEF(back_reference, 2)
DEF(backward_back_reference, 2) /* must come after back_reference */
DEF(back_reference_i, 2) /* must come after */
DEF(backward_back_reference, 2) /* must come after */
DEF(backward_back_reference_i, 2) /* must come after */
DEF(range, 3) /* variable length */
DEF(range_i, 3) /* variable length */
DEF(range32, 3) /* variable length */
DEF(range32_i, 3) /* variable length */
DEF(lookahead, 5)
DEF(negative_lookahead, 5)
DEF(push_char_pos, 1) /* push the character position on the stack */
Expand Down
Loading
Loading