From dc672f8485c229ae0e7de6b7bfc72a08fa5c08de Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Tue, 25 Nov 2025 23:27:12 +0200 Subject: [PATCH] fix(scanner): address memory safety and correctness issues in C code - Add len==0 check in set_contains() to prevent buffer overflow - Add missing stdlib.h include in scanner.c - Clear heredoc stack properly in deserialize when length==0 - Ensure NUL termination in delimiter deserialization - Create alloc.c to define ts_current_* symbols for TREE_SITTER_REUSE_ALLOCATOR All changes tested with full test suite: 61/61 tests passing. Addresses PR #1 review comments from CodeRabbit. --- src/scanner.c | 16 +++++++++++++++- src/tree_sitter/alloc.c | 9 +++++++++ src/tree_sitter/parser.h | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/tree_sitter/alloc.c diff --git a/src/scanner.c b/src/scanner.c index 65c7b91..f382f9b 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -3,6 +3,7 @@ #include #include +#include <stdlib.h> #include #include @@ -124,6 +125,13 @@ static unsigned serialize(Scanner *scanner, char *buffer) { static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { if (length == 0) { + // Fully clear heredocs to avoid stale stack entries after reset + for (uint32_t i = 0; i < scanner->heredocs.size; i++) { + Heredoc *h = array_get(&scanner->heredocs, i); + array_delete(&h->current_leading_word); + array_delete(&h->delimiter); + } + array_clear(&scanner->heredocs); reset(scanner); } else { uint32_t size = 0; @@ -147,12 +155,18 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); size += sizeof(uint32_t); - array_reserve(&heredoc->delimiter, heredoc->delimiter.size); + array_reserve(&heredoc->delimiter, heredoc->delimiter.size > 0 ?
heredoc->delimiter.size : 1); if (heredoc->delimiter.size > 0) { memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); size += heredoc->delimiter.size; + // Ensure NUL termination for safety + if (heredoc->delimiter.contents[heredoc->delimiter.size - 1] != '\0') { + array_reserve(&heredoc->delimiter, heredoc->delimiter.size + 1); + heredoc->delimiter.contents[heredoc->delimiter.size] = '\0'; + heredoc->delimiter.size++; + } } } assert(size == length); diff --git a/src/tree_sitter/alloc.c b/src/tree_sitter/alloc.c new file mode 100644 index 0000000..f62a6ae --- /dev/null +++ b/src/tree_sitter/alloc.c @@ -0,0 +1,9 @@ +#include "tree_sitter/alloc.h" +#include <stdlib.h> + +#ifdef TREE_SITTER_REUSE_ALLOCATOR +void *(*ts_current_malloc)(size_t) = malloc; +void *(*ts_current_calloc)(size_t,size_t) = calloc; +void *(*ts_current_realloc)(void*,size_t) = realloc; +void (*ts_current_free)(void*) = free; +#endif diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 858107d..bd98686 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -152,6 +152,7 @@ struct TSLanguage { }; static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + if (len == 0) return false; uint32_t index = 0; uint32_t size = len - index; while (size > 1) {