fix(scanner): address memory safety and correctness issues in C code

- Add len==0 check in set_contains() to prevent buffer overflow
- Add missing stdlib.h include in scanner.c
- Clear heredoc stack properly in deserialize when length==0
- Ensure NUL termination in delimiter deserialization
- Create alloc.c to define ts_current_* symbols for TREE_SITTER_REUSE_ALLOCATOR

All changes tested with full test suite: 61/61 tests passing.

Addresses PR #1 review comments from CodeRabbit.
This commit is contained in:
2025-11-25 23:27:12 +02:00
parent 4344567555
commit dc672f8485
3 changed files with 25 additions and 1 deletion

View File

@@ -3,6 +3,7 @@
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
@@ -124,6 +125,13 @@ static unsigned serialize(Scanner *scanner, char *buffer) {
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
if (length == 0) {
// Fully clear heredocs to avoid stale stack entries after reset
for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
Heredoc *h = array_get(&scanner->heredocs, i);
array_delete(&h->current_leading_word);
array_delete(&h->delimiter);
}
array_clear(&scanner->heredocs);
reset(scanner);
} else {
uint32_t size = 0;
@@ -147,12 +155,18 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t));
size += sizeof(uint32_t);
array_reserve(&heredoc->delimiter, heredoc->delimiter.size);
array_reserve(&heredoc->delimiter, heredoc->delimiter.size > 0 ? heredoc->delimiter.size : 1);
if (heredoc->delimiter.size > 0) {
memcpy(heredoc->delimiter.contents, &buffer[size],
heredoc->delimiter.size);
size += heredoc->delimiter.size;
// Ensure NUL termination for safety
if (heredoc->delimiter.contents[heredoc->delimiter.size - 1] != '\0') {
array_reserve(&heredoc->delimiter, heredoc->delimiter.size + 1);
heredoc->delimiter.contents[heredoc->delimiter.size] = '\0';
heredoc->delimiter.size++;
}
}
}
assert(size == length);

9
src/tree_sitter/alloc.c Normal file
View File

@@ -0,0 +1,9 @@
#include "tree_sitter/alloc.h"

#include <stdlib.h>

#ifdef TREE_SITTER_REUSE_ALLOCATOR
// Definitions of the ts_current_* allocator hooks. They are compiled only
// when TREE_SITTER_REUSE_ALLOCATOR is defined, and each hook initially points
// at the matching C standard library routine.
void *(*ts_current_malloc)(size_t size) = malloc;
void *(*ts_current_calloc)(size_t count, size_t size) = calloc;
void *(*ts_current_realloc)(void *ptr, size_t size) = realloc;
void (*ts_current_free)(void *ptr) = free;
#endif // TREE_SITTER_REUSE_ALLOCATOR

View File

@@ -152,6 +152,7 @@ struct TSLanguage {
};
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
if (len == 0) return false;
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {