feat: add post-generation script to preserve buffer overflow fix

Created scripts/post-generate.sh that automatically re-applies the critical
buffer overflow fix to parser.h after tree-sitter generate runs. This fix
prevents undefined behavior in set_contains() when accessing an empty array.

The npm "generate" script now runs tree-sitter generate followed by
scripts/post-generate.sh, so the fix is re-applied automatically. Added a
"generate:only" script that runs tree-sitter generate alone, for cases where
post-processing should be skipped.
This commit is contained in:
2025-12-04 01:00:55 +02:00
parent 8ad4483b0b
commit a0bbc781f6
6 changed files with 41 additions and 6 deletions

View File

@@ -6,7 +6,8 @@
"author": "Ismo Vuorinen",
"license": "MIT",
"scripts": {
"generate": "tree-sitter generate",
"generate": "tree-sitter generate && ./scripts/post-generate.sh",
"generate:only": "tree-sitter generate",
"test": "tree-sitter test",
"parse": "tree-sitter parse",
"web": "tree-sitter web-ui",

33
scripts/post-generate.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
#
# Post-generation fixes for the generated tree-sitter sources.
#
# `tree-sitter generate` rewrites src/tree_sitter/parser.h and drops the
# hand-applied guard in set_contains(). This script puts the guard back.
# It is idempotent: running it on an already-patched header changes nothing.
#
# Exit status: 0 when the guard is present on exit, non-zero when the header
# is missing or the guard could not be inserted.
set -euo pipefail

# Resolve the header relative to this script (scripts/ sits next to src/), so
# the result does not depend on the caller's working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
parser_header="${script_dir}/../src/tree_sitter/parser.h"

# Exact text of the guard; used both to detect it and to verify the insertion.
guard='if (len == 0) return false;'

echo "Running post-generation fixes..."
# Apply critical safety fixes that get overwritten during generation
echo " - Applying critical safety fixes..."

if [[ ! -f "${parser_header}" ]]; then
  echo "error: ${parser_header} not found; run tree-sitter generate first" >&2
  exit 1
fi

# Fix 1: Buffer overflow prevention in parser.h
# The set_contains function needs a len==0 check to prevent accessing ranges[0]
# This fix gets overwritten every time tree-sitter generate runs
if grep -qF -- "${guard}" "${parser_header}"; then
  echo " ✓ Buffer overflow fix already present"
else
  # Insert the guard inside set_contains(), immediately before the
  # `uint32_t index = 0;` line, reusing that line's own indentation.
  #   $in_func - true between the set_contains signature and its closing `}`
  #   $done    - ensures the guard is inserted at most once
  # Use perl for cross-platform compatibility (macOS and Linux)
  perl -i -pe '
    BEGIN { $in_func = 0; $done = 0; }
    if (/static inline bool set_contains/) { $in_func = 1; }
    if ($in_func && !$done && /^(\s+)uint32_t index = 0;/) {
      print "${1}if (len == 0) return false;\n";
      $done = 1;
    }
    if ($in_func && /^}/) { $in_func = 0; }
  ' "${parser_header}"

  # perl exits 0 even when nothing matched (e.g. the generated function body
  # changed upstream), so confirm the guard is really there before claiming
  # success.
  if ! grep -qF -- "${guard}" "${parser_header}"; then
    echo "error: could not apply buffer overflow fix to ${parser_header};" \
         "set_contains() no longer matches the expected shape" >&2
    exit 1
  fi
  echo " ✓ Applied buffer overflow fix to parser.h"
fi

echo "Post-generation fixes complete!"

View File

@@ -7923,4 +7923,4 @@
"_primary_expression"
],
"reserved": {}
}
}

View File

@@ -3307,4 +3307,4 @@
"type": "~",
"named": false
}
]
]

View File

@@ -62,7 +62,7 @@ extern "C" {
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
@@ -218,8 +218,8 @@ static inline void _array__grow(Array *self, uint32_t count, size_t element_size
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;

View File

@@ -152,6 +152,7 @@ struct TSLanguage {
};
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
if (len == 0) return false;
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {