From a0bbc781f607d3db6733b55172e05671dd70909f Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Thu, 4 Dec 2025 01:00:55 +0200 Subject: [PATCH] feat: add post-generation script to preserve buffer overflow fix Created scripts/post-generate.sh that automatically re-applies the critical buffer overflow fix to parser.h after tree-sitter generate runs. This fix prevents undefined behavior in set_contains() when accessing an empty array. The script is automatically executed after tree-sitter generate via the npm generate script. Added generate:only for cases where post-processing should be skipped. --- package.json | 3 ++- scripts/post-generate.sh | 33 +++++++++++++++++++++++++++++++++ src/grammar.json | 2 +- src/node-types.json | 2 +- src/tree_sitter/array.h | 6 +++--- src/tree_sitter/parser.h | 1 + 6 files changed, 41 insertions(+), 6 deletions(-) create mode 100755 scripts/post-generate.sh diff --git a/package.json b/package.json index 94613e3..094b7ee 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,8 @@ "author": "Ismo Vuorinen", "license": "MIT", "scripts": { - "generate": "tree-sitter generate", + "generate": "tree-sitter generate && ./scripts/post-generate.sh", + "generate:only": "tree-sitter generate", "test": "tree-sitter test", "parse": "tree-sitter parse", "web": "tree-sitter web-ui", diff --git a/scripts/post-generate.sh b/scripts/post-generate.sh new file mode 100755 index 0000000..6e3c2de --- /dev/null +++ b/scripts/post-generate.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -e + +echo "Running post-generation fixes..." + +# Apply critical safety fixes that get overwritten during generation +echo " - Applying critical safety fixes..." + +# Fix 1: Buffer overflow prevention in parser.h +# The set_contains function needs a len==0 check to prevent accessing ranges[0] +# This fix gets overwritten every time tree-sitter generate runs +if ! grep -q "if (len == 0) return false;" src/tree_sitter/parser.h; then + # Insert the safety check right after the function opening + # Target: static inline bool set_contains(...) { + # Insert: if (len == 0) return false; + # Before: uint32_t index = 0; + + # Use perl for cross-platform compatibility (macOS and Linux) + perl -i -pe ' + BEGIN { $in_func = 0; $done = 0; } + if (/static inline bool set_contains/) { $in_func = 1; } + if ($in_func && /^\s+uint32_t index = 0;/ && !$done) { + print " if (len == 0) return false;\n"; + $done = 1; + } + if (/^}/ && $in_func) { $in_func = 0; } + ' src/tree_sitter/parser.h + echo " ✓ Applied buffer overflow fix to parser.h" +else + echo " ✓ Buffer overflow fix already present" +fi + +echo "Post-generation fixes complete!" diff --git a/src/grammar.json b/src/grammar.json index b8fdd13..b134387 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -7923,4 +7923,4 @@ "_primary_expression" ], "reserved": {} -} \ No newline at end of file +} diff --git a/src/node-types.json b/src/node-types.json index 534a93d..cad17a1 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -3307,4 +3307,4 @@ "type": "~", "named": false } -] \ No newline at end of file +] diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h index a17a574..28b1a8d 100644 --- a/src/tree_sitter/array.h +++ b/src/tree_sitter/array.h @@ -62,7 +62,7 @@ extern "C" { /// Push a new `element` onto the end of the array. #define array_push(self, element) \ (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) + (self)->contents[(self)->size++] = (element)) /// Increase the array's size by `count` elements. /// New elements are zero-initialized. @@ -218,8 +218,8 @@ static inline void _array__grow(Array *self, uint32_t count, size_t element_size /// This is not what you're looking for, see `array_splice`. static inline void _array__splice(Array *self, size_t element_size, - uint32_t index, uint32_t old_count, - uint32_t new_count, const void *elements) { + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { uint32_t new_size = self->size + new_count - old_count; uint32_t old_end = index + old_count; uint32_t new_end = index + new_count; diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 858107d..bd98686 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -152,6 +152,7 @@ struct TSLanguage { }; static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + if (len == 0) return false; uint32_t index = 0; uint32_t size = len - index; while (size > 1) {