diff options
author | Wilfred Hughes <me@wilfred.me.uk> | 2023-08-08 23:33:05 -0700 |
---|---|---|
committer | Wilfred Hughes <me@wilfred.me.uk> | 2023-08-08 23:37:28 -0700 |
commit | 4e77f83dd938937ad06419243cb4fb6d72619356 (patch) | |
tree | 27148852e35e0985c77a3dba15ff67c54e71a7ed | |
parent | d0cf8c6d0d2ce9014a2c0d52ff908997a90b739d (diff) | |
parent | 7dc4fb60390218b09bc351062eeede7dcdbb4d9f (diff) |
Merge commit '7dc4fb60390218b09bc351062eeede7dcdbb4d9f'
19 files changed, 15417 insertions, 13782 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d098dc4e..9744a3ac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ### Parsing -Updated Elixir, Erlang and Go parsers. +Updated Elixir, Erlang, Go and Racket parsers. ### Display @@ -298,7 +298,7 @@ fn main() { TreeSitterParser { name: "tree-sitter-racket", src_dir: "vendored_parsers/tree-sitter-racket-src", - extra_files: vec!["scanner.cc"], + extra_files: vec!["scanner.c"], }, TreeSitterParser { name: "tree-sitter-ruby", diff --git a/vendored_parsers/tree-sitter-racket/Cargo.toml b/vendored_parsers/tree-sitter-racket/Cargo.toml index d0cb6195f..b7e5a7d09 100644 --- a/vendored_parsers/tree-sitter-racket/Cargo.toml +++ b/vendored_parsers/tree-sitter-racket/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-racket" description = "racket grammar for the tree-sitter parsing library" -version = "0.0.1" +version = "0.3.0" keywords = ["incremental", "parsing", "racket"] categories = ["parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter-racket" diff --git a/vendored_parsers/tree-sitter-racket/README.md b/vendored_parsers/tree-sitter-racket/README.md index 819c8f6d1..9d5035d35 100644 --- a/vendored_parsers/tree-sitter-racket/README.md +++ b/vendored_parsers/tree-sitter-racket/README.md @@ -8,7 +8,13 @@ This grammar only implements the Racket language with the default readtable. ## Status -It should recognize most grammar with the default readtable. +It should be complete and compatible with Racket 8.9. + +There are no plans to add support for new language currently. + +## News + +Starting from June 24, 2023, ([commit](https://github.com/6cdh/tree-sitter-racket/commit/989c3e631a7f2d87bb6a66a5394870aaeb6c56e7)) or release 0.3.0, the external scanner was written in C. ## Build and Try @@ -16,7 +22,6 @@ You need * nodejs * a C compiler -* a C++11 compiler then run diff --git a/vendored_parsers/tree-sitter-racket/binding.gyp b/vendored_parsers/tree-sitter-racket/binding.gyp index 9427c0267..a07cf9d9c 100644 --- a/vendored_parsers/tree-sitter-racket/binding.gyp +++ b/vendored_parsers/tree-sitter-racket/binding.gyp @@ -9,8 +9,7 @@ "sources": [ "bindings/node/binding.cc", "src/parser.c", - "src/scanner.cc", - # If your language uses an external scanner, add it here. + "src/scanner.c", ], "cflags_c": [ "-std=c99", diff --git a/vendored_parsers/tree-sitter-racket/bindings/rust/build.rs b/vendored_parsers/tree-sitter-racket/bindings/rust/build.rs index 66812e003..b177a6bdc 100644 --- a/vendored_parsers/tree-sitter-racket/bindings/rust/build.rs +++ b/vendored_parsers/tree-sitter-racket/bindings/rust/build.rs @@ -2,24 +2,18 @@ fn main() { let src_dir = std::path::Path::new("src"); let mut c_config = cc::Build::new(); - c_config.include(&src_dir); + c_config.include(src_dir); c_config .flag_if_supported("-Wno-unused-parameter") .flag_if_supported("-Wno-unused-but-set-variable") .flag_if_supported("-Wno-trigraphs"); let parser_path = src_dir.join("parser.c"); c_config.file(&parser_path); - c_config.compile("parser"); - println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); - let mut cpp_config = cc::Build::new(); - cpp_config.cpp(true); - cpp_config.include(&src_dir); - cpp_config - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable"); - let scanner_path = src_dir.join("scanner.cc"); - cpp_config.file(&scanner_path); - cpp_config.compile("scanner"); + let scanner_path = src_dir.join("scanner.c"); + c_config.file(&scanner_path); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + + println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); + c_config.compile("parser"); } diff --git a/vendored_parsers/tree-sitter-racket/corpus/here_string/.gitattributes b/vendored_parsers/tree-sitter-racket/corpus/here_string/.gitattributes index b65906f57..34b79e9b6 100644 --- a/vendored_parsers/tree-sitter-racket/corpus/here_string/.gitattributes +++ b/vendored_parsers/tree-sitter-racket/corpus/here_string/.gitattributes @@ -1,2 +1,2 @@ -win_* text eol=crlf +win_* eol=crlf * eol=lf diff --git a/vendored_parsers/tree-sitter-racket/corpus/simple.rkt b/vendored_parsers/tree-sitter-racket/corpus/simple.rkt index 434695459..f4c19f300 100644 --- a/vendored_parsers/tree-sitter-racket/corpus/simple.rkt +++ b/vendored_parsers/tree-sitter-racket/corpus/simple.rkt @@ -31,6 +31,8 @@ number #b101 +inf.t -nan.t ++i +0##+i --- @@ -47,6 +49,8 @@ number (number) (number) (number) + (number) + (number) (number)) === @@ -79,6 +83,8 @@ Ap\ ple app123app123 123app123 中文 +a +fec --- (program @@ -92,6 +98,8 @@ app123app123 (symbol) (symbol) (symbol) + (symbol) + (symbol) (symbol)) === diff --git a/vendored_parsers/tree-sitter-racket/fuzztest/.gitignore b/vendored_parsers/tree-sitter-racket/fuzztest/.gitignore new file mode 100644 index 000000000..2211df63d --- /dev/null +++ b/vendored_parsers/tree-sitter-racket/fuzztest/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/vendored_parsers/tree-sitter-racket/fuzztest/README.md b/vendored_parsers/tree-sitter-racket/fuzztest/README.md new file mode 100644 index 000000000..fd48d2e5e --- /dev/null +++ b/vendored_parsers/tree-sitter-racket/fuzztest/README.md @@ -0,0 +1,34 @@ +# fuzz test + +The directory contains the scripts that test the implementation to avoid the problem that a symbol is parsed as a number or vice versa. + +## Resource + +* ~6 minutes to run +* ~300M generated files + +## Run + +```shell +$ cd fuzztest +# 30s +$ racket gen_cases.rkt +cpu time: 26531 real time: 26835 gc time: 1069 +5114173 cases generated +$ cd .. +$ tree-sitter generate +# 3 minutes +$ tree-sitter parse fuzztest/case.txt > fuzztest/res1.txt +# 2 minutes +$ cd fuzztest && racket postprocess.rkt +# should show nothing +$ sdiff -s <(cat -n expect.txt) <(cat -n res.txt) + +# If there is some error, run +$ sdiff -s <(cat -n expect.txt) <(cat -n res.txt) | less +# then get the first error case at `N`-th line +$ cat case.txt | sed -n 'Np' +``` + +You can edit `gen_cases.rkt` to generate less cases during development. + diff --git a/vendored_parsers/tree-sitter-racket/fuzztest/gen_cases.rkt b/vendored_parsers/tree-sitter-racket/fuzztest/gen_cases.rkt new file mode 100644 index 000000000..611eefcb9 --- /dev/null +++ b/vendored_parsers/tree-sitter-racket/fuzztest/gen_cases.rkt @@ -0,0 +1,57 @@ +#lang racket + +(require racket/extflonum) + +;; all characters that can appear in a valid number/exflonum +;; remove some insignificant parts to improve performance +;; (define alphabet-char "abdefilnost") +(define alphabet-char "abdefilnostx") +(define special-char "#./@+-") +;; (define numeric-char "0123456789") +(define numeric-char "0179") +(define all-char + (string-append alphabet-char + special-char + numeric-char)) + +(define cnt 0) +(define max-len 5) + +(define case-port (open-output-file "case.txt" #:exists 'replace)) +(define expect-port (open-output-file "expect.txt" #:exists 'replace)) + +(define (gen i case) + (with-handlers ([exn:fail? (lambda _ (void))]) + (when (> i 0) + (define case-str (list->string case)) + ;; ".0@.0" should be a number according the document, + ;; but it's actually a symbol. + ;; It's a bug of Racket reader, and will fix in new Racket release. + ;; we skip these cases. + (when (not (string-contains? case-str "@.")) + (with-handlers ([exn:fail? void]) + (with-input-from-string case-str + (lambda () + (define fst (read)) + (define snd (read)) + (when (eof-object? snd) + (cond [(symbol? fst) + (set! cnt (add1 cnt)) + (displayln case-str case-port) + (displayln "symbol" expect-port)] + [(number? fst) + (set! cnt (add1 cnt)) + (displayln case-str case-port) + (displayln "number" expect-port)] + ;; it's here for possible future change that + ;; split extflonum from number + [(extflonum? fst) + (set! cnt (add1 cnt)) + (displayln case-str case-port) + (displayln "number" expect-port)])))))))) + (when (< i max-len) + (for ([c all-char]) + (gen (add1 i) (cons c case))))) + +(time (gen 0 '())) +(displayln (format "~a cases generated" cnt)) diff --git a/vendored_parsers/tree-sitter-racket/fuzztest/postprocess.rkt b/vendored_parsers/tree-sitter-racket/fuzztest/postprocess.rkt new file mode 100644 index 000000000..ffaf800a8 --- /dev/null +++ b/vendored_parsers/tree-sitter-racket/fuzztest/postprocess.rkt @@ -0,0 +1,26 @@ +#lang racket + +(define port (open-input-file "res1.txt")) +(define all-result (drop (read port) 4)) +(define all-result-line + (for/list ([r all-result]) + (cons (car r) (caadr r)))) +(with-output-to-file "res.txt" + #:exists 'replace + (lambda () + (let loop ([line 0] + [firstline? #t] + [lst all-result-line]) + (match lst + ['() (void)] + [(cons fst rem) + #:when (= (cdr fst) line) + (when (not firstline?) + (display " ")) + (display (car fst)) + (loop line (if firstline? #t #f) rem)] + [(cons fst rem) + (newline) + (display (car fst)) + (loop (add1 line) #f rem)])) + (newline))) diff --git a/vendored_parsers/tree-sitter-racket/grammar.js b/vendored_parsers/tree-sitter-racket/grammar.js index e2f681316..d5612024b 100644 --- a/vendored_parsers/tree-sitter-racket/grammar.js +++ b/vendored_parsers/tree-sitter-racket/grammar.js @@ -1,13 +1,11 @@ const PREC = { first: $ => prec(100, $), last: $ => prec(-1, $), - left: prec.left, - right: prec.right, }; const LEAF = { // https://en.wikipedia.org/wiki/Unicode_character_property#Whitespace - whitespace: /[ \t\n\v\f\r\u{0085}\u{00A0}\u{1680}\u{2000}-\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}]/u, + whitespace: /[ \t\n\v\f\r\u{0085}\u{00A0}\u{1680}\u{2000}-\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}]+/u, newline: /[\r\n\u{85}\u{2028}\u{2029}]/, delimiter: /[ \t\n\v\f\r\u{0085}\u{00A0}\u{1680}\u{2000}-\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}(){}",'`;\[\]]/u, non_delimiter: /[^ \t\n\v\f\r\u{0085}\u{00A0}\u{1680}\u{2000}-\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}(){}",'`;\[\]]/u, @@ -49,23 +47,21 @@ module.exports = grammar({ _token: $ => choice( - token(repeat1(LEAF.whitespace)), - $._all_comment, + $._skip, $.extension, $._datum), - _skip: $ => choice(token(repeat1(LEAF.whitespace)), $._all_comment), - - dot: _ => ".", - - // comment {{{ - - _all_comment: $ => + _skip: $ => choice( + LEAF.whitespace, $.comment, $.sexp_comment, $.block_comment), + dot: _ => ".", + + // comment {{{ + comment: $ => choice( token( @@ -101,6 +97,10 @@ module.exports = grammar({ $.here_string, $.byte_string, $.character, + + // number/symbol precedence + // for same length token, prefer number + // otherwise, prefer symbol which is also longer $.number, $.symbol, @@ -110,7 +110,15 @@ module.exports = grammar({ $.graph, $.structure, $.hash, - $._abbrev, + + $.quote, + $.quasiquote, + $.syntax, + $.quasisyntax, + $.unquote, + $.unquote_splicing, + $.unsyntax, + $.unsyntax_splicing, $.list, $.vector), @@ -158,10 +166,14 @@ module.exports = grammar({ number: _ => token( choice( - _number_base(2), - _number_base(8), - _number_base(10), - _number_base(16))), + extflonum(2), + extflonum(8), + extflonum(10), + extflonum(16), + number_base(2), + number_base(8), + number_base(10), + number_base(16))), decimal: _ => /[0-9]+/, @@ -178,14 +190,12 @@ module.exports = grammar({ /./))), symbol: _ => - PREC.last( - PREC.right( - token( - choice( - /#[cC][iIsS]/, // read-case-sensitive parameter - seq( - LEAF.symbol_start, - repeat(LEAF.symbol_remain)))))), + token( + choice( + /#[cC][iIsS]/, // read-case-sensitive parameter + seq( + LEAF.symbol_start, + repeat(LEAF.symbol_remain)))), keyword: _ => token( @@ -233,17 +243,6 @@ module.exports = grammar({ repeat($._skip), $._datum))), - _abbrev: $ => - choice( - $.quote, - $.quasiquote, - $.syntax, - $.quasisyntax, - $.unquote, - $.unquote_splicing, - $.unsyntax, - $.unsyntax_splicing), - quote: $ => seq( "'", @@ -309,115 +308,216 @@ module.exports = grammar({ // number {{{ -function _number_base(n) { - const number = _ => +function number_base(n) { + const digit = { + 2: /[01]/, + 8: /[0-7]/, + 10: /[0-9]/, + 16: /[0-9a-fA-F]/, + }[n]; + + const exp_mark = { + 2: /[sldefSLDEF]/, + 8: /[sldefSLDEF]/, + 10: /[sldefSLDEF]/, + 16: /[slSL]/, + }[n]; + + const prefix = { + 2: /#[bB]/, + 8: /#[oO]/, + 10: optional(/#[dD]/), + 16: /#[xX]/, + }[n]; + + const exactness = + /#[eiEI]/; + + const sign = /[+-]/; + + const digits_hash = seq( - choice( - seq(radix(), optional(exactness())), - seq(optional(exactness()), radix()), - ), - choice( - // Inexact number pattern already contains exact pattern. - // So we don't need to parse exact number explicitly - inexact())); - - const sign = _ => /[+-]/; - - const digit = _ => { - return { - 2: /[01]/, - 8: /[0-7]/, - 10: /[0-9]/, - 16: /[0-9a-fA-F]/, - }[n]; - }; - - const radix = _ => { - return { - 2: /#[bB]/, - 8: /#[oO]/, - 10: optional(/#[dD]/), - 16: /#[xX]/, - }[n]; - }; - - const exactness = _ => - choice("#e", "#E", "#i", "#I"); - - const exp_mark = _ => /[sldeftSLDEFT]/; - - const unsigned_integer = _ => - repeat1(digit()); - - const inexact = _ => + repeat1(digit), + repeat("#")); + + const unsigned_integer = + repeat1(digit); + + // exact + + const exact_integer = + seq( + optional(sign), + unsigned_integer); + + const unsigned_rational = + choice( + unsigned_integer, + seq(unsigned_integer, "/", unsigned_integer)); + + const exact_rational = + seq( + optional(sign), + unsigned_rational); + + const exact_complex = + seq( + optional(exact_rational), + sign, + optional(unsigned_rational), + /[iI]/); + + const exact = + choice(exact_rational, exact_complex); + + // inexact + + const inexact_special = choice( - inexact_real(), - inexact_complex()); + /[iI][nN][fF]\.[0fF]/, + /[nN][aA][nN]\.[0fF]/); + + const inexact_simple = + choice( + seq( + digits_hash, + optional("."), + repeat("#")), + seq( + optional(unsigned_integer), + ".", + digits_hash), + seq( + digits_hash, + "/", + digits_hash)); + + const inexact_normal = + seq( + inexact_simple, + optional( + seq( + exp_mark, + exact_integer))); + + const inexact_unsigned = + choice(inexact_normal, inexact_special); - const inexact_real = _ => + const inexact_real = choice( seq( - optional(sign()), - inexact_normal()), + optional(sign), + inexact_normal), seq( - sign(), - inexact_special())); + sign, + inexact_special)); - const inexact_complex = _ => + const inexact_complex = choice( seq( - optional(inexact_real()), - sign(), - inexact_unsigned(), + optional(inexact_real), + sign, + optional(inexact_unsigned), /[iI]/), seq( - inexact_real(), + inexact_real, "@", - inexact_real())); + inexact_real)); - const inexact_unsigned = _ => - choice( - inexact_normal(), - inexact_special()); + const inexact = + choice(inexact_real, inexact_complex); + + const number = + choice(exact, inexact); - const inexact_normal = _ => + const general_number = seq( - inexact_simple(), - optional( + choice( + seq( + optional(exactness), + prefix), seq( - exp_mark(), - optional(sign()), - unsigned_integer()))); + prefix, + optional(exactness))), + number); - const inexact_special = _ => + return general_number; +} + +function extflonum(n) { + const digit = { + 2: /[01]/, + 8: /[0-7]/, + 10: /[0-9]/, + 16: /[0-9a-fA-F]/, + }[n]; + + const exp_mark = /[tT]/; + + const prefix = { + 2: /#[bB]/, + 8: /#[oO]/, + 10: optional(/#[dD]/), + 16: /#[xX]/, + }[n]; + + const sign = /[+-]/; + + const digits_hash = + seq( + repeat1(digit), + repeat("#")); + + const unsigned_integer = + repeat1(digit); + + // exact + + const exact_integer = + seq( + optional(sign), + unsigned_integer); + + // inexact + + const inexact_special = choice( - /[iI][nN][fF]\.0/, - /[nN][aA][nN]\.0/, - /[iI][nN][fF]\.[fFtT]/, - /[nN][aA][nN]\.[fFtT]/, - ); + /[iI][nN][fF]\.[0fFtT]/, + /[nN][aA][nN]\.[0fFtT]/); - const inexact_simple = _ => + const inexact_simple = choice( seq( - digits(), + digits_hash, optional("."), repeat("#")), seq( - optional(unsigned_integer()), + optional(unsigned_integer), ".", - digits()), + digits_hash), seq( - digits(), + digits_hash, "/", - digits())); + digits_hash)); - const digits = _ => + const inexact_normal = seq( - unsigned_integer(), - repeat("#")); + inexact_simple, + optional( + seq( + exp_mark, + exact_integer))); + + const inexact_real = + choice( + seq( + optional(sign), + inexact_normal), + seq( + sign, + inexact_special)); - return token(number()); + return seq(prefix, inexact_real); } // number }}} diff --git a/vendored_parsers/tree-sitter-racket/package.json b/vendored_parsers/tree-sitter-racket/package.json index a61915a17..03d014260 100644 --- a/vendored_parsers/tree-sitter-racket/package.json +++ b/vendored_parsers/tree-sitter-racket/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-racket", - "version": "0.1.0", + "version": "0.3.0", "description": "Tree-sitter grammar for Racket", "main": "bindings/node", "scripts": { diff --git a/vendored_parsers/tree-sitter-racket/src/grammar.json b/vendored_parsers/tree-sitter-racket/src/grammar.json index fdd92da33..741fd1804 100644 --- a/vendored_parsers/tree-sitter-racket/src/grammar.json +++ b/vendored_parsers/tree-sitter-racket/src/grammar.json @@ -12,18 +12,8 @@ "type": "CHOICE", "members": [ { - "type": "TOKEN", - "content": { - "type": "REPEAT1", - "content": { - "type": "PATTERN", - "value": "[ \\t\\n\\v\\f\\r\\u{0085}\\u{00A0}\\u{1680}\\u{2000}-\\u{200A}\\u{2028}\\u{2029}\\u{202F}\\u{205F}\\u{3000}]" - } - } - }, - { "type": "SYMBOL", - "name": "_all_comment" + "name": "_skip" }, { "type": "SYMBOL", @@ -39,30 +29,11 @@ "type": "CHOICE", "members": [ { - "type": "TOKEN", - "content": { - "type": "REPEAT1", - "content": { - "type": "PATTERN", - "value": "[ \\t\\n\\v\\f\\r\\u{0085}\\u{00A0}\\u{1680}\\u{2000}-\\u{200A}\\u{2028}\\u{2029}\\u{202F}\\u{205F}\\u{3000}]" - } - } + "type": "PATTERN", + "value": "[ \\t\\n\\v\\f\\r\\u{0085}\\u{00A0}\\u{1680}\\u{2000}-\\u{200A}\\u{2028}\\u{2029}\\u{202F}\\u{205F}\\u{3000}]+" }, { "type": "SYMBOL", - "name": "_all_comment" - } - ] - }, - "dot": { - "type": "STRING", - "value": "." - }, - "_all_comment": { - "type": "CHOICE", - "members": [ - { - "type": "SYMBOL", "name": "comment" }, { @@ -75,6 +46,10 @@ } ] }, + "dot": { + "type": "STRING", + "value": "." + }, "comment": { "type": "CHOICE", "members": [ @@ -255,7 +230,35 @@ }, { "type": "SYMBOL", - "name": "_abbrev" + "name": "quote" + }, + { + "type": "SYMBOL", + "name": "quasiquote" + }, + { + "type": "SYMBOL", + "name": "syntax" + }, + { + "type": "SYMBOL", + "name": "quasisyntax" + }, + { + "type": "SYMBOL", + "name": "unquote" + }, + { + "type": "SYMBOL", + "name": "unquote_splicing" + }, + { + "type": "SYMBOL", + "name": "unsyntax" + }, + { + "type": "SYMBOL", + "name": "unsyntax_splicing" }, { "type": "SYMBOL", @@ -527,5072 +530,6544 @@ "type": "CHOICE", "members": [ { - "type": "TOKEN", - "content": { - "type": "SEQ", - "members": [ - { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - { - "type": "PATTERN", - "value": "#[bB]" - }, - { - "type": "CHOICE", - "members": [ - { - "type": "CHOICE", - "members": [ - { - "type": "STRING", - "value": "#e" - }, - { - "type": "STRING", - "value": "#E" - }, - { - "type": "STRING", - "value": "#i" - }, - { - "type": "STRING", - "value": "#I" - } - ] - }, - { - "type": "BLANK" - } - ] - } - ] - }, - { - "type": "SEQ", - "members": [ - { - "type": "CHOICE", - "members": [ - { - "type": "CHOICE", - "members": [ - { - "type": "STRING", - "value": "#e" - }, - { - "type": "STRING", - |